// TODO: We could upgrade to Tiktoken, but it requires WASM, which would mean a multi-step change to our runtime. The implications of that are still TBD.
// import { encoding_for_model } from "tiktoken";
// import { TiktokenModel } from "tiktoken";
// const DEBUG = true;
// export function countTokensFromString(text: string, model: string):number {
//     // Tiktoken version in case we ever get WASM:
//     // // Time how long this takes and print it out
//     const start = Date.now();
//     const encoding = encoding_for_model(model);
//     const numTokens = encoding.encode(text);
//     encoding.free();
//     if (DEBUG) {
//         console.log(`[countTokens] Tokenizing took ${Date.now() - start}ms to detect ${numTokens.length} tokens.`);
//     }
//     return numTokens.length;
// }

import { ChatCompletionMessageParam } from "openai/resources";


/**
 * Roughly estimates how many tokens a piece of text will consume, without
 * running a real tokenizer.
 *
 * The text is split into words on runs of whitespace and/or commas, and the
 * word count is multiplied by a conservative tokens-per-word factor.
 *
 * Ref: https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
 *
 * @param text - The text to estimate.
 * @returns The estimated token count, rounded to the nearest integer. Text
 *          containing no words (empty, whitespace-only, or separators only)
 *          yields 0.
 */
export function estimateTokensFromString(text:string):number {
    // This is also in the server as openaifunctions.js with the same name, TOKENIZER_FACTOR
    // According to the reference above, the best estimate for English is 4/3.
    // However, it can be more, and we want to be conservative.
    const TOKENIZER_FACTOR = 5 / 3;

    // split() yields empty strings for empty input and for leading/trailing
    // separators (e.g. "" -> [""], "a," -> ["a", ""]); those are not words
    // and must not be counted.
    const wordCount = text
        .split(/[\s,]+/)
        .filter((segment) => segment.length > 0)
        .length;

    return Math.round(wordCount * TOKENIZER_FACTOR);
}

/**
 * Estimates the total token count of a chat history by summing the estimate
 * of each message's textual content.
 *
 * A message's content may be a plain string or an array of content parts.
 * For arrays, only parts carrying a string `text` field are counted; other
 * parts (e.g. images) contribute nothing to the estimate. Messages with
 * missing, null, or empty content are skipped.
 *
 * @param chatHistory - The chat messages to estimate.
 * @returns The summed token estimate over all messages.
 */
export function estimateTokensFromChat(chatHistory:ChatCompletionMessageParam[]):number {
    let tokenEstimate = 0;
    for (const message of chatHistory) {
        const content = message.content;
        if (!content) {
            // No content, null content, or an empty string: nothing to count.
            continue;
        }
        if (typeof content === "string") {
            tokenEstimate += estimateTokensFromString(content);
            continue;
        }
        if (Array.isArray(content)) {
            // Multi-part content: blindly treating it as a string would throw
            // inside estimateTokensFromString, so add up the text parts only.
            for (const part of content) {
                if ("text" in part && typeof part.text === "string" && part.text) {
                    tokenEstimate += estimateTokensFromString(part.text);
                }
            }
        }
    }
    return tokenEstimate;
}
