/**
 * Counts the words in a piece of text.
 *
 * A word is a run of Latin letters (including the accented Latin-1 range and œ/Œ)
 * and digits. Apostrophes between two such runs keep them together, so "don't"
 * and "l'école" each count once. Any other character acts as a separator.
 *
 * @param {string} content - Text to analyse.
 * @returns {number} Number of words found; 0 for empty or non-string input.
 */
export const getWordCount = content => {
    if (typeof content !== "string") return 0;

    // Explicit ranges skip × (U+00D7) and ÷ (U+00F7), which sit inside À-ÿ,
    // and one-letter words such as "a" or "I" match because only one character is required.
    const words = content.match(/[A-Za-z0-9À-ÖØ-öø-ÿŒœ]+(?:['’][A-Za-z0-9À-ÖØ-öø-ÿŒœ]+)*/g);
    return words ? words.length : 0;
}
/**
 * Counts the sentences in a piece of text.
 *
 * The text is cut wherever a '.', '?' or '!' is immediately followed by a
 * whitespace character; every resulting chunk that contains something other
 * than whitespace counts as one sentence. A final sentence does not need a
 * trailing terminator to be counted.
 *
 * @param {string} content - Text to analyse.
 * @returns {number} Number of sentences; 0 for empty, blank or non-string input.
 */
export const getSentenceCount = content => {
    if (typeof content !== "string") return 0;

    return content
        .split(/[.?!]\s/)
        // Trim before testing so leftovers such as "\n" or "   " after the last
        // terminator are not mistaken for a sentence.
        .filter(chunk => chunk.trim() !== "")
        .length;
}
/**
 * Counts the paragraphs in a piece of text.
 *
 * The text is split on runs of carriage-return / line-feed characters; each
 * resulting line that contains something other than whitespace counts as one
 * paragraph.
 *
 * @param {string} content - Text to analyse.
 * @returns {number} Number of paragraphs; 0 for empty, blank or non-string input.
 */
export const getParagraphCount = content => {
    if (typeof content !== "string") return 0;

    // Lines holding only spaces or tabs look empty to a reader, so they are skipped too.
    const chunks = content.split(/[\r\n]+/).filter(line => line.trim() !== "");
    return chunks.length;
}

// Small sample stop-word lists, one per language suffix (FR, EN, ES).
// They are intentionally short; extend them as needed.
const stopWordsFR = [
    "le", "la", "les", "de", "des", "et", "en", "un", "une", "du",
    "pour", "dans", "à", "est", "avec", "il", "a", "son", "que", "sa",
    "qui", "se", "ce", "sur", "par"
];
const stopWordsEN = [
    "the", "is", "in", "and", "it", "to", "for", "of", "on", "with",
    "as", "by", "an", "be", "this", "which", "at", "from", "or", "that",
    "not"
];
const stopWordsES = [
    "el", "la", "los", "de", "y", "en", "un", "una", "para", "con",
    "por", "se", "que", "su", "a", "lo", "las", "es", "como", "más",
    "al", "del"
];

// Combined list in FR, EN, ES order; entries shared between languages appear more than once.
export const stopWords = stopWordsFR.concat(stopWordsEN, stopWordsES);


/**
 * Returns the ten most frequent keywords or n-grams found in a text.
 *
 * The text is lower-cased and tokenised on whitespace. A token is kept only
 * when it starts with a letter or apostrophe and consists solely of letters,
 * apostrophes, commas and dots up to the next whitespace; tokens containing
 * anything else (digits, "!", "?", hyphens, ...) are skipped entirely.
 * One trailing '.' or ',' is removed from each kept token.
 *
 * @param {string} text - Text to analyse.
 * @param {string} [ngramType="unigram"] - "unigram" counts single words that are
 *   not in `stopWords`; "bigram_trigram" counts every pair and triple of
 *   consecutive kept tokens (stop words included). Any other value yields [].
 * @returns {{keyword: string, count: number}[]} At most ten entries, highest
 *   count first; [] when nothing matches or `text` is not a string.
 */
export function getFrequentKeywords(text, ngramType = "unigram") {
    if (typeof text !== "string") return [];

    // The accented letters cover French plus the Spanish á í ñ ó ú, so that words
    // such as "más" or "canción" are tokenised like any other word.
    const tokenPattern = /(?:^|\s)[a-zA-Z'’àâäèéêëîïôœùûüÿçáíñóú][a-zA-Z'’àâäèéêëîïôœùûüÿçáíñóú,.]*?(?=\s|$)/g;

    const matches = text.toLowerCase().match(tokenPattern);
    if (matches === null) return [];

    // Each match may carry the single leading whitespace consumed by the pattern:
    // strip one trailing '.' or ',' first, then that whitespace.
    const words = matches.map(token => token.replace(/[.,]$/, '').trim());

    // A Map is used instead of a plain object so that a word such as "constructor"
    // cannot collide with a property inherited from Object.prototype.
    const counts = new Map();
    const bump = key => counts.set(key, (counts.get(key) || 0) + 1);

    if (ngramType === "unigram") {
        const stopWordSet = new Set(stopWords);
        for (const word of words) {
            if (!stopWordSet.has(word)) bump(word);
        }
    } else if (ngramType === "bigram_trigram") {
        for (let i = 0; i < words.length - 1; i++) {
            const bigram = `${words[i]} ${words[i + 1]}`;
            bump(bigram);

            // A trigram needs one more token after the bigram.
            if (i < words.length - 2) {
                bump(`${bigram} ${words[i + 2]}`);
            }
        }
    }

    return [...counts.entries()]
        .sort((a, b) => b[1] - a[1])
        .slice(0, 10)
        .map(([keyword, count]) => ({ keyword, count }));
}