import { Util } from '@libs/utilities/util';
import { EnumLanguage, STOP_WORDS_EN, STOP_WORDS_DE, STOP_WORDS_ES, STOP_WORDS_FR, STOP_WORDS_IT, STOP_WORDS_JA } from '@libs/constants';


/**------------------------------------------------------
 * StopWord Filter Library
 * -----------------------
 * > Checks words against, and strips words contained in,
 * > the per-language stop-word lists imported from '@libs/constants'.
 * > Lookups lower-case the incoming word; the lists themselves are
 * > used as provided (presumably stored in lower case - TODO confirm).
 */
export class StopWordFilter {

	//** Configurations */
	// single source of truth (language => stop-word list); the insertion order defines the order of the supported languages
	private readonly STOP_WORDS_BY_LANGUAGE: Map<EnumLanguage, string[]> = new Map<EnumLanguage, string[]>([
		[EnumLanguage.EN, STOP_WORDS_EN],
		[EnumLanguage.DE, STOP_WORDS_DE],
		[EnumLanguage.FR, STOP_WORDS_FR],
		[EnumLanguage.IT, STOP_WORDS_IT],
		[EnumLanguage.ES, STOP_WORDS_ES],
		[EnumLanguage.JA, STOP_WORDS_JA]
	]);
	private readonly SUPPORTED_LANGUAGES: EnumLanguage[] = Array.from(this.STOP_WORDS_BY_LANGUAGE.keys());

	//** Caches */
	// lazily built lookup sets, so repeated "isStopWord" calls do not scan the whole list every time
	private readonly stopWordSets: Map<EnumLanguage, Set<string>> = new Map<EnumLanguage, Set<string>>();


	/**
	 * Tells whether a stop-word list is registered for the given language.
	 *
	 * @param language language to check
	 * @returns true if "getStopWordsByLanguage" will not throw for this language
	 */
	isLanguageSupported(language: EnumLanguage): boolean {
		return this.STOP_WORDS_BY_LANGUAGE.has(language);
	}

	/**
	 * Checks whether the lower-cased word is contained in the stop-word list of the language.
	 *
	 * @param word     word to check (compared after "toLowerCase")
	 * @param language language whose stop-word list is used
	 * @returns true if the word is a stop-word
	 * @throws Error if no stop-word list is defined for the language
	 */
	isStopWord(word: string, language: EnumLanguage): boolean {
		return this.getStopWordSet(language).has(word.toLowerCase());
	}


	//** Remove StopWords */
	/**
	 * Returns a new array without the entries that are stop-words (the input array is not modified).
	 *
	 * @param wordArray words to filter
	 * @param language  language whose stop-word list is used
	 * @returns the remaining words in their original order
	 * @throws Error if no stop-word list is defined for the language
	 */
	removeFromArray(wordArray: string[], language: EnumLanguage): string[] {

		//0 - resolve the lookup set once (also reports an unsupported language even for an empty array)
		const stopWordSet: Set<string> = this.getStopWordSet(language);

		//1 - keep everything which is not a stop-word
		return wordArray.filter((word: string) => !stopWordSet.has(word.toLowerCase()));
	}

	/**
	 * Replaces every stop-word occurrence in the text by a blank and collapses whitespace runs to a single blank.
	 * The match expression is created by "Util.RegExp.textBoundCaptureGroup" (defined outside of this file,
	 * presumably it bounds the alternation by whitespace / text edges - TODO confirm). The result is not trimmed.
	 *
	 * @param text     text to clean
	 * @param language language whose stop-word list is used
	 * @returns the text without the matched stop-words
	 * @throws Error if no stop-word list is defined for the language
	 */
	removeFromText(text: string, language: EnumLanguage): string {

		//0 - get all stop-words (escaped, so special characters are matched literally instead of being read as regex syntax)
		const stopWordsCombined: string = this.getStopWordsByLanguage(language)
			.filter((stopWord: string) => stopWord.length > 0)
			.map((stopWord: string) => this.escapeRegExp(stopWord))
			.join('|');

		//1 - an empty alternation would match the empty string, so only normalize the whitespace
		if (stopWordsCombined.length === 0) return text.replace(/\s{2,}/g, ' ');

		//2 - remove all stop-words from the text
		return text.replace(Util.RegExp.textBoundCaptureGroup(stopWordsCombined), ' ').replace(/\s{2,}/g, ' ');
	}

	/**
	 * Stop-word removal based on the plain "\b" word boundary only. Judging by the name this is meant
	 * for Safari (presumably because of missing regex features there - TODO confirm); it is currently
	 * not called from within this class.
	 *
	 * Only stop-words consisting of latin characters with a length of at least 3 are removed, and
	 * text parts containing non-latin characters are kept untouched, as "\b" is not reliable for them.
	 *
	 * @param text     text to clean
	 * @param language language whose stop-word list is used
	 * @returns the trimmed text with collapsed whitespace and without the matched stop-words
	 * @throws Error if no stop-word list is defined for the language
	 */
	private removeFromTextFallbackSafari(text: string, language: EnumLanguage): string {

		//0 - get stop-words which have only latin chars
		const latinStopWords: string = this.getStopWordsByLanguage(language)
			.filter((elem: string) => Util.String.hasOnlyLatinCharacters(elem))
			.filter((elem: string) => elem.length >= 3)
			.map((elem: string) => this.escapeRegExp(elem))
			.join('|');

		//1 - without usable stop-words the expression would be "\b()\b", which matches at every word boundary
		if (latinStopWords.length === 0) return text.replace(/\s{2,}/g, ' ').trim();

		//2 - remove all stop-words from the text (expression is created once, "replace" resets its position for every call)
		const stopWordExpression: RegExp   = new RegExp(`\\b(${latinStopWords})\\b`, 'gi');
		const finishedTextParts : string[] = text.split(' ').map((textPart: string) => {

			//a. none latin chars break the word bound, keep those parts unchanged
			if (Util.String.hasNonLatinCharacters(textPart)) return textPart;

			//b. replace with word bound for latin chars
			return textPart.replace(stopWordExpression, ' ');
		});

		//3 - combine the results (collapse the whitespace after joining, as removed words leave blanks behind)
		return finishedTextParts.join(' ').replace(/\s{2,}/g, ' ').trim();
	}


	//** Get StopWord List */
	/**
	 * Returns the stop-word list registered for the language (the shared list itself, not a copy).
	 *
	 * @param language language to get the stop-words for
	 * @returns the stop-word list
	 * @throws Error if no stop-word list is defined for the language
	 */
	getStopWordsByLanguage(language: EnumLanguage): string[] {

		const stopWords: string[] | undefined = this.STOP_WORDS_BY_LANGUAGE.get(language);
		if (stopWords === undefined) {
			throw new Error(`StopWordFilter => getStopWordsByLanguage => FATAL ERROR: no stop words were defined for the "${language}" language (supported languages are: ${this.SUPPORTED_LANGUAGES.join(', ')})`);
		}
		return stopWords;
	}


	//** Helpers */
	/** Returns the cached lookup set of the language, it is created on the first request. */
	private getStopWordSet(language: EnumLanguage): Set<string> {

		//0 - is the set already cached?
		const cachedSet: Set<string> | undefined = this.stopWordSets.get(language);
		if (cachedSet !== undefined) return cachedSet;

		//1 - create and remember the set (throws for unsupported languages)
		const stopWordSet: Set<string> = new Set<string>(this.getStopWordsByLanguage(language));
		this.stopWordSets.set(language, stopWordSet);
		return stopWordSet;
	}

	/** Escapes all characters with a special meaning in regular expressions, so the value is matched literally. */
	private escapeRegExp(value: string): string {
		return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
	}
}
