import { EnumUnicodeCategory, UNICODE_CATEGORIES } from '@libs/constants';


/**------------------------------------------------------
 * Unicode Regex Handling
 * ----------------------
 * Info: Handles special encodings, invisible characters
 * and control chars in strings / texts.
 *
 * All character sets are taken from the UNICODE_CATEGORIES
 * table, which is indexed by EnumUnicodeCategory and whose
 * entries are spliced into regex character classes ("[...]").
 */
export class UtilUnicodeRegex {

	/**------------------------------------------------------
	 * Regex Cache
	 * -----------
	 * Compiled regexes for fixed category sets, so the (large)
	 * character classes are not recompiled on every call.
	 * The cached regexes carry the "g" flag and are only ever
	 * passed to String.prototype.replace, which resets their
	 * lastIndex, so sharing them between calls is safe.
	 */
	private static readonly regExpCache: Map<string, RegExp> = new Map<string, RegExp>();


	/**------------------------------------------------------
	 * Text Purify Functions
	 * ---------------------
	 * Strips unassigned, mark, control, private use, format and
	 * surrogate characters (in that order), then maps space
	 * separators to ' ' and line / paragraph separators to '\n'.
	 * The result is trimmed.
	 * NOTE(review): control characters are stripped before the
	 * separators are mapped, so line breaks that are part of
	 * the Control category entry do not survive -- confirm
	 * that this is intended.
	 */
	removeSpecialCharacters(text: string): string {

		//0 - remove unassigned special characters
		const withoutUnassigned: string = text.replace(
			this.buildCategoryRegExp([EnumUnicodeCategory.Unassigned]), '');

		//1 - remove mark characters
		const withoutMarks: string = withoutUnassigned.replace(
			this.buildCategoryRegExp([
				EnumUnicodeCategory.NonSpacingMark,
				EnumUnicodeCategory.EnclosingMark,
				EnumUnicodeCategory.CombiningSpacingMark
			]), '');

		//2 - remove other special characters (like formatting, ...)
		const withoutOthers: string = withoutMarks.replace(
			this.buildCategoryRegExp([
				EnumUnicodeCategory.Control,
				EnumUnicodeCategory.PrivateUse,
				EnumUnicodeCategory.Format,
				EnumUnicodeCategory.Surrogate
			]), '');

		//3 - replace all separators with the appropriate ascii formats
		const normalized: string = withoutOthers
			.replace(this.buildCategoryRegExp([EnumUnicodeCategory.SpaceSeparator]), 	 ' ')
			.replace(this.buildCategoryRegExp([EnumUnicodeCategory.LineSeparator]), 	 '\n')
			.replace(this.buildCategoryRegExp([EnumUnicodeCategory.ParagraphSeparator]), '\n');

		//4 - return the purified text
		return normalized.trim();
	}


	/**------------------------------------------------------
	 * Numeric Text
	 * ------------
	 * removeNonNumeric keeps only decimal digits, removeNumbers
	 * drops them. Both return the trimmed result.
	 */
	removeNonNumeric(text: string): string {
		return text.replace(this.buildCategoryRegExp([EnumUnicodeCategory.DecimalDigitNumber], true), '').trim();
	}

	removeNumbers(text: string): string {
		return text.replace(this.buildCategoryRegExp([EnumUnicodeCategory.DecimalDigitNumber]), '').trim();
	}


	/**------------------------------------------------------
	 * Alphabetic
	 * ----------
	 * removeNonAlphabetic keeps letters and space separators,
	 * removeAlphabetic drops all letters (spaces are kept).
	 * Both return the trimmed result.
	 */
	removeNonAlphabetic(text: string): string {
		const keptCategories: EnumUnicodeCategory[] = [...this.letterCategories(), EnumUnicodeCategory.SpaceSeparator];
		return text.replace(this.buildCategoryRegExp(keptCategories, true), '').trim();
	}

	removeAlphabetic(text: string): string {
		return text.replace(this.buildCategoryRegExp(this.letterCategories()), '').trim();
	}


	/**------------------------------------------------------
	 * Alphanumeric
	 * ------------
	 * removeNonAscii keeps only the characters 0x20 - 0x7E and
	 * does not trim. The other functions trim their result.
	 */
	removeNonAscii(text: string): string {
		return text.replace(/[^\x20-\x7E]/g, '');
	}

	/**
	 * Keeps decimal digits, letters, space separators and the
	 * given exception characters; removes everything else.
	 * The exception characters are placed verbatim inside a
	 * regex character class, so characters with a meaning in
	 * there (like "]", "\" or "-") have to be escaped by the
	 * caller.
	 */
	removeNonAlphanumeric(text: string, exceptionCharacters: string = ''): string {
		const keptCategories: EnumUnicodeCategory[] = [
			EnumUnicodeCategory.DecimalDigitNumber,
			...this.letterCategories(),
			EnumUnicodeCategory.SpaceSeparator
		];
		return text.replace(this.buildCategoryRegExp(keptCategories, true, exceptionCharacters), '').trim();
	}

	removeAlphanumeric(text: string): string {
		const removedCategories: EnumUnicodeCategory[] = [EnumUnicodeCategory.DecimalDigitNumber, ...this.letterCategories()];
		return text.replace(this.buildCategoryRegExp(removedCategories), '').trim();
	}


	/**------------------------------------------------------
	 * Whitespaces
	 * -----------
	 * removeWhitespaces drops all separators, the zero width
	 * characters (U+200B - U+200D, U+FEFF) and everything
	 * matched by "\s". purifyWhitespaces collapses every run
	 * of them into a single ' ' instead. Both trim the result.
	 */
	removeWhitespaces(text: string): string {
		return text
			.replace(this.buildCategoryRegExp(this.separatorCategories()), '')
			.replace(/[\u200B-\u200D\uFEFF]/g, '')
			.replace(/\s+/g, '')
			.trim();
	}

	purifyWhitespaces(text: string): string {
		return text
			.replace(this.buildCategoryRegExp(this.separatorCategories()), ' ')
			.replace(/[\u200B-\u200D\uFEFF]/g, '')
			.replace(/\s+/g, ' ')
			.trim();
	}


	/**------------------------------------------------------
	 * Symbols & Others
	 * ----------------
	 * Removes math, currency, modifier and other symbols and
	 * returns the trimmed result.
	 */
	removeSymbols(text: string): string {
		return text.replace(
			this.buildCategoryRegExp([
				EnumUnicodeCategory.MathSymbol,
				EnumUnicodeCategory.CurrencySymbol,
				EnumUnicodeCategory.ModifierSymbol,
				EnumUnicodeCategory.OtherSymbol
			]), '').trim();
	}


	/**------------------------------------------------------
	 * Create Regex
	 * ------------
	 * Gets a regex written in a dialect that supports unicode
	 * categories and translates it to a dialect supported by JS.
	 *
	 * Every "\p{X}" / "\p{Xx}" token with an entry in the
	 * UNICODE_CATEGORIES table is replaced by "[<characters>]".
	 * Tokens without an entry are left untouched (instead of
	 * being turned into the character class "[undefined]").
	 * As the expansion is always wrapped in brackets, the
	 * tokens should be used outside of character classes.
	 * For RegExp inputs only the g, i and m flags are carried
	 * over; string inputs produce a regex without flags.
	 */
	createUnicodeRegExp(regexpStr: RegExp | string): RegExp {

		//0 - add the modifiers
		let modifiers: string = '';
		let source: string;
		if (regexpStr instanceof RegExp) {
			modifiers = (regexpStr.global ? 'g' : '') + (regexpStr.ignoreCase ? 'i' : '') + (regexpStr.multiline ? 'm' : '');
			source = regexpStr.source;
		} else {
			source = regexpStr;
		}

		//1 - place all the special char of required categories into the regex
		const expanded: string = source.replace(/\\p\{(..?)\}/g, (match: string, group: EnumUnicodeCategory) => {
			// look the category up first: a template string is always truthy, so it can not be used for the fallback
			const characters: string | undefined = UNICODE_CATEGORIES[group];
			return typeof characters === 'string' ? `[${characters}]` : match;
		});

		//2 - create the return the new regular expression
		return new RegExp(expanded, modifiers);
	}


	/**------------------------------------------------------
	 * Helper Functions
	 */

	/** All letter categories, in the order they are placed into the character classes. */
	private letterCategories(): EnumUnicodeCategory[] {
		return [
			EnumUnicodeCategory.UpperCaseLetter,
			EnumUnicodeCategory.LowerCaseLetter,
			EnumUnicodeCategory.TitleCaseLetter,
			EnumUnicodeCategory.ModifierLetter,
			EnumUnicodeCategory.OtherLetter
		];
	}

	/** All separator categories (space, line and paragraph). */
	private separatorCategories(): EnumUnicodeCategory[] {
		return [
			EnumUnicodeCategory.SpaceSeparator,
			EnumUnicodeCategory.LineSeparator,
			EnumUnicodeCategory.ParagraphSeparator
		];
	}

	/**
	 * Builds the global regex "[<extraMembers><categories...>]"
	 * (or "[^...]" if negated) for the given categories.
	 * Regexes without extra members are cached, as their source
	 * only depends on the category list; the ones with caller
	 * provided extra members are not, to keep the cache bounded.
	 */
	private buildCategoryRegExp(categories: EnumUnicodeCategory[], negated: boolean = false, extraMembers: string = ''): RegExp {

		//0 - try to serve the regex from the cache
		const isCacheable: boolean = extraMembers === '';
		const cacheKey	 : string  = `${negated ? '^' : ''}${categories.join('|')}`;
		const cached: RegExp | undefined = isCacheable ? UtilUnicodeRegex.regExpCache.get(cacheKey) : undefined;
		if (cached) {
			return cached;
		}

		//1 - assemble the character class
		const members: string = categories.map((category: EnumUnicodeCategory) => UNICODE_CATEGORIES[category]).join('');
		const regExp : RegExp = new RegExp(`[${negated ? '^' : ''}${extraMembers}${members}]`, 'g');

		//2 - remember the regex for the next call
		if (isCacheable) {
			UtilUnicodeRegex.regExpCache.set(cacheKey, regExp);
		}
		return regExp;
	}
}
