import * as tf from '@tensorflow/tfjs';

/**
 * Normalises raw text into tokenised sentences and builds a word → index
 * dictionary over exactly those tokens.
 *
 * Processing steps:
 *  1. The text is lowercased and commas are removed.
 *  2. Every remaining character that is not a letter (a-z, ß, ä, ö, ü), a
 *     digit, a dot or a space is turned into a dot, i.e. it ends a sentence.
 *  3. The text is split on dots into sentences and each sentence is split on
 *     spaces into words; empty words and sentences without words are dropped.
 *  4. Each distinct word gets an index in order of first appearance.
 *
 * The dictionary is derived from the sentence tokens themselves, so every
 * word that occurs in a returned sentence is guaranteed to have an index.
 *
 * @param text - Raw input text.
 * @param windowSize - Requested context window size; values below 1 are
 *   raised to 1.
 * @returns A promise resolving to the tuple
 *   `[sentences, dictionary, vocabSize, windowSize]`, where `sentences` is an
 *   array of word arrays, `dictionary` maps each word to its index,
 *   `vocabSize` is the number of distinct words and `windowSize` is the
 *   clamped window size.
 */
export async function preprocessText(
  text: string,
  windowSize: number
): Promise<any> {
  // Clamp into a local instead of reassigning the parameter.
  const effectiveWindowSize = windowSize < 1 ? 1 : windowSize;

  const commaPattern = /,/g;
  // Anything other than letters, digits, dots and spaces acts as a sentence break.
  const sentenceBreakPattern = /[^a-zA-Z0-9ßöäüÖÄÜ. ]/g;

  const normalizedText = text
    .toLowerCase()
    .replace(commaPattern, '')
    .replace(sentenceBreakPattern, '.');

  // Split into sentences, then words; discard sentences that contain no words
  // (e.g. the whitespace left behind after a trailing ". ").
  const sentences: string[][] = normalizedText
    .split('.')
    .map((sentence) => sentence.split(' ').filter((word) => word !== ''))
    .filter((words) => words.length > 0);

  // Index words in order of first appearance, using the same tokens that make
  // up the sentences so lookups downstream can never miss.
  const uniqueWords = [...new Set(sentences.flat())];
  const dictionary: Record<string, number> = {};
  uniqueWords.forEach((word, index) => {
    dictionary[word] = index;
  });
  const vocabSize = uniqueWords.length;

  return [sentences, dictionary, vocabSize, effectiveWindowSize];
}

/**
 * Builds the CBOW input vectors: for every word position in every sentence,
 * the average of the one-hot vectors of its context words.
 *
 * The context of position `i` consists of the words at `i - 1 … i - w` and
 * `i + 1 … i + w` (with `w` = `windowSize`) that lie inside the sentence.
 * One vector is produced per word position, in sentence order, so the result
 * lines up row by row with the output of `generateLabelsCBOW`.
 *
 * The averaging is done with plain arithmetic rather than per-sample tensors,
 * so no tensor memory is allocated or has to be disposed. Values are rounded
 * to float32 so they match what a float32 tensor would hold.
 *
 * A position without any context (e.g. a one-word sentence) yields an
 * all-zero vector instead of dividing by zero. Context words whose dictionary
 * entry is not a valid index in `[0, vocabSize)` add nothing to the vector
 * but still count towards the divisor.
 *
 * @param sentences - Tokenised sentences (each entry is indexed word by word).
 * @param dictionary - Maps each word to its integer index.
 * @param vocabSize - Length of every produced vector.
 * @param windowSize - Number of words considered on each side of the centre.
 * @returns A promise resolving to an array of `vocabSize`-long number arrays.
 */
export async function generateFeaturesCBOW(
  sentences: string[],
  dictionary: any,
  vocabSize: number,
  windowSize: number
): Promise<any> {
  const featureVectors: number[][] = [];

  for (const sentence of sentences) {
    for (let center = 0; center < sentence.length; center++) {
      const counts: number[] = new Array(vocabSize).fill(0);
      let contextSize = 0;

      for (let distance = 0; distance < windowSize; distance++) {
        const neighbours = [center - distance - 1, center + distance + 1];
        for (const position of neighbours) {
          if (position < 0 || position >= sentence.length) {
            continue;
          }
          contextSize++;
          const wordIndex = dictionary[sentence[position]];
          if (
            Number.isInteger(wordIndex) &&
            wordIndex >= 0 &&
            wordIndex < vocabSize
          ) {
            counts[wordIndex]++;
          }
        }
      }

      // Guard the divisor: a word without neighbours would otherwise give 0 / 0.
      const divisor = Math.max(contextSize, 1);
      featureVectors.push(counts.map((count) => Math.fround(count / divisor)));
    }
  }

  return featureVectors;
}

/**
 * Builds the CBOW targets: the dictionary entry of every word position,
 * walking the sentences in order and each sentence from first to last word.
 *
 * Words missing from the dictionary contribute whatever the lookup yields
 * (normally `undefined`); no filtering is applied.
 *
 * @param sentences - Tokenised sentences (each entry is indexed word by word).
 * @param dictionary - Maps each word to its index.
 * @returns A promise resolving to the flat list of looked-up entries.
 */
export async function generateLabelsCBOW(
  sentences: string[],
  dictionary: any
): Promise<any> {
  // One lookup per position; flatMap concatenates the per-sentence results.
  return sentences.flatMap((sentence) =>
    Array.from(
      { length: sentence.length },
      (_, position) => dictionary[sentence[position]]
    )
  );
}

/**
 * Builds the skip-gram inputs: for every word position, its own dictionary
 * entry repeated once per context word that exists inside the sentence.
 *
 * The context of position `i` is made of the positions `i - d` and `i + d`
 * for each step of the window loop that fall inside the sentence, so the
 * output has one entry per (centre, context) pair and pairs up index by
 * index with the output of `generateLabelsSkipGram`.
 *
 * @param sentences - Tokenised sentences (each entry is indexed word by word).
 * @param dictionary - Maps each word to its index.
 * @param windowSize - Number of words considered on each side of the centre.
 * @returns A promise resolving to the flat list of centre-word entries.
 */
export async function generateFeaturesSkipGram(
  sentences: string[],
  dictionary: any,
  windowSize: number
): Promise<any> {
  const centerEntries: unknown[] = [];

  for (const sentence of sentences) {
    const lastPosition = sentence.length - 1;

    for (let center = 0; center <= lastPosition; center++) {
      // Count how many neighbours exist; only the count matters here.
      let neighbourCount = 0;
      let step = 0;
      while (step < windowSize) {
        const reach = step + 1;
        if (center >= reach) {
          neighbourCount += 1;
        }
        if (center + reach <= lastPosition) {
          neighbourCount += 1;
        }
        step += 1;
      }

      for (let repeat = 0; repeat < neighbourCount; repeat++) {
        centerEntries.push(dictionary[sentence[center]]);
      }
    }
  }

  return centerEntries;
}

/**
 * Builds the skip-gram targets: for every word position, the dictionary
 * entries of its context words.
 *
 * For each centre position the context is emitted nearest-first, and at each
 * distance the word before the centre comes ahead of the word after it.
 * Positions outside the sentence are skipped. The output pairs up index by
 * index with the output of `generateFeaturesSkipGram`.
 *
 * @param sentences - Tokenised sentences (each entry is indexed word by word).
 * @param dictionary - Maps each word to its index.
 * @param windowSize - Number of words considered on each side of the centre.
 * @returns A promise resolving to the flat list of context-word entries.
 */
export async function generateLabelsSkipGram(
  sentences: string[],
  dictionary: any,
  windowSize: number
): Promise<any> {
  const contextEntries: unknown[] = [];

  for (const sentence of sentences) {
    const size = sentence.length;

    for (let center = 0; center < size; center++) {
      // `reach` is the distance from the centre word (1, 2, ...).
      for (let reach = 1; reach - 1 < windowSize; reach++) {
        const before = center - reach;
        const after = center + reach;

        if (before >= 0) {
          contextEntries.push(dictionary[sentence[before]]);
        }
        if (after < size) {
          contextEntries.push(dictionary[sentence[after]]);
        }
      }
    }
  }

  return contextEntries;
}
