import type { TMIEmoteMap, TMIEmotePositionMap } from '../models';

/**
 * Scans an outbound message for emotes the current user can use, so the message can be
 * rendered locally.
 *
 * The message is split into tokens at whitespace and at the emote delimiters recognized by
 * `isEmoteDelimiter`. Each finished token is looked up in `emoteMap`; a hit with a truthy
 * `id` is recorded under the position where the token started.
 *
 * Positions are counted in symbols as produced by `getSymbolsInString`, not in UTF-16 code
 * units.
 *
 * @param message - The outbound message text to scan.
 * @param emoteMap - Emotes available to the user, keyed by the emote's text.
 * @returns A map from start position to `{ id, startIndex }` for every emote found.
 */
export function generateEmotePositions(
  message: string,
  emoteMap: TMIEmoteMap,
): TMIEmotePositionMap {
  const positions: TMIEmotePositionMap = {};

  // Walking symbol-by-symbol (rather than splitting on whitespace) keeps emoji and other
  // multi-unit symbols aligned with the positions we report. The trailing space is a
  // sentinel: it forces the final token to be flushed by the same code path as the others.
  const symbols = getSymbolsInString(message).concat(' ');

  let pendingToken = '';
  let pendingStart = 0;

  symbols.forEach((symbol, position) => {
    const endsToken = isWhitespace(symbol) || isEmoteDelimiter(symbol);
    if (!endsToken) {
      // Still inside a token; keep accumulating.
      pendingToken += symbol;
      return;
    }

    // A boundary was reached: record the token if it names an emote with a usable id.
    const entry = emoteMap[pendingToken];
    const emoteID: string | null = entry ? entry.id : null;
    if (emoteID) {
      positions[pendingStart] = { id: emoteID, startIndex: pendingStart };
    }

    // The next token, if any, begins right after this boundary symbol.
    pendingToken = '';
    pendingStart = position + 1;
  });

  return positions;
}

/**
 * Splits a string into an array of symbols, one entry per matched unit:
 * a non-surrogate code unit, a complete surrogate pair (kept together as one entry),
 * or a lone surrogate on its own. This keeps characters such as emoji intact instead
 * of breaking them into two halves.
 *
 * Technique taken from "JavaScript has a Unicode problem":
 * https://mathiasbynens.be/notes/javascript-unicode#iterating-over-symbols
 *
 * @param message - The text to split.
 * @returns The symbols in order, or an empty array when nothing matches (e.g. `''`).
 */
export function getSymbolsInString(message: string): RegExpMatchArray {
  const symbolPattern =
    /[^\uD800-\uDFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDFFF]/g;
  const symbols = message.match(symbolPattern);
  // `match` yields null when there are no matches; normalize that to an empty list.
  if (symbols === null) {
    return [];
  }
  return symbols;
}

/** Reports whether `char` contains a whitespace character (the regex `\s` class). */
function isWhitespace(char: string): boolean {
  const whitespacePattern = /\s/;
  return whitespacePattern.test(char);
}

/**
 * Reports whether `char` contains one of the punctuation marks that end an emote
 * token: a period, a comma, or an exclamation mark.
 */
function isEmoteDelimiter(char: string): boolean {
  const delimiterPattern = /[.,!]/;
  return delimiterPattern.test(char);
}
