import * as Sentry from '@sentry/browser'

// Regexes used to tokenize HTML strings into tag tokens and word tokens.
//
// matchHtmlTagsRegex matches an HTML tag: `<`, an optional `/`, one or more non-`>` characters,
// then either the closing `>` or the end of the string (so an unterminated trailing tag also matches).
const matchHtmlTagsRegex = /<\/?[^>]+(>|$)/g // just tags
// matchWordsAndTagsRegex matches, in document order, either:
//   - a tag (`<...>`, allowing repeated closing `>`), or
//   - a word: a run of characters that are neither `<` nor whitespace,
// each optionally followed by ONE trailing whitespace character that is not a CR or LF.
// Text that neither alternative matches (line breaks, additional consecutive whitespace, a lone `<`)
// is not part of any token.
const matchWordsAndTagsRegex = /<[^>]+>+[^\S\r\n]?|([^<\s]+[^\S\r\n]?)/g // word tokens and html tags (including single horizontal trailing whitespace)

/**
 * Normalizes a token so it can be compared loosely against another token.
 *
 * Strips the punctuation characters listed in the pattern below, trims surrounding
 * whitespace and lower-cases the result. `null` / `undefined` input yields `null`.
 */
const matchCleaner = (str: string | null) => {
  if (str == null) {
    return null
  }

  const withoutPunctuation = str.replace(/[.,/#!$%^&*;:{}=\-_`~()]/g, '')
  return withoutPunctuation.trim().toLowerCase()
}

/**
 * Highlights every occurrence of the given passages inside an HTML string by wrapping
 * the matching word tokens in `<mark>` elements.
 *
 * Both the document and each highlight string are tokenized with `matchWordsAndTagsRegex`
 * into word tokens and HTML-tag tokens. Tag tokens are ignored while matching, and word tokens
 * are compared after `matchCleaner` normalization, so a passage can still match when inline
 * markup sits between its words or when punctuation/casing differs.
 *
 * The first token of each matched passage gets `class="ref-highlight ref-highlight-start"`,
 * the remaining tokens get `class="ref-highlight"`.
 *
 * Highlight strings with more than 39 word tokens are split into smaller search lists
 * (see `splitArray`) that are matched independently.
 *
 * A search list that matches nowhere is reported to Sentry; it does not throw.
 *
 * NOTE: the result is the token list joined back together, so any text the tokenizer
 * does not capture is not present in the returned string.
 * NOTE(review): when two matches overlap, the shared tokens are wrapped in nested `<mark>`
 * elements — confirm this is acceptable for the consuming styles.
 *
 * @param mainHtmlString - HTML document to highlight.
 * @param highlightHtmlStrings - Passages (plain text or HTML) to search for.
 * @returns The highlighted HTML, or `mainHtmlString` unchanged when it contains no word tokens.
 */
export default function htmlTokenSequenceHighlighter(mainHtmlString: string, highlightHtmlStrings: string[]): string {
  // Split any searchList greater than maxListLength elements into smaller lists.
  // TODO: Opportunity to improve sequence matching by correcting matching errors rather than splitting the list
  const minListSize = 20
  const maxListLength = minListSize * 2 - 1

  // Convert the highlightHtmlStrings into lists of word tokens (HTML tags removed)
  const searchLists: string[][] = []
  for (const highlightString of highlightHtmlStrings) {
    const searchList = [...highlightString.matchAll(matchWordsAndTagsRegex)]
      .map((match) => match[0])
      .filter((segment) => !segment.match(matchHtmlTagsRegex))

    if (searchList.length > maxListLength) {
      searchLists.push(...splitArray(searchList, minListSize, maxListLength))
    } else {
      searchLists.push(searchList)
    }
  }

  // HTML Document Tokens
  // htmlList: every token (words and tags) in document order
  // filteredHtmlList: word tokens only
  // indexMap: for each entry of filteredHtmlList, its index in htmlList
  const htmlList = [...mainHtmlString.matchAll(matchWordsAndTagsRegex)].map((match) => match[0])
  const filteredHtmlList: string[] = []
  const indexMap: number[] = []
  htmlList.forEach((htmlSegment, index) => {
    if (!htmlSegment.match(matchHtmlTagsRegex)) {
      filteredHtmlList.push(htmlSegment)
      indexMap.push(index)
    }
  })

  // If filteredHtmlList is empty, return the original htmlString
  if (filteredHtmlList.length === 0) {
    console.log('Highlighter: Source does not include any content when HTML tags are removed. Original HTML: ', htmlList)
    return mainHtmlString
  }

  // Normalize the document tokens once instead of on every comparison
  const cleanedHtmlList = filteredHtmlList.map((token) => matchCleaner(token))

  // HTML Highlight Indices
  // Each match contributes one list of htmlList indices that need to be wrapped in a <mark> tag
  const highlightedHtmlListIndices: number[][] = []

  for (const searchList of searchLists) {
    const searchListLength = searchList.length

    // A highlight string without any word tokens would trivially "match" at every position
    if (searchListLength === 0) {
      continue
    }

    const cleanedSearchList = searchList.map((token) => matchCleaner(token))

    // SEQUENCE MATCHING
    // Slide the search list over the word tokens of the document.
    // lastStart is inclusive so a passage ending on the final word of the document is found too.
    const lastStart = cleanedHtmlList.length - searchListLength
    let matchCount = 0
    for (let i = 0; i <= lastStart; i++) {
      let matchFound = true

      for (let j = 0; j < searchListLength; j++) {
        if (cleanedHtmlList[i + j] !== cleanedSearchList[j]) {
          matchFound = false
          break
        }
      }

      if (matchFound) {
        // Map back to the original indices of the matching sequence
        highlightedHtmlListIndices.push(indexMap.slice(i, i + searchListLength))
        matchCount++
      }
    }

    // Report per search list: a match of another list must not hide this one failing
    if (matchCount === 0) {
      Sentry.captureException(new Error(`Client highlighter: No matches found for searchList.`), {
        extra: {
          exceptionDescription: "The highlighting algorigthm couldn't find any matches for the searchList.",
          searchList: JSON.stringify(searchList),
          searchLists: JSON.stringify(searchLists),
          filteredHtmlList: JSON.stringify(filteredHtmlList),
        },
      })
    }
  }

  // After all searchLists have been processed
  // Wrap the matched text elements in <mark> tags; the first token of each group is the anchor
  for (const group of highlightedHtmlListIndices) {
    group.forEach((htmlListIndex, positionInGroup) => {
      const className = positionInGroup === 0 ? 'ref-highlight ref-highlight-start' : 'ref-highlight'
      htmlList[htmlListIndex] = `<mark class="${className}">${htmlList[htmlListIndex]}</mark>`
    })
  }

  // Recombine the htmlList into a string
  return htmlList.join('')
}

/**
 * Splits an array into consecutive chunks.
 *
 * Chunks of `minSize` elements are taken from the front for as long as more than
 * `maxListLength` elements remain; the remaining elements then form the final chunk.
 * An empty input produces an empty result.
 *
 * @param inputArray - Array to split; it is not modified.
 * @param minSize - Size of every chunk except the last one.
 * @param maxListLength - Largest remainder that is kept together as the final chunk.
 * @returns The chunks, in original order.
 * @throws Error when `minSize` is less than 1.
 */
function splitArray<T>(inputArray: T[], minSize: number, maxListLength: number): T[][] {
  if (minSize < 1) {
    throw new Error('Minimum size must be at least 1')
  }

  const chunks: T[][] = []
  let offset = 0

  while (offset < inputArray.length) {
    const remaining = inputArray.length - offset

    // Few enough elements left: keep them together as the final chunk
    if (remaining <= maxListLength) {
      chunks.push(inputArray.slice(offset))
      return chunks
    }

    chunks.push(inputArray.slice(offset, offset + minSize))
    offset += minSize
  }

  return chunks
}
