import { visit } from 'unist-util-visit'
import type { Root, Text, ElementContent } from 'hast'

/**
 * Matches agent citation markers of the form `[ref_xxxxxx]`: the literal
 * prefix `ref_` followed by exactly six ASCII letters or digits, wrapped in
 * square brackets.
 *
 * The `g` flag is required for use with `String.prototype.matchAll`. Because
 * the regex is global, calling `test()` or `exec()` on this shared instance
 * advances its `lastIndex`; reset it (or use `matchAll` / `replace`) when
 * reusing it.
 */
export const kAgentCitationRegex = /\[ref_[a-zA-Z0-9]{6}\]/g

/**
 * Rehype Agent Citations
 *
 * This rehype plugin scans every text node of the tree and converts each
 * "[ref_xxxxxx]" marker (6 alphanumeric chars) into a
 * `<span class="agent-citation">` element whose text content is the
 * citation id without the brackets. Each span carries two data attributes:
 *
 * - `data-agent-citation_id`: the citation id, e.g. `ref_abc123`
 * - `data-agent-citation_id_index`: zero-based count of how many times this
 *   same id has already been seen in the tree being transformed
 *
 * Counters are reset for every tree, so a processor that is reused for
 * several documents numbers each document independently. Text nodes that
 * contain no citation marker are left untouched.
 *
 * The <Markdown /> component can then replace these span elements with
 * react components through its `components` property.
 *
 * Example usage:
 *
 * ```tsx
 * import Markdown from 'react-markdown'
 * import rehypeRaw from 'rehype-raw'
 * import { rehypeAgentCitations } from '...'
 *
 * <Markdown
 *   rehypePlugins={[rehypeRaw, rehypeAgentCitations]}
 *   components={{
 *     span: ({ node, ...props }) => {
 *       if ('data-agent-citation_id' in props) {
 *         const citation_id = props['data-agent-citation_id'] as string
 *         // ...render a citation component for `citation_id`
 *       }
 *       return <span {...props} />
 *     },
 *   }}
 * >
 *   {content}
 * </Markdown>
 * ```
 *
 * @returns A transformer that mutates the given hast tree in place.
 */
export function rehypeAgentCitations() {
  return (tree: Root) => {
    // Occurrence counter per citation id. Created per run (not per plugin
    // instance) so counts never leak from one document into the next.
    const citationCounts = new Map<string, number>()

    visit(tree, 'text', (node: Text, index: number | undefined, parent) => {
      // Without a parent and position there is nothing to splice into; bail
      // out before touching the counters.
      if (parent == null || index == null) {
        return
      }

      // Replacement nodes for this text node
      const children: ElementContent[] = []
      let lastIndex = 0

      // For every citation reference "[ref_xxxxxx]" in this text node
      for (const match of node.value.matchAll(kAgentCitationRegex)) {
        const start = match.index
        if (start === undefined) continue
        const end = start + match[0].length

        // Extract the ref_xxxxxx portion without the brackets []
        const citationId = match[0].slice(1, -1)

        // Zero-based occurrence index of this id within the current tree
        const occurrence = citationCounts.get(citationId) ?? 0
        citationCounts.set(citationId, occurrence + 1)

        // Push preceding text if any
        if (start > lastIndex) {
          children.push({
            type: 'text',
            value: node.value.slice(lastIndex, start),
          })
        }

        // Push a span with data attributes for the citation
        children.push({
          type: 'element',
          tagName: 'span',
          properties: {
            className: ['agent-citation'],
            'data-agent-citation_id': citationId,
            'data-agent-citation_id_index': occurrence,
          },
          children: [{ type: 'text', value: citationId }],
        })

        lastIndex = end
      }

      // No citation found: keep the original node (and its position data)
      // instead of replacing it with a copy or, for empty text, dropping it.
      if (children.length === 0) {
        return
      }

      // Push remaining text if any
      if (lastIndex < node.value.length) {
        children.push({
          type: 'text',
          value: node.value.slice(lastIndex),
        })
      }

      // Replace the original text node with the new nodes
      parent.children.splice(index, 1, ...children)

      // Continue with the sibling after the inserted nodes so the freshly
      // created spans and text fragments are not visited again.
      return index + children.length
    })
  }
}
