import markdownit, { type PluginSimple } from 'markdown-it'
import { MarkdownParser } from 'prosemirror-markdown'
import { parseSpec } from '../MarkdownEditor/parser'
import { schema } from './schema'

/**
 * This markdown-it plugin adds support for grounding claim indicators in a markdown document.
 * It tokenizes two kinds of marker:
 *
 * - `<CLAIM:claimId:sourceId:start:end>` is emitted as a `claim` token, where:
 *   - `claimId` is the claim id
 *   - `sourceId` is the source id
 *   - `start` is the start position of the claim in the plaintext document
 *   - `end` is the end position of the claim in the plaintext document
 * - `<CLAIM_START:claimId>` is emitted as a `claim_start` token.
 *
 * Both token types carry `meta = { claimId, sourceId, claimRange }`. The ids are the
 * matched digit strings; `claimRange` is `[start, end]` parsed as numbers and is
 * `[NaN, NaN]` when the marker has no `:sourceId:start:end` part.
 *
 * The rule looks ahead from the current inline position: any text between that position
 * and the marker is emitted as a single plain `text` token. A marker is not matched when
 * a `*` or `_` appears earlier on the same line of the text being searched.
 *
 * Before updating this plugin (or creating a new plugin) it's worth having a read
 * of the markdown-it docs to understand how rules are applied:
 * https://github.com/markdown-it/markdown-it/blob/master/docs/architecture.md
 */
const claimPlugin: PluginSimple = (md) => {
  // Only match a claim when there is no `*` / `_` before it on the same line of the
  // searched text (`.` does not cross newlines). The pattern has no `g`/`y` flag, so it
  // carries no state between calls and can safely be shared by every rule invocation.
  const claimRegex = /(?<![*_].*)<CLAIM(_START)?:(\d+)(:(\d+):(\d+):(\d+))?>/

  md.inline.ruler.after('emphasis', 'claim', (state, silent: boolean) => {
    const start = state.pos
    const match = claimRegex.exec(state.src.slice(start, state.posMax))

    // There are no claims left to process from this position, so return false to
    // continue with the next rule.
    if (!match) {
      return false
    }

    const [claimText, isClaimStart, claimId, , sourceId, claimStart, claimEnd] = match
    const matchPosition = match.index

    // In silent mode markdown-it is only probing how far this rule would advance
    // (e.g. while scanning a link label), so no tokens may be pushed; otherwise the
    // claim would be emitted again when the same text is tokenized for real.
    if (!silent) {
      // Push the text before the claim into a text token
      const startToken = state.push('text', '', 0)
      startToken.content = state.src.slice(start, start + matchPosition)

      // Push the claim into a claim token
      const tokenType = isClaimStart ? 'claim_start' : 'claim'
      const claimToken = state.push(tokenType, tokenType, 0)
      claimToken.meta = {
        claimId,
        sourceId,
        claimRange: [parseInt(claimStart, 10), parseInt(claimEnd, 10)],
      }
    }

    // Move the position to just past the claim marker. Any further claims in the block
    // are picked up when the inline parser invokes this rule again.
    state.pos = start + matchPosition + claimText.length

    return true
  })
}

/**
 * markdown-it instance created with no arguments and extended with `claimPlugin`,
 * so that claim markers are tokenized alongside regular markdown.
 */
export const groundingMarkdownTokenizer = markdownit()
  .use(claimPlugin)

/**
 * Markdown parser built on `schema` and `groundingMarkdownTokenizer`.
 *
 * It reuses every entry of the imported `parseSpec` and adds two more token mappings:
 * - `claim` tokens become `claim` nodes with `claimId`, `sourceId` and `range` attrs
 * - `claim_start` tokens become `claim_start` nodes with a `claimId` attr
 *
 * All attribute values are read from the token's `meta` object.
 */
export const parser = new MarkdownParser(schema, groundingMarkdownTokenizer, {
  ...parseSpec,
  claim: {
    node: 'claim',
    getAttrs: ({ meta }) => {
      const { claimId, sourceId, claimRange } = meta
      return { claimId, sourceId, range: claimRange }
    },
  },
  claim_start: {
    node: 'claim_start',
    getAttrs: ({ meta }) => {
      const { claimId } = meta
      return { claimId }
    },
  },
})
