import markdownit, { type PluginSimple } from 'markdown-it'
import { MarkdownParser } from 'prosemirror-markdown'
import { parseSpec } from '../MarkdownEditor/parser'
import { schema } from './schema'
/**
 * An explanation on how markdown-it tokenises a markdown string:
 *
 * The tokeniser runs from the start to the end of a string, converting the string into a token stream and does something like:
 *
 * 1. Start at position 0, run through each rule in the chain
 *    a. If a rule doesn't modify the token stream, it returns false and the tokeniser moves on to the next rule in the chain.
 *    b. If a rule does modify the token stream, it returns true and also advances the position marker in the state (which is preserved between rule callbacks)
 * 2. Start at position N (the position we set in (1b)), and repeat step 1 until the entire source string is processed
 *
 * The output token stream is the markdown-it representation of the input text, e.g. **hello, world** becomes something like:
 *
 * [
 *   {type: 'strong_open', tag: 'strong', markup: '**', content: ''},
 *   {type: 'text', tag: '', markup: '', content: 'hello, world'},
 *   {type: 'strong_close', tag: 'strong', markup: '**', content: ''},
 * ]
 */

/**
 * This markdown-it plugin adds support for grounding claim indicators in a markdown document.
 * It registers an inline rule named `claim` that tokenizes serialized claims of two forms:
 *
 * - `<CLAIM:claimId:sourceId:start:end>` is emitted as a `claim` token
 * - `<CLAIM_START:claimId>` is emitted as a `claim_start` token
 *
 * where:
 * - `claimId` is the claim id
 * - `sourceId` is the source id
 * - `start` is the start position of the claim in the plaintext document
 * - `end` is the end position of the claim in the plaintext document
 *
 * Each emitted token carries `meta = { claimId, sourceId, claimRange }`, with the ids kept as
 * the matched strings and `claimRange` being `[start, end]` parsed as base-10 integers.
 * The `:sourceId:start:end` suffix is optional in the pattern for both forms; when it is
 * absent, `meta.sourceId` is `undefined` and `meta.claimRange` is `[NaN, NaN]`.
 *
 * The rule only matches a claim that begins exactly at the tokeniser's current position.
 * Any text in front of a claim is left to the other inline rules (text, emphasis, links,
 * entities, ...), so surrounding markdown is tokenised normally.
 *
 * Before updating this plugin (or creating a new plugin) it's worth having a read
 * of the markdown-it docs to understand how rules are applied:
 * https://github.com/markdown-it/markdown-it/blob/master/docs/architecture.md
 */
const claimPlugin: PluginSimple = (md) => {
  // Anchored with `^` so a serialized claim is only recognised at the start of the slice
  // handed to it, i.e. at the tokeniser's current position.
  const claimRegex = /^<CLAIM(_START)?:(\d+)(:(\d+):(\d+):(\d+))?>/
  const LESS_THAN = 0x3c // '<'

  // NOTE(review): registering after `emphasis` also places this rule ahead of markdown-it's
  // later `<`-handling rules (e.g. autolink) — confirm before moving it in the chain.
  md.inline.ruler.after('emphasis', 'claim', (state, silent) => {
    // Cheap rejection: every serialized claim starts with '<'.
    if (state.src.charCodeAt(state.pos) !== LESS_THAN) {
      return false
    }

    // Only look at the range this inline state is allowed to consume.
    const match = claimRegex.exec(state.src.slice(state.pos, state.posMax))

    // By returning false we tell the markdown tokeniser to move on to the next rule.
    if (!match) {
      return false
    }

    const [claimText, isClaimStart, claimId, , sourceId, claimStart, claimEnd] = match

    // In silent (validation) mode the rule must only report the match and advance the
    // position; pushing tokens here would leave stray tokens in the stream.
    if (!silent) {
      const tokenType = isClaimStart ? 'claim_start' : 'claim'
      const claimToken = state.push(tokenType, tokenType, 0)
      claimToken.meta = {
        claimId,
        sourceId,
        claimRange: [Number.parseInt(claimStart, 10), Number.parseInt(claimEnd, 10)],
      }
    }

    // Move the position marker to just past the serialized claim.
    state.pos += claimText.length

    return true
  })
}

/**
 * markdown-it instance, created with default options, that has the claim plugin
 * registered on it. It is the tokenizer handed to `parser` in this module.
 */
export const groundingMarkdownTokenizer = markdownit()
// `use` registers the plugin on the instance in place.
groundingMarkdownTokenizer.use(claimPlugin)

/**
 * ProseMirror markdown parser built on `groundingMarkdownTokenizer` and `schema`.
 * It extends the shared `parseSpec` with handlers for the two claim token types.
 */
export const parser = new MarkdownParser(schema, groundingMarkdownTokenizer, {
  ...parseSpec,
  // A `claim` token becomes a `claim` node; the token's `claimRange` is exposed as `range`.
  claim: {
    node: 'claim',
    getAttrs: ({ meta }) => {
      const { claimId, sourceId, claimRange } = meta
      return { claimId, sourceId, range: claimRange }
    },
  },
  // A `claim_start` token becomes a `claim_start` node carrying only the claim id.
  claim_start: {
    node: 'claim_start',
    getAttrs: ({ meta }) => ({ claimId: meta.claimId }),
  },
})
