/**
 * Exports the tags as a JSONL file of NER annotations, which can be
 * imported into Prodigy and spaCy to train our NER model.
 */
import saveAs from 'file-saver'
import { type Tag } from '../types'

export interface AnnotatedSpan {
  start: number
  end: number
  label: string
  text: string
  id: number
}

export interface AnnotatedText {
  text: string
  spans: AnnotatedSpan[]
}

export const exportAnnotations = (
  filename: string,
  text: string,
  inputDiff: Array<[string, number, Tag | null]>
): void => {
  const docOffset = inputDiff[0][1]
  const spans: AnnotatedSpan[] = inputDiff.map(
    ([cleartext, offset, tagOrNull]) => tagOrNull
  ).filter(
    (tagOrNull) => tagOrNull !== null
  ).map(
    (tag) => tag as Tag
  ).map(
    ({ cleartext, entityType, position, entityId }) => ({
      start: position[0] - docOffset,
      end: position[1] - docOffset,
      label: entityType,
      text: cleartext,
      // The ID will not be used for NER, but might serve
      // later for entity linking
      id: entityId ?? -1
    })
  )
  const annotatedText = { text, spans }
  const jsonlFilename = `${filename}.jsonl`
  // Download the JSONL file
  const blob = new Blob([JSON.stringify(annotatedText, null)], { type: 'application/json' })
  saveAs(blob, jsonlFilename)
}
