import {
  normalizeBody,
  normalizeSpacing,
  normalizeStyles,
  splitTextBlocks,
} from '../lib'

/**
 * Reports whether the given markup contains an `<html>` opening tag whose
 * only attribute is the XHTML namespace declaration.
 *
 * The check is a plain, case-sensitive substring search; the markup is not
 * parsed.
 *
 * @param html - Raw markup to inspect.
 * @returns `true` when the XHTML root tag occurs anywhere in `html`.
 */
export function isMatch(html: string) {
  const xhtmlRootTag = '<html xmlns="http://www.w3.org/1999/xhtml">'
  return html.indexOf(xhtmlRootTag) !== -1
}

/**
 * Turns bold-by-font-name styling into semantic markup.
 *
 * For every `<span>` whose inline `font-family` contains the (case-sensitive)
 * substring `Bold`, all of the span's existing child nodes are moved into a
 * newly created `<strong>` element, which then becomes the span's only child.
 * Spans that do not match are left untouched. The document is mutated in
 * place.
 *
 * @param document - Document whose spans are inspected and rewritten.
 */
export function normalizePdf(document: Document) {
  document.querySelectorAll<HTMLSpanElement>('span').forEach((span) => {
    const isBoldFace = span.style.fontFamily.includes('Bold')
    if (!isBoldFace) {
      return
    }

    const strong = document.createElement('strong')
    // Snapshot the children first: the span's child list changes as the
    // nodes are moved into the wrapper.
    const contents = Array.from(span.childNodes)
    strong.replaceChildren(...contents)
    span.append(strong)
  })
}

/**
 * Parses an HTML string, runs the normalization passes over the resulting
 * document, and serializes the body back to a string.
 *
 * The passes mutate the parsed document in place and run in a fixed order:
 * the PDF-specific pass first (applied unconditionally), followed by the
 * shared body, text-block, spacing and style passes.
 *
 * @param html - Markup to normalize; parsed as `text/html`.
 * @returns The `innerHTML` of the normalized document's `<body>`.
 */
export function normalizeHtml(html: string) {
  const parsed = new DOMParser().parseFromString(html, 'text/html')

  // Order matters: format-specific cleanup precedes the standard formatting.
  const passes = [
    normalizePdf,
    normalizeBody,
    splitTextBlocks,
    normalizeSpacing,
    normalizeStyles,
  ]
  for (const applyPass of passes) {
    applyPass(parsed)
  }

  return parsed.body.innerHTML
}
