import * as docx from 'docx';

/** Builds a paragraph spacing option that uses the same amount before and after. */
const uniformSpacing = (amount: number) => ({ before: amount, after: amount });

/** Spacing used for ordinary paragraphs and for list items after the first one. */
const SPACING_SINGLE = uniformSpacing(100);

/** Spacing used around headings (h1 through h6). */
const SPACING_DOUBLE = uniformSpacing(200);

/** Spacing for the first bullet of a top-level list: more room above than below. */
const SPACING_FIRST_LIST_ITEM = { before: 200, after: 100 };

/** WordprocessingML numbering defines nine indent levels, indexed 0 through 8. */
const MAX_BULLET_LEVEL = 8;

/** Maps lowercase heading tag names to the docx heading style used to render them. */
const HEADING_BY_TAG = new Map(
  Object.entries({
    h1: docx.HeadingLevel.HEADING_1,
    h2: docx.HeadingLevel.HEADING_2,
    h3: docx.HeadingLevel.HEADING_3,
    h4: docx.HeadingLevel.HEADING_4,
    h5: docx.HeadingLevel.HEADING_5,
    h6: docx.HeadingLevel.HEADING_6,
  }),
);

/**
 * Converts the direct children of an HTML container into docx paragraphs.
 *
 * - `<ul>` becomes one bulleted paragraph per `<li>`, followed by an empty
 *   paragraph. Nested `<ul>`/`<ol>` lists are emitted as bullets one level
 *   deeper instead of being flattened into their parent item.
 * - `<h1>`..`<h6>` become paragraphs with the matching heading style.
 * - Every other element (including a top-level `<ol>`) becomes a plain paragraph.
 * - Non-blank text nodes sitting directly inside the container become plain
 *   paragraphs.
 *
 * Inside a paragraph only one level of inline markup is interpreted:
 * `<b>`/`<strong>`, `<i>`/`<em>` and `<u>` set bold, italics and underline;
 * `<br>` and elements without visible text become a line break; any other
 * element contributes its text unformatted.
 *
 * @param element - Container whose child nodes are converted in document order.
 * @returns The generated paragraphs, in document order.
 */
export const parseHtmlToDocx = (element: HTMLElement): docx.Paragraph[] => {
  // Collapses every run of consecutive spaces into a single space.
  const sanitizeText = (text?: string | null) => text?.replace(/ +(?= )/g, '') ?? '';

  const isListTag = (tag: string) => tag === 'ul' || tag === 'ol';

  /**
   * Builds the text runs for one paragraph from the child nodes of `parent`.
   * When `skipLists` is set, nested list elements are ignored because the
   * caller renders them as separate bullet paragraphs.
   */
  const parseInlineHtml = (parent: HTMLElement, skipLists = false) => {
    const textRuns: docx.TextRun[] = [];

    parent.childNodes.forEach((node) => {
      if (node.nodeType === Node.TEXT_NODE) {
        const text = sanitizeText(node.nodeValue);
        if (text.trim() !== '') textRuns.push(new docx.TextRun({ text }));
        return;
      }
      if (node.nodeType !== Node.ELEMENT_NODE) return;

      const el = node as HTMLElement;
      // tagName is uppercase in HTML documents, so normalise before comparing.
      const tag = el.tagName.toLowerCase();
      if (skipLists && isListTag(tag)) return;

      const text = sanitizeText(el.innerText);
      if (tag === 'br' || text.trim() === '') {
        textRuns.push(new docx.TextRun({ text: '', break: 1 }));
        return;
      }

      switch (tag) {
        case 'b':
        case 'strong':
          textRuns.push(new docx.TextRun({ text, bold: true }));
          break;
        case 'i':
        case 'em':
          textRuns.push(new docx.TextRun({ text, italics: true }));
          break;
        case 'u':
          textRuns.push(
            new docx.TextRun({
              text,
              underline: { type: docx.UnderlineType.SINGLE },
            }),
          );
          break;
        default:
          textRuns.push(new docx.TextRun({ text }));
          break;
      }
    });

    return textRuns;
  };

  const paragraphs: docx.Paragraph[] = [];

  /**
   * Appends one bullet paragraph per `<li>` directly inside `list`, then
   * recurses into nested lists with an increased bullet level.
   */
  const appendList = (list: Element, level: number) => {
    const bulletLevel = Math.min(level, MAX_BULLET_LEVEL);
    let itemIndex = 0;

    for (const child of Array.from(list.children)) {
      const tag = child.tagName.toLowerCase();

      if (isListTag(tag)) {
        // A list placed directly inside another list (no wrapping <li>).
        appendList(child, level + 1);
        continue;
      }
      if (tag !== 'li') continue;

      const isFirstTopLevelItem = level === 0 && itemIndex === 0;
      itemIndex += 1;
      paragraphs.push(
        new docx.Paragraph({
          children: parseInlineHtml(child as HTMLElement, true),
          bullet: { level: bulletLevel },
          spacing: isFirstTopLevelItem ? SPACING_FIRST_LIST_ITEM : SPACING_SINGLE,
        }),
      );

      for (const nested of Array.from(child.children)) {
        if (isListTag(nested.tagName.toLowerCase())) appendList(nested, level + 1);
      }
    }
  };

  element.childNodes.forEach((node) => {
    if (node.nodeType === Node.TEXT_NODE) {
      // Bare text directly inside the container would otherwise be lost.
      const text = sanitizeText(node.nodeValue).trim();
      if (text !== '') {
        paragraphs.push(
          new docx.Paragraph({
            children: [new docx.TextRun({ text })],
            spacing: SPACING_SINGLE,
          }),
        );
      }
      return;
    }
    if (node.nodeType !== Node.ELEMENT_NODE) return;

    const el = node as HTMLElement;
    const tag = el.tagName.toLowerCase();

    if (tag === 'ul') {
      appendList(el, 0);
      // Empty paragraph separating the list from whatever follows it.
      paragraphs.push(new docx.Paragraph({}));
      return;
    }

    const heading = HEADING_BY_TAG.get(tag);
    paragraphs.push(
      heading
        ? new docx.Paragraph({
            children: parseInlineHtml(el),
            heading,
            spacing: SPACING_DOUBLE,
          })
        : new docx.Paragraph({
            children: parseInlineHtml(el),
            spacing: SPACING_SINGLE,
          }),
    );
  });

  return paragraphs;
};
