// Matches a Markdown code fence tagged `html` and captures the fenced content
// (without its surrounding whitespace) in group 1. The `m` flag lets `^`/`$`
// anchor at line boundaries. The `g` flag makes a `replace` with this regex
// rewrite every match, but it also makes the regex stateful (`lastIndex`) when
// used with `test`/`exec`.
const HTML_CONTENT_REGEX = /^```html\s*([\s\S]*?)\s*```$/gm;

/**
 * Reports whether `text` contains at least one ```html fenced block.
 *
 * Uses `String.prototype.search` instead of `RegExp.prototype.test`: because
 * the shared regex is global, `test` resumes from the `lastIndex` left behind
 * by the previous successful call, which made repeated checks of the same text
 * alternate between `true` and `false`. `search` always scans from the start
 * of the string and leaves the regex's `lastIndex` as it found it.
 *
 * @param text - The text to inspect.
 * @returns `true` when the pattern matches anywhere in `text`.
 */
export const isHtmlContent = (text: string): boolean =>
  text.search(HTML_CONTENT_REGEX) !== -1;

/**
 * Replaces every match of `HTML_CONTENT_REGEX` in `responseText` with the
 * content captured by its first group, then trims the result.
 *
 * @param responseText - Text that may contain fenced HTML.
 * @returns The text with matched fences unwrapped and outer whitespace removed.
 */
export function extractHtmlContent(responseText: string): string {
  const unfenced = responseText.replace(HTML_CONTENT_REGEX, "$1");
  return unfenced.trim();
}

/**
 * Type guard that narrows a child node to `Element` when its `nodeType`
 * equals `Node.ELEMENT_NODE`.
 *
 * @param node - The node to check.
 * @returns `true` if `node` is an element node.
 */
export const isElement = (node: ChildNode): node is Element => {
  const { nodeType } = node;
  return nodeType === Node.ELEMENT_NODE;
};

// HTML void elements: they have no content and no end tag, so no closing
// token is emitted for them. (A stray `</br>` is even treated by the HTML
// parser as another `<br>`, which would double the line break.)
const VOID_ELEMENT_TAGS: ReadonlySet<string> = new Set([
  "area",
  "base",
  "br",
  "col",
  "embed",
  "hr",
  "img",
  "input",
  "link",
  "meta",
  "source",
  "track",
  "wbr",
]);

// Matches the opening tag at the start of an element's serialized HTML.
// Double-quoted attribute values are consumed as a unit so that a `>` inside
// a value (e.g. `title="a>b"`) does not end the tag early.
const OPENING_TAG_REGEX = /^<(?:"[^"]*"|[^>])+>/;

// Splits text into words that keep their adjacent whitespace; a run that is
// only whitespace becomes a token of its own so spacing between elements
// (e.g. `</b> <i>`) is not lost.
const TEXT_CHUNK_REGEX = /\s*\S+\s*|\s+/g;

/**
 * Tokenize HTML content into an array of tokens.
 *
 * The markup is parsed by assigning it to the `innerHTML` of a detached
 * `<div>`, then the resulting tree is walked depth-first. Each element
 * contributes its opening tag, the tokens of its children, and (unless it is
 * a void element such as `<br>`) a lowercase closing tag. Each text node
 * contributes its text split into whitespace-preserving chunks. Nodes that
 * are neither text nor elements (e.g. comments) produce no tokens.
 *
 * NOTE(review): text tokens come from `textContent`, so presumably entities
 * such as `&lt;` arrive here already decoded. If callers re-join tokens into
 * HTML, those characters may need re-escaping; confirm against the consumer.
 *
 * @param html - The HTML markup to tokenize.
 * @returns The tokens in document order.
 */
export const tokenizeHtml = (html: string): string[] => {
  const div = document.createElement("div");
  div.innerHTML = html;

  const tokens: string[] = [];
  const walkNodes = (node: ChildNode): void => {
    if (node.nodeType === Node.TEXT_NODE) {
      const chunks = (node.textContent ?? "").match(TEXT_CHUNK_REGEX) ?? [];
      // Push one by one: spreading a very large array into `push` can exceed
      // the engine's argument-count limit.
      for (const chunk of chunks) {
        tokens.push(chunk);
      }
    } else if (isElement(node)) {
      tokens.push(node.outerHTML.match(OPENING_TAG_REGEX)?.[0] ?? "");

      node.childNodes.forEach(walkNodes);

      const tagName = node.tagName.toLowerCase();
      if (!VOID_ELEMENT_TAGS.has(tagName)) {
        tokens.push(`</${tagName}>`);
      }
    }
  };

  div.childNodes.forEach(walkNodes);
  return tokens;
};
