
#include "DocFilters.h"
#include "ParseHTML.h"
#include "document.h"


// HTML document wrapper for the parse in progress. PreFilterHTML points
// this at a documentHTML that lives on its own stack before it calls
// ecHtmlParse.
// NOTE(review): because the pointee is a local of PreFilterHTML, this must
// not be dereferenced outside that call -- confirm against other users.
documentHTML *pCurrentDocH;
// Declared here; the definition is not in this part of the file.
// PreFilterHTML treats a non-empty return value as the UTF-8 text that
// replaces the document buffer.
extern std::string CheckUnicodeSignature(DocumentPtr D);


// PreFilterHTML: convert the document text to UTF-8 when a Unicode
// signature is found, then run the HTML parser over the buffer.
//
// CheckUnicodeSignature() is expected to check the signature of the buffer
// to detect a BOM (unicode signature) and to return a UTF-8 equivalent
// string, without BOM. A non-empty result replaces D->buffer and
// D->mimeencoding is set to "UTF-8".
//
// UTF-32 is not yet recognized
//
// Returns D on success. On every failure path D is deleted and NULL is
// returned, so the caller must not use D after a NULL result.
//
// pCurrentDocH refers to a stack-local documentHTML only while the parser
// runs; it is reset to NULL before returning so that it never refers to a
// destroyed object.
//

DocumentPtr PreFilterHTML(DocumentPtr D) {

  std::string utf8_text = CheckUnicodeSignature(D);
  if (!utf8_text.empty()) {
    D->buffer = utf8_text;
    D->mimeencoding = "UTF-8";
  }

  // Advance along the sibling list until an element node is found.
  xmlNodePtr root = xmlDocGetRootElement(D->xmltree);
  while (root && root->type != XML_ELEMENT_NODE)
    root=root->next;
  if (!root){
    fprintf(stderr,"Error in internal filter (2)\n");
    // Release D here as well: the parse-failure path below deletes it, and
    // callers cannot tell the two NULL returns apart.
    delete D;
    return NULL;
  }

  // Doc lives on this stack frame; the global is only valid until we return.
  documentHTML Doc(root);
  pCurrentDocH = &Doc;

  const int ec = ecHtmlParse(D->buffer.c_str(),D);
  if (ec != ecOK) {
    // An aborted parse is reported silently; any other code is an error.
    if (ec != ecABORTED) fprintf(stderr,"Error during html parsing\n");
    delete D;
    D = NULL;
  }

  // Doc is about to be destroyed: do not leave a dangling global behind.
  pCurrentDocH = NULL;
  return D;
}
