
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>
#include <libxml/tree.h>

#include <list>
#include <string>
#include <sstream>
#include <stdio.h>

// Per-document state handed, through DocumentPtr, to the PreFilter and
// PostFilter functions declared in this header.  Member descriptions are
// taken from the original inline comments where they existed; anything else
// is inferred from the member name only and is marked as such.
//
// NOTE(review): a destructor is declared and the struct holds raw pointers
// (xmltree, filter_data, original_filter_data), but no copy constructor or
// copy assignment is declared here, so the compiler-generated member-wise
// copies apply.  Whether copying a Document is safe depends on what
// ~Document() releases, which is not visible in this header -- confirm.
struct Document {
  std::string mimetype;      // presumably the document's current MIME type -- confirm
  std::string mimeencoding;  // presumably the document's character encoding -- confirm
  // Disabled members, kept for reference:
  //preprocess_state segmented;
  //preprocess_state translated;
  xmlDocPtr xmltree;  // libxml2 document handle; who frees it is not visible here
  std::string buffer;             // presumably the working text of the document -- confirm
  std::string original_document;  // presumably the unmodified input text -- confirm
  std::string original_mimetype;  // presumably the MIME type of the input -- confirm
  int nbtu; /* number of TUs in the document, set by segmentation ("TU" is not expanded in this file) */
  int nbtoken; /* number of tokens in the document, set by tokenization */
  int nbword; /* number of words in the document, set by tokenization */
  /* Language mapping in the document: one entry per language. */
  struct lm {
    std::string language;  // language identifier; naming scheme not stated here
    int ratio;             // share attributed to this language; unit/scale not stated here
  };
  std::list<lm> langmap;
  std::string mainlanguage; /* main language of the document */
  // Opaque per-filter payloads.  Type and ownership are defined by whichever
  // code sets them; neither is visible in this header.
  void *filter_data;
  void *original_filter_data;

  // Disabled members, kept for reference:
  //const Filter *filter;
  //const Filter *original_filter;

  // The meaning of `s` is defined by the out-of-line constructor (not visible
  // here).  NOTE(review): the constructor is not `explicit`, so a std::string
  // converts implicitly to Document -- confirm that this is intended.
  Document(const std::string &s="");
  ~Document();
};

// Raw-pointer alias used throughout the filter API below.  Being a plain
// pointer, it says nothing about who owns the pointed-to Document.
typedef struct Document *DocumentPtr;

// Filter entry points.  Only the declarations are visible in this header; the
// notes on each one are inferred from names and signatures and should be
// confirmed against the definitions.  None of the signatures states who owns
// a returned DocumentPtr, or what is returned on failure.

// Presumably reads the document from an open stdio stream; `type` presumably
// names the input format -- confirm.
DocumentPtr PreFilter(FILE *, const char *type);
// Overload taking a C string.  NOTE(review): whether `input` is the document
// text itself or a file name cannot be told from the signature -- confirm.
DocumentPtr PreFilter(const char *input, const char *type);
// Format-specific pre-filters.  Each takes and returns a DocumentPtr; whether
// the returned pointer is the same object as `D` is not visible here.
DocumentPtr PreFilterHTML(DocumentPtr D);
DocumentPtr PreFilterTXT(DocumentPtr D);
// Presumably renders `D` back to text in the format named by `type` -- confirm.
std::string PostFilter(DocumentPtr D, const char *type);
// Format-specific post-filters returning the result as a std::string.
std::string PostFilterHTML(DocumentPtr D);
std::string PostFilterTXT(DocumentPtr D);

