#include <libxml/tree.h>
#include <libxml/parser.h>

#include <cctype>
#include <cstdio>
#include <iostream>
#include <string>

#include "Tokenization.h"
#include "Segmentation.h"
#include "DocFilters.h"

using namespace std;

/**
 * Guess the document format of a file from its name.
 *
 * Only the final extension of the last path component is examined, and the
 * comparison ignores ASCII case: "htm" and "html" select "html"; every other
 * name (including one without any extension) selects "txt".
 *
 * @param filename path of the input file
 * @return pointer to a string literal, either "html" or "txt"
 */
static const char *guessFormatByExtension(const std::string &filename) {
  const std::string::size_type dot = filename.find_last_of('.');
  const std::string::size_type separator = filename.find_last_of("/\\");

  // A dot that sits inside a directory name is not an extension separator.
  if (dot == std::string::npos ||
      (separator != std::string::npos && dot < separator)) {
    return "txt";
  }

  // Lower-case the extension so that ".HTML", ".Htm", ... are all recognised.
  std::string extension = filename.substr(dot + 1);
  for (std::string::size_type i = 0; i < extension.size(); ++i) {
    extension[i] = static_cast<char>(
        std::tolower(static_cast<unsigned char>(extension[i])));
  }

  if (extension == "htm" || extension == "html") {
    return "html";
  }
  return "txt";
}

/**
 * Serialize an XML document into the file "<filename>.<suffix>".
 *
 * The output file is created or truncated. If it cannot be opened, or if
 * libxml2 reports a serialization failure, a message is written to stderr
 * and the function returns without raising an error to the caller.
 *
 * @param xml      document handed to xmlDocDump()
 * @param filename base name of the output file
 * @param suffix   extension appended after a "." separator
 */
static void dumpToFile(xmlDocPtr xml,
                       const char *filename,
                       const char *suffix) {
  std::string outputfile(filename);
  outputfile += ".";
  outputfile += suffix;

  FILE *file = fopen(outputfile.c_str(), "w");
  if (!file) {
    // Passing a null stream to xmlDocDump()/fclose() would crash.
    std::cerr << "cannot open " << outputfile << " for writing" << std::endl;
    return;
  }

  // xmlDocDump() returns the number of bytes written, or -1 on failure.
  if (xmlDocDump(file, xml) < 0) {
    std::cerr << "failed to write XML to " << outputfile << std::endl;
  }
  fclose(file);
}

/**
 * Write a string verbatim into the file "<filename>.<suffix>".
 *
 * The output file is created or truncated. All s.size() bytes are written,
 * including any embedded NUL characters. If the file cannot be opened or the
 * write comes up short, a message is written to stderr and the function
 * returns without raising an error to the caller.
 *
 * @param s        content to write
 * @param filename base name of the output file
 * @param suffix   extension appended after a "." separator
 */
static void dumpToFile(const std::string &s,
                       const char *filename,
                       const char *suffix) {
  std::string outputfile(filename);
  outputfile += ".";
  outputfile += suffix;

  FILE *file = fopen(outputfile.c_str(), "w");
  if (!file) {
    // Writing to or closing a null stream would crash.
    std::cerr << "cannot open " << outputfile << " for writing" << std::endl;
    return;
  }

  // fwrite() with an explicit length does not stop at an embedded NUL the
  // way a "%s" conversion does.
  if (fwrite(s.data(), 1, s.size(), file) != s.size()) {
    std::cerr << "short write to " << outputfile << std::endl;
  }
  fclose(file);
}

/**
 * Run every file named on the command line through the processing pipeline.
 *
 * For each argument the file is read with PreFilter(), then passed through
 * doTokenization() and doSegmentation(), and finally through PostFilter().
 * The result of each stage is written next to the input as
 * "<file>.tokenization", "<file>.segmentation" and "<file>.postfilter".
 *
 * A file that cannot be opened or loaded is reported on stderr and skipped;
 * the remaining files are still processed.
 *
 * @return 0 when every file was processed, 1 when at least one was skipped
 */
int main(int argc, char *argv[]) {
  int status = 0;

  for (int i = 1; i < argc; i++) {
    char *filename = argv[i];

    // The format depends only on the name, so compute it once per file.
    const char *format = guessFormatByExtension(filename);

    DocumentPtr document;

    {
      FILE *file = fopen(filename, "r");
      if (!file) {
        // A null stream must not reach PreFilter() or fclose().
        cerr << "cannot open " << filename << " for reading" << endl;
        status = 1;
        continue;
      }
      document = PreFilter(file, format);
      fclose(file);
    }

    // NOTE(review): PreFilter()'s failure contract is not visible here;
    // guard against a null result before dereferencing it below.
    if (!document) {
      cerr << "cannot load " << filename << endl;
      status = 1;
      continue;
    }

    xmlDocPtr xml = document->xmltree;
    {
       doTokenization(xml);
       dumpToFile(xml, filename, "tokenization");
    }

    {
       doSegmentation(xml);
       dumpToFile(xml, filename, "segmentation");
    }

    {
      string output = PostFilter(document, format);
      dumpToFile(output, filename, "postfilter");
    }

    delete document;
  }

  return status;
}
