#include <iostream>
#include <string>

#include <boost/lexical_cast.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/filesystem/convenience.hpp>

#include <outilex/xmlReader.h>
#include <outilex/xmlWriter.h>
#include <outilex/xml.h>


using namespace std;
using namespace boost;

namespace fs = boost::filesystem;



namespace {

/* Output writer shared by every SAX callback below; main() opens it on the output path. */
xmlwriter writer;
/* Next position number: written as the "pos" attribute of each generated 'p' element, then incremented. */
int pos = 0;
/* Set by the -addsentence option: 'tu' elements from the input are dropped and
 * a new 'tu' is opened right after each 'par' start tag and closed right before its end tag. */
bool addsentence = false;


/* SAX startDocument callback: begins the output document on the shared writer. */
void startDocument(void * d) {
  (void) d; /* the callback context pointer is not used */
  writer.start_document();
}

/* SAX endDocument callback: finishes the output document on the shared writer. */
void endDocument(void * d) {
  (void) d; /* the callback context pointer is not used */
  writer.end_document();
}


void startElement(void * ctx, const xmlChar * name, const xmlChar ** atts) {

  if (xmlStrcmp(name, "p") == 0) { return; }

  if (addsentence && xmlStrcmp(name, "tu") == 0) { // if addsentence, don't keep older sentence elements
    return;
  }

  if (xmlStrcmp(name, "token") == 0) {
    writer.start_element("p");
    writer.write_attribute("pos", lexical_cast<string>(pos));
    pos++;
    writer.end_element();
  }


  writer.start_element(name);

  if (atts != NULL) {
    for (int i = 0; atts[i]; ++i) {
      if (atts[i + 1]) {
        writer.write_attribute(atts[i], atts[i+1]);
        ++i;
      }
    }
  }

  if (addsentence && xmlStrcmp(name, "par") == 0) { // addsentence surounding 'par' element
    writer.start_element("tu");
  }
}


/*
 * SAX endElement callback.
 *
 * Mirrors startElement: end tags of 'p' (and of 'tu' when addsentence is set)
 * are ignored because their start tags were not written. Before a 'par' or
 * 'tu' is closed, an empty 'p' element with the current position is written,
 * so the end of a paragraph or sentence gets a position as well.
 *
 * ctx  : callback context (unused)
 * name : element name
 */
void endElement(void * ctx, const xmlChar * name) {

  if (xmlStrcmp(name, "p") == 0) { return; }

  const bool is_tu = (xmlStrcmp(name, "tu") == 0);

  if (addsentence && is_tu) { return; }

  if (is_tu || xmlStrcmp(name, "par") == 0) {

    /* closing position marker */
    writer.start_element("p");
    writer.write_attribute("pos", lexical_cast<string>(pos));
    ++pos;
    writer.end_element();

    /* at the end of a 'par': first close the 'tu' that startElement opened inside it */
    if (addsentence) { writer.end_element(); }
  }

  /* close the element itself */
  writer.end_element();
}


void characters(void * ctx, const xmlChar * ch, int len) {

  string str((const char *) ch, len);
  writer.write_string(str);
}

void cdataBlock(void * ctx, const xmlChar * value, int len) {

  string str((const char *) value, len);
  writer.write_CDATA(str);
}


/*
 * SAX handler table passed to xmlSAXUserParseFile() in main().
 *
 * The initializer is positional: each entry fills the xmlSAXHandler member
 * named in its comment, so the order must not be changed. Only the document,
 * element, character-data and CDATA callbacks are set; every other slot is NULL.
 */

xmlSAXHandler mySAXhandler = {
    NULL, /* internalSubset */
    NULL, /* isStandalone */
    NULL, /* hasInternalSubset */
    NULL, /* hasExternalSubset */
    NULL, /* resolveEntity */
    NULL, /* getEntity */
    NULL, /* entityDecl */
    NULL, /* notationDecl */
    NULL, /* attributeDecl */
    NULL, /* elementDecl */
    NULL, /* unparsedEntityDecl */
    NULL, /* setDocumentLocator */
    startDocument, /* startDocument */
    endDocument, /* endDocument */
    startElement, /* startElement */
    endElement, /* endElement */
    NULL, /* reference */
    characters, /* characters */
    NULL, /* ignorableWhitespace */
    NULL, /* processingInstruction */
    NULL, /* comment */
    NULL, /* warning */
    NULL, /* error */
    NULL, /* fatalError */
    NULL, /* getParameterEntity */
    cdataBlock, /* cdataBlock */
    NULL, /* externalSubset */
    1,    /* initialized: not XML_SAX2_MAGIC, so the SAX1 startElement/endElement callbacks above are used */
    NULL, /* _private */
    NULL, /* startElementNs */
    NULL, /* endElementNs */
    NULL  /* serror (xmlStructuredErrorFunc) */
};


/* Name the program was invoked with (argv[0]); main() sets it before usage() can be called. */
char * progname;

/* Prints the command-line synopsis on standard output, then exits with status 0. */
void usage() {
  std::cout << "usage: "
            << progname
            << " [-o <output> | -addsentence ] <tokenized text>\n";
  exit(0);
}

} // namespace anonymous



/*
 * Entry point.
 *
 * Command line:
 *   -o <output>   path of the file to write
 *   -addsentence  set the addsentence flag used by the element callbacks
 *   -h            print usage and exit
 *   <other>       taken as the input path (the last one given wins)
 *
 * When no -o is given, the output path is the input path with its extension
 * replaced by ".pos".
 *
 * Returns the result of xmlSAXUserParseFile() (0 on success); exits with
 * status 1 when no input path was given or when the output path is the same
 * as the input path.
 */
int main(int argc, char ** argv) {

  progname = *argv;
  fs::path inpath, outpath;

  argv++, argc--;

  if (! argc) { usage(); }


  while (argc) {

    string arg = *argv;

    if (arg == "-o") {

      /* the option value is the next argument */
      argv++, argc--;
      if (! argc) { usage(); }

      outpath = fs::path(*argv, fs::native);

    } else if (arg == "-addsentence") {

      addsentence = true;

    } else if (arg == "-h") {

      usage();

    } else {
      inpath = fs::path(arg, fs::native);
    }
    argv++, argc--;
  }

  if (inpath.empty()) {
    cerr << "bad args\n";
    exit(1);
  }

  if (outpath.empty()) {
    outpath = fs::change_extension(inpath, ".pos");
  }

  /*
   * The output is opened before the input is parsed, so writing to the input
   * path would presumably clobber the file that is about to be read. This
   * happens when the input already has the ".pos" extension and no -o is
   * given, or when -o names the input file. The check is textual only.
   */
  if (outpath.string() == inpath.string()) {
    cerr << "output file is the same as input file: " << inpath.string() << "\n";
    exit(1);
  }

  writer.open(outpath);
  pos = 0;

  int res = xmlSAXUserParseFile(& mySAXhandler, NULL, inpath.native_file_string().c_str());

  if (res) { cerr << "some error occured\n"; }

  return res;
}

