#include <algorithm>
#include <cstdlib>
#include <exception>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <boost/progress.hpp>

#include <unistd.h>
#include <libxml/xmlreader.h>

#include <outilex/dic_entry.h>
#include <outilex/xml.h>
#include <outilex/dic_indexer.h>


using namespace std;
using namespace boost;


/// Handler installed through set_unexpected(): writes a short notice to
/// standard error and then ends the program via std::terminate().
void my_unexpected() {
  std::cerr << "unexpected exception!" << '\n';
  std::terminate();
}



/// Writes the one-line command synopsis to standard output.
///
/// @param progname  name shown as the program name in the synopsis.
void usage(char * progname) {
  std::ostream & out = std::cout;

  out << "usage : ";
  out << progname;
  out << " [-validate] [-ratio <r>] <dicofile>\n";
}


int main(int argc, char ** argv) {

  char * progname = *argv;
  char * filename = NULL;

  bool validate = false;

  double ratio = 5. / 3.;
  int next = 100000;

  set_unexpected(my_unexpected);

  argv++, argc--;

  if (argc == 0) { usage(progname); return 1; }

  while (argc) {

    if (**argv == '-') {

      string arg = *argv;

      if (arg == "-validate") {

        validate = true;

      } else if (arg == "-h" || arg == "-help" || arg == "-?") {

        usage(progname);
        return 0;

      } else if (arg == "-ratio") {

        argv++, argc--;
        if (argc == 0) {
          cerr << "error: -ratio needs an arg\n";
          exit(1);
        }
        ratio = atof(*argv);

      } else {

        cerr << "unknow option : " << *argv << endl;
        exit(1);
      }

    } else { filename = *argv; }

    argv++, argc--;
  }

  if (filename == NULL) {
    cerr << "no dico specified\n";
    exit(1);
  }


  LIBXML_TEST_VERSION;

  xml_status xmlstatus;

  dic_indexer indexer;


  int nbentries = 0;
  int nbflex = 0;

  cerr << "ratio = " << ratio << ", next = " << next << '\n';

  try {

    xmlTextReaderPtr reader = xmlReaderForFile(filename, NULL, 0); // XML_PARSE_DTD_VALID ...

    if (! reader) {
      cerr << "unable to open '" << filename << "'\n";
      exit(1);
    }

    if (validate) { xmlTextReaderSetParserProp(reader, XML_PARSER_VALIDATE, 1); }

    xmlTextReaderSetErrorHandler(reader, xml_reader_default_error_handler, & xmlstatus);

    progress_timer pt;
    timer t;

    int ret; /*, valid; */
    while (((ret = xmlTextReaderRead(reader)) == 1)) {

      if ((xmlTextReaderNodeType(reader) == XML_READER_TYPE_ELEMENT)
          && (xmlStrcmp(xmlTextReaderConstName(reader), (const xmlChar *) DIC_ENTRY) == 0)) {

        xmlNodePtr cur = xmlTextReaderExpand(reader);

        xmlcheck(xmlstatus);

     
        //  cerr << "loading new entry...\n";
        dic_entry entry(cur);

        nbflex += entry.inflecteds.size();

        indexer.add_entry(entry);

        //cerr << "entry indexed\n";

        if (indexer.size() > next) {
          indexer.minimize();
          next = max(next, (int) ((double) indexer.size() * ratio));
          cerr << "next = " << next << '\n';
        }

        nbentries++;

        if ((nbentries % 1000) == 0) {
          cerr << nbentries << " entries (" << t.elapsed() << " s)\n";
          t.restart();
	}

        xmlTextReaderNext(reader);
      }
      xmlcheck(xmlstatus);
    }

    xmlcheck(xmlstatus);
 
    xmlFreeTextReader(reader);

    if (ret == -1) {
      cerr << "parsing error with '" << filename << "'\n"; 
      cerr << '\n' << nbentries << " entries parsed\n";
      exit(1);
    }

  } catch (xml_parse_error & error) {
    cerr << "XML parsing error : " << error.what() << '\n';
    exit(1);
  } catch (exception & e) {
    cerr << "caught an exception : " << e.what() << " (" << nbentries << " entries) \n";
    exit(1);
  } catch (...) {
    cerr << "caught an OVNI?\n"; 
    exit(1);
  }

  indexer.minimize();
  cerr << '\n' << nbentries << " entries parsed. " << nbflex << "inflected forms\n";

  //  indexer.dump(cerr);

  string pfx(filename);
  if (pfx.substr(pfx.size() - 3) == ".gz") { pfx.erase(pfx.size() - 3); }
  if (pfx.substr(pfx.size() - 4) == ".xml") { pfx.erase(pfx.size() - 4); }
  pfx += ".idx";

  indexer.write(pfx);

  return 0;
}

