#include <algorithm>
#include <cstdlib>
#include <exception>
#include <iostream>
#include <limits>
#include <string>

#include <boost/filesystem/path.hpp>
#include <boost/filesystem/convenience.hpp>
#include <boost/scoped_ptr.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/progress.hpp>

#include <outilex/lingdef.h>
#include <outilex/text_fsa.h>

#include <outilex/ugrammar.h>
#include <outilex/text_uchart.h>
#include <outilex/xml_text_uchart.h>

#include <outilex/topdown_parser.h>
#include <outilex/earley_parser.h>
//#include <outilex/earley2_parser.h>


using namespace std;
using namespace boost;

namespace fs = boost::filesystem;


namespace {

// Name the program was invoked with (argv[0]); main() assigns it before
// any call to usage().
char * progname;

/*
 * Print the command-line help on standard output, then terminate the
 * process with exit status 0. This function never returns.
 *
 * Only the options that main() actually accepts are listed.
 */
void usage() {
  cout << "\nusage: " << progname << " [options] -g <grammar> (<textfsa>|<textchart>)\n"
    "\n"
    "apply the specified decorated RTN grammar to the specified text FSA or chart.\n"
    "the result is stored in a chart named as the input text file with the .chart extension.\n"
    "\n"
    "options:\n"
    "\n"
    "-h           : display this help and exit\n"
    "-o <out>     : specify the name of the resulting chart\n"
    "-l <lingdef> : specify the tagset description file,\n"
    "               if not specified look for LINGDEF environment variable\n"
    "-earley      : EARLEY parsing (default)\n"
    "-topdown     : use a simple top-down parsing algorithm (do not handle left-recursive grammar)\n"
    "-max <n>     : stop parsing after the n-th sentence\n"
    "-clean       : do a chart cleanup, dismiss useless computed syntagms and keep longest\n"
    "               and best matches (this is the default)\n"
    "-dontclean   : do not clean the result chart\n"
    "-wellformed  : keep only successful parses with well-formed feature structure\n"
    "-gz          : compress the resulting chart\n"
    "-surf        : apply the grammar from every position in the text (default is to begin the parse from\n"
    "               the start of each sentence)\n"
    "-v           : verbose mode\n"
    "\n";
  exit(0);
}

} // anonymous namespace


// Parsing algorithms that can be selected on the command line.
enum parse_type {
  TOPDOWN = 0,  // chosen with -topdown
  EARLEY  = 1   // chosen with -earley; also the initial setting in main()
};

int main(int argc, char ** argv) try {

  fs::path itextpath, lingdefpath, grammarpath, opath;
  int cleanflags = CLEAN_BESTMATCH|CLEAN_LONGESTMATCH|CLEAN_WELLFORMED;
  int compression = 0;
  bool surf = false;
  parse_type PARSER_TYPE = EARLEY;
  bool verbose = false;

  int maxsentence = std::numeric_limits<int>::max();

  //enum { TEXT_FSA, TEXT_CHART, NONE } input_type = NONE;

  char * text = getenv("LINGDEF");
  if (text) {
    lingdefpath = fs::path(text, fs::native);
  }


  progname = *argv;
  argv++, argc--;

  if (argc == 0) { usage(); }

  while (argc) {

    string arg = *argv;

    if (arg == "-l") {

      argv++, argc--;
      if (argc == 0) { cerr << "bad args\n"; exit(1); }
      lingdefpath = fs::path(*argv, fs::native);

    } else if (arg == "-o") {

      argv++, argc--;
      if (argc == 0) { cerr << "bad args\n"; exit(1); }
      opath = fs::path(*argv, fs::native);

    } else if (arg == "-g") {

      argv++, argc--;
      if (argc == 0) { cerr << "bad args\n"; exit(1); }
      grammarpath = fs::path(*argv, fs::native);

    } else if (arg == "-clean") {

      cleanflags |= CLEAN_BESTMATCH|CLEAN_LONGESTMATCH;

    } else if (arg == "-dontclean") {

      cleanflags = 0;

    } else if (arg == "-wellformed") {

      cleanflags |= CLEAN_WELLFORMED;

    } else if (arg == "-h") {

      usage();

    } else if (arg == "-gz") {
    
      compression = 6;

    } else if (arg == "-surf") {
    
      surf = true;

    } else if (arg == "-earley") {

      PARSER_TYPE = EARLEY;
    
    } else if (arg == "-topdown") {
    
      PARSER_TYPE = TOPDOWN;

    } else if (arg == "-max") {

      argv++, argc--;
      if (argc == 0) { cerr << "bad args\n"; exit(1); }
      maxsentence = lexical_cast<int>(*argv);

    } else if (arg == "-v") {

      verbose = true;

    } else {
      if (arg[0] == '-') {
        cerr << "error: unknow argument '" << arg << "'\n";
        exit(1);
      }
      // text fsa
      itextpath = fs::path(arg, fs::native);
    }

    argv++, argc--;
  }


  if (itextpath.empty() || lingdefpath.empty() || grammarpath.empty()) {
    cerr << "arguments missing\n"; exit(1);
  }


  if (opath.empty()) {

    string leaf = itextpath.leaf();
    string::size_type dot = leaf.rfind('.');

    if (dot != string::npos && leaf.substr(dot) == ".gz") {
      leaf.erase(dot);
      dot = leaf.rfind('.');
    }
    if (dot != string::npos && leaf.substr(dot) == ".fsa") {
      leaf.erase(dot);
    }

    leaf += ".chart";
    if (compression) { leaf += ".gz"; }

    opath = itextpath.branch_path() / leaf;
  }



  ling_def lingdef(lingdefpath);

  unification_init(& lingdef);

  boost::timer timr;

  cout << "loading grammar " << grammarpath.string() << "...\n";
  ugrammar gram(grammarpath, & lingdef);
  cout << "done. " << timr.elapsed() << "s.\n";


  cerr << "axiom = " << gram.start_name() << endl;

  cout << "init parser ...\n";

  //uparser * p_parser;
  scoped_ptr<uparser> p_parser;
  switch (PARSER_TYPE) {

  case TOPDOWN:
    cout << "TOP DOWN parsing\n";
    p_parser.reset(new topdown_parser(gram));
    break;

  case EARLEY:
    cout << "EARLEY parsing\n";
    p_parser.reset(new earley_parser(gram));
    break;

  default:
    cerr << "unknow parser mode ?\n";
    exit(1);
    break;
  }
 

  uparser & parser = *p_parser;

  /* parsing */

  int sentenceno = 0, nbmatches = 0;
  timr.restart();

  //xml_itext_uchart itext(itextpath, & lingdef);
 
  cout << "loading text ...\n";
  scoped_ptr<itext_uchart> p_itext(new_itext_uchart(itextpath, & lingdef));
  itext_uchart & itext = *p_itext;

  int total = std::min(itext.size(), maxsentence);

  xml_otext_uchart ochart(opath, total, compression);

  cout << "ok. parsing...\n";

  progress_display show_progress(total, cout);

  uchart chart;

  boost::timer ntimer;
  int nmatches;

  while (itext >> chart) {

    if (verbose) {
      cout << "sentence #" << sentenceno << "/" << total << "\n";
      cout << "text = " << chart.fsa.text << endl;
      ntimer.restart();
    }

    parser.parse(chart, surf);

    if (cleanflags) { chart.cleanup(gram.start_name(), cleanflags); }

    ochart << chart;

    nmatches = 0;
    for (int q = 0; q < chart.size(); ++q) {
      nmatches += chart[q].synt_by_name.count(gram.start_name());
      if (! surf) { break; }
    }
    nbmatches += nmatches;

    if (verbose) {
      cout << nmatches << " analyse(s). " << ntimer.elapsed() << "s.\n\n";
    } else {
      ++show_progress;
    }

    ++sentenceno;

    if (sentenceno == maxsentence) { break; }
  }

    cerr << "cleanflags = " << cleanflags << endl;

  cout << "done. " << sentenceno << " sentences. " << nbmatches << " analyse(s). "
    << timr.elapsed() << "s.\n";

  return 0;

} catch (exception & e) {

  cerr << "caught an exception : " << e.what() << endl;
  exit(1);

} catch (...) { cerr << "ouch!\n"; exit(1); }

