#include <algorithm>
#include <cstdlib>
#include <exception>
#include <iostream>
#include <limits>
#include <string>

#include <boost/filesystem/path.hpp>
#include <boost/filesystem/convenience.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/progress.hpp>

#include <outilex/lingdef.h>
#include <outilex/text_fsa.h>

#include <outilex/ugrammar.h>
#include <outilex/text_uchart.h>
#include <outilex/uparser.h>


using namespace std;
using namespace boost;

namespace fs = boost::filesystem;


namespace {

// Name the program was invoked as; main() stores argv[0] here before parsing options.
char * progname;

/**
 * Print the command-line help on standard output and terminate the process
 * with exit status 0. This function never returns.
 *
 * The text lists every option recognised by main()'s argument loop,
 * including -max and -h.
 */
void usage() {
  std::cout << "\nusage: " << progname << " [options] -g <grammar> (-fsa <textfsa>|-chart <textchart>)\n"
    "\n"
    "apply the specified decorated RTN grammar to the specified text FSA or chart.\n"
    "the result is stored in a chart named as the input text file with the .chart extension.\n"
    "\n"
    "options:\n"
    "\n"
    "-o <out>     : specify the name of the resulting chart\n"
    "-l <lingdef> : specify the tagset description file,\n"
    "               if not specified look for LINGDEF environment variable\n"
    "-clean       : do a chart cleanup, dismiss useless computed syntagms and keep longest and best matches\n"
    "               (default with -fsa)\n"
    "-dontclean   : do not clean the result chart (default with -chart)\n"
    "-wellformed  : keep only successful parses with well-formed feature structure\n"
    "-gz          : compress the resulting chart\n"
    "-surf        : apply the grammar from every position in the text (default is to begin the parse from\n"
    "               the start of each sentence)\n"
    "-max <n>     : stop after <n> sentences (text FSA input only)\n"
    "-h           : print this help and exit\n"
    "\n";
  std::exit(0);
}

} // anonymous namespace


int main(int argc, char ** argv) try {

  fs::path itextpath, lingdefpath, grammarpath, opath;
  int cleanflags = 0;
  int clean = -1, compression = 0;
  bool surf = false;

  int maxsentence = std::numeric_limits<int>::max();

  enum { TEXT_FSA, TEXT_CHART, NONE } input_type = NONE;

  char * text = getenv("LINGDEF");
  if (text) {
    lingdefpath = fs::path(text, fs::native);
  }


  progname = *argv;
  argv++, argc--;

  if (argc == 0) { usage(); }

  while (argc) {

    string arg = *argv;

    if (arg == "-l") {

      argv++, argc--;
      if (argc == 0) { cerr << "bad args\n"; exit(1); }
      lingdefpath = fs::path(*argv, fs::native);

    } else if (arg == "-o") {

      argv++, argc--;
      if (argc == 0) { cerr << "bad args\n"; exit(1); }
      opath = fs::path(*argv, fs::native);

    } else if (arg == "-g") {

      argv++, argc--;
      if (argc == 0) { cerr << "bad args\n"; exit(1); }
      grammarpath = fs::path(*argv, fs::native);

    } else if (arg == "-clean") {

      cleanflags |= CLEAN_BESTMATCH|CLEAN_LONGESTMATCH;
      clean = 1;

    } else if (arg == "-dontclean") {

      clean = 0;

    } else if (arg == "-wellformed") {

      cleanflags |= CLEAN_WELLFORMED;

    } else if (arg == "-h") {

      usage();

    } else if (arg == "-gz") {
    
      compression = 6;

    } else if (arg == "-surf") {
    
      surf = true;

    } else if (arg == "-fsa") {

      argv++, argc--;
      if (argc == 0) { cerr << "bad args\n"; exit(1); }
      itextpath = fs::path(*argv, fs::native);
      input_type = TEXT_FSA;

    } else if (arg == "-chart") {

      argv++, argc--;
      if (argc == 0) { cerr << "bad args\n"; exit(1); }
      itextpath = fs::path(*argv, fs::native);
      input_type = TEXT_CHART;
    
    } else if (arg == "-max") {

      argv++, argc--;
      if (argc == 0) { cerr << "bad args\n"; exit(1); }
      maxsentence = lexical_cast<int>(*argv);

    } else {
      cerr << "error: unknow argument '" << arg << "'\n";
      exit(1);
    }

    argv++, argc--;
  }

  if ((input_type == NONE) || lingdefpath.empty() || grammarpath.empty()) {
    cerr << "bad args\n"; exit(1);
  }

  if (clean == -1) { // by default, we clean the chart only if input comes from a text fsa
    if (input_type == TEXT_FSA) {
      cleanflags |= CLEAN_BESTMATCH|CLEAN_LONGESTMATCH;
    }
  }


  if (opath.empty()) {

    string leaf = itextpath.leaf();
    string::size_type dot = leaf.rfind('.');

    if (dot != string::npos && leaf.substr(dot) == ".gz") {
      leaf.erase(dot);
      dot = leaf.rfind('.');
    }
    if (dot != string::npos && leaf.substr(dot) == ".fsa") {
      leaf.erase(dot);
    }

    leaf += ".chart";
    if (compression) { leaf += ".gz"; }

    opath = itextpath.branch_path() / leaf;
  }



  ling_def lingdef(lingdefpath);


  ugrammar gram(grammarpath, & lingdef);

  topdown_parser parser(gram);



  int sentenceno = 0, nbmatches = 0;
  boost::timer timr;


  if (input_type == TEXT_FSA) {

    itext_fsa itext(itextpath, & lingdef);
    int total = std::min(itext.size(), maxsentence);

    otext_uchart ochart(opath, total, compression);

    progress_display show_progress(total, cout);

    sentence_fsa fsa;
    uchart chart;

    while (itext >> fsa) {

//      if (surf) { cerr << "sentence #" << sentenceno << '/' << total << '\n'; }
      if (sentenceno > maxsentence) { break; }

      chart.eat(fsa);
      // cerr << "parse...\n";
      parser.parse(chart, surf);
      //  cerr << "parsing done\n";

      // chart cleanup
      if (cleanflags) { chart.cleanup(gram.start_name(), cleanflags); }

      ochart << chart;

      for (int q = 0; q < chart.size(); ++q) {
        nbmatches += chart[q].synt_by_name.count(gram.start_name());
        if (! surf) { break; }
      }
      
      ++sentenceno, ++show_progress;
    }

  } else { // TEXT_CHART


    itext_uchart itext(itextpath, & lingdef);
    int total = itext.size();

    otext_uchart ochart(opath, total, compression);

    progress_display show_progress(total, cout);
    
    uchart chart;
    while (itext >> chart) {
    
      parser.parse(chart, surf);
      if (cleanflags) { chart.cleanup(gram.start_name(), cleanflags); }

      ochart << chart;

      for (int q = 0; q < chart.size(); ++q) {
        nbmatches += chart[q].synt_by_name.count(gram.start_name());
        if (! surf) { break; }
      }

      ++sentenceno, ++show_progress;
    }
  }

  cout << "done. "
    << sentenceno << " sentences. "
    << nbmatches << " matches. "
    << timr.elapsed() << "s.\n";

  return 0;

} catch (exception & e) {

  cerr << "caught an exception : " << e.what() << endl;
  exit(1);

} catch (...) { cerr << "ouch!\n"; exit(1); }

