#include <iostream>
#include <fstream>
#include <stdexcept>
#include <sstream>
#include <vector>

#include <boost/progress.hpp>
#include <boost/scoped_ptr.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/filesystem/fstream.hpp>

#include <outilex/text_fsa.h>
#include <outilex/bin_text_fsa.h>
#include <outilex/sentence_fsa.h>
#include <outilex/wrtn_grammar.h>
#include <outilex/wrtn_chart.h>
#include <outilex/wrtn_parser.h>

#include <outilex/escape.h>
#include <outilex/stringtok.h>


namespace {

using namespace std;
using namespace boost;
namespace fs = boost::filesystem;

char * progname;

bool verbose = false;

/* Prints the command-line synopsis on the standard error stream, prefixed
 * by the program name, then terminates the process with exit status 1.
 */
void usage() {
  static const char * const synopsis =
    " -l <lingdef> -gram <fst> [-v][-lgmatch][-ipath][-iout][-o <outputres>] <txtfsa>\n";

  cerr << "usage: " << progname << synopsis;
  exit(1);
}



void output_match(const wrtn_chart & chart, const wrtn_match & match, ostream & os) {

  const sentence_fsa & text = chart.fsa;

  int size = match.path.size();
  int i;
  for (i = 0; i < size; ++i) {
    
    const syntref & ref = match.path[i];
    
    if (! match.out[i].empty()) { os << "'" << match.out[i] << "'"; }
 
    if (ref.transno < 0) {
      output_match(chart, chart[ref.qno][- ref.transno - 1], os);
    } else {
      os << text.get_trans(ref.qno, ref.transno).in().form << ' ';
    }
  }

  if (! match.out[i].empty()) { os << "'" << match.out[i] << "'"; }
}



/* Interprets a grammar output label and accumulates the features it carries.
 *
 * The label is split on the characters '+', ',', '[' and ']'. For each
 * non-empty item:
 *  - an item starting with '^' names an attribute to copy from the lexical
 *    mask of the sentence transition designated by ref: "^lemma" stores the
 *    mask's lemma into lemma, any other "^attr" appends "+attr=<value text>"
 *    to code. Such an item is reported on cerr and skipped when ref
 *    designates a sub-call (negative transno);
 *  - any other item is appended to code as "+item".
 */
void add_feats_from_output(ostringstream & code, string & lemma, sentence_fsa & text,
                           const syntref & ref, const string & label) {

  vector<string> items;
  stringtok(label, "+,[]", back_inserter(items));

  for (vector<string>::const_iterator it = items.begin(); it != items.end(); ++it) {

    const string & item = *it;

    if (item.empty()) { continue; }

    if (item[0] != '^') { // literal feature, copied as is
      code << "+" << item;
      continue;
    }

    // from here on: feature to retrieve from the lexical entry

    if (ref.transno < 0) {
      cerr << "error: label " << item << " under a subcall\n";
      continue;
    }

    const lexical_mask & m = text.get_trans(ref.qno, ref.transno).in();
    const string attrname = item.substr(1);

    if (attrname == "lemma") {
      lemma = m.lemma;
      continue;
    }

    const feat_set & values = m[attrname];
    code << "+" << attrname << "=" << values.get_val_text();
  }
}


/* Finalizes the accumulators of a tag that has just been closed and builds
 * the textual lexical entry "{form,lemma.feats}" describing it.
 *
 * form  : accumulated surface form; its trailing blank (one is appended
 *         after every token) is stripped in place.
 * lemma : set to form when still empty.
 * code  : accumulated feature codes, each one prefixed by '+'; the leading
 *         '+' is dropped in the result.
 *
 * Both strippings are guarded: with an empty form, form.size() - 1 wraps
 * around and resize() throws; with an empty code, substr(1) throws. A tag
 * without token or without feature then yields a (probably invalid) entry
 * text that the caller reports through its normal error path, instead of an
 * exception escaping up to main().
 */
std::string make_tag_entry(std::string & form, std::string & lemma,
                           const std::ostringstream & code) {

  if (! form.empty()) { form.resize(form.size() - 1); } // strip ending ' '

  if (lemma.empty()) { lemma = form; }

  std::string feats = code.str();
  if (! feats.empty()) { feats.erase(0, 1); } // drop leading '+'

  return "{" + form + "," + lemma + "." + feats + "}";
}


/* Walks a match and adds to the sentence automaton one transition for each
 * tag delimited by '[' and ']' in the outputs of the match.
 *
 * chart  : chart holding the sentence automaton (chart.fsa) and the matches.
 * match  : match to walk; match.out[i] is the output attached to step i of
 *          match.path, and match.out[path size] is the trailing output.
 * q0     : tag state on entry:
 *            -1  outside any tag;
 *            -2  inside a tag opened by a calling match: forms and features
 *                are accumulated, but the tag can be neither closed nor
 *                reopened here;
 *           >= 0 inside a tag opened in this match, the value being the
 *                state the new transition will leave from.
 * form, lemma, code : accumulators of the tag being built, shared with the
 *          recursive calls on sub-matches.
 * lexic  : lexicon receiving the lexical masks of the new transitions.
 * ldef   : linguistic definitions used to build the lexical masks.
 *
 * Returns the number of transitions added. An entry that cannot be turned
 * into a valid lexical mask is reported on cerr and skipped; a tag still
 * open at the end of the match is reported on cerr.
 */
int process_match(wrtn_chart & chart, const wrtn_match & match,
                  int q0, string & form, string & lemma, ostringstream & code,
                  mutable_lexic & lexic, ling_def * ldef) {

  sentence_fsa & text = chart.fsa;

  int size = match.path.size();
  int nbmatch = 0;

  int i = 0; // we need it outside loop
  for (i = 0; i < size; ++i) {

    const syntref & ref = match.path[i];
    string out = match.out[i]; // working copy, consumed piece by piece below

    string::size_type eot = out.find(']'); // eot stands for end of tag

    /* end of a tag */
    if (q0 >= 0
        && eot != string::npos) {

      // features written before the ']' still belong to the closing tag
      add_feats_from_output(code, lemma, text, ref, out.substr(0, eot));

      const string txt = make_tag_entry(form, lemma, code);

      if (verbose) { cerr << "find something : " << txt << "\n"; }

      try {

        lexical_mask m(txt, ldef);
        if (! m) { throw runtime_error("invalid entry"); }

        if (verbose) { cerr << "lexmask = " << m << "\n"; }

        int lbl = lexic.add(m);
        text.A.add_trans(q0, lbl, match.path[i].qno); // add transition

        ++nbmatch;

      } catch (const exception & e) {
        cerr << "error with " << txt << ": " << e.what() << endl;
      }

      q0 = -1;
      out = out.substr(eot + 1); // keep what follows the ']'
    }

    string::size_type bot = out.find('['); // bot stands for begin of tag

    /* begin of a tag */
    if (q0 == -1
        && bot != string::npos) {

      form.clear(); code.str(""); lemma.clear();
      q0 = match.path[i].qno;
      out = out.substr(bot);
    }


    /* travel */

    if (ref.transno < 0) { // sub-pattern

      nbmatch += process_match(chart, chart[ref.qno][-ref.transno - 1],
                               (q0 == -1) ? -1 : -2, form, lemma, code, lexic, ldef);

      // -2 forces the '[' and ']' outputs to be in the same graph

    } else { // lexical trans

      if (q0 != -1) { // -2 or >= 0
        form += escape(text.get_trans(ref.qno, ref.transno).in().form, ",.") + " ";
      }
    }

    // inside a tag, what remains of the output carries features
    if (q0 != -1 && ! out.empty()) {
      add_feats_from_output(code, lemma, text, ref, out);
    }
  }


  /* end of match : look for last output
   */

  const string & out = match.out[i];
  if (q0 >= 0
      && ! out.empty() && out[out.size() - 1] == ']') { // end of tag

    if (out.size() > 1) {
      add_feats_from_output(code, lemma, text,
                            syntref(0, -1),     // fake syntref
                            out);
    }

    const string txt = make_tag_entry(form, lemma, code);

    if (verbose) { cerr << "find something (end) : " << txt << "\n"; }

    try {

      lexical_mask m(txt, ldef);
      if (! m) { throw runtime_error("invalid entry"); }

      if (verbose) { cerr << "lexmask = " << m << endl; }

      int lbl = lexic.add(m);
      text.A.add_trans(q0, lbl, match.to); // add transition

      ++nbmatch;

    } catch (const exception & e) {
      cerr << "(at end of match) error with " << txt << ": " << e.what() << endl;
    }
    q0 = -1;
  }

  // tag opened by a caller: the trailing output only contributes features
  if (q0 == -2 && ! out.empty()) {
    add_feats_from_output(code, lemma, text, syntref(0,-1), out);
  }

  if (q0 >= 0) { // a tag opened in this match was never closed
    cerr << "error: tag '[' and ']' mismatch :";
    output_match(chart, match, cerr);
    cerr << '\n';
  }

  return nbmatch;
}


/* Entry point for a top-level match: runs the recursive process_match with
 * fresh, empty accumulators and with the tag state set to -1.
 * Returns the value returned by that call.
 */
int process_match(wrtn_chart & chart, const wrtn_match & match,
                  mutable_lexic & lexic, ling_def * ldef) {

  string surface;
  string base;
  ostringstream feats;

  const int added = process_match(chart, match, -1, surface, base, feats, lexic, ldef);
  return added;
}



/* Processes every match of the chart whose name equals axiom and returns
 * the sum of the values returned by process_match for those matches.
 * Matches are visited position by position, from 0 to chart.size() - 1.
 */
int decore_fsa(wrtn_chart & chart, const string & axiom,
               mutable_lexic & lexic, ling_def * ldef) {

  int added = 0;
  const int nbpos = chart.size();

  int q = 0;
  while (q < nbpos) {

    wrtn_chart::const_match_iterator cur = chart[q].begin();
    const wrtn_chart::const_match_iterator last = chart[q].end();

    for (; cur != last; ++cur) {

      if (! (cur->name == axiom)) { continue; } // only matches of the axiom

      added += process_match(chart, *cur, lexic, ldef);
    }

    ++q;
  }

  return added;
}


}; // namespace ""



int main(int argc, char ** argv) try {

  fs::path txtpath, lingdefpath, wrtnpath, opath;
  int PARSER_FLAGS = wrtn_parser::SURF_PARSING; //wrtn_parser::IGNORE_DIFF_PATHS;
  bool longest_match = false;
  
  char * text = getenv("LINGDEF");
  if (text) {
    lingdefpath = fs::path(text, fs::native);
  }


  progname = *argv;

  argv++, argc--;

  while (argc) {
    
    string arg = *argv;
    
    if (arg == "-l") {
      
      argv++, argc--;
      if (argc == 0) { usage(); }
      lingdefpath = fs::path(*argv, fs::native);
    
    } else if (arg == "-o") {
      
      argv++, argc--;
      if (argc == 0) { usage(); }
      opath = fs::path(*argv, fs::native);
    
    } else if ((arg == "-gram") || (arg == "-rtn")) {
      
      argv++, argc--;
      if (argc == 0) { usage(); }
      wrtnpath = fs::path(*argv, fs::native);
    
    } else if (arg == "-lgmatch") {
    
      longest_match = true;

    } else if (arg == "-ipath") {
    
      PARSER_FLAGS |= wrtn_parser::IGNORE_DIFF_PATHS;

    } else if (arg == "-iout") {
    
      PARSER_FLAGS |= wrtn_parser::IGNORE_DIFF_OUTPUTS;

    } else if (arg == "-h") {
    
      usage();
    
    } else if (arg == "-v") {
    
      verbose = true;
    
    } else {

      txtpath = fs::path(arg, fs::native);
    }

    argv++, argc--;
  }

  if (txtpath.empty() || lingdefpath.empty() || wrtnpath.empty()) { usage(); }

  if (opath.empty()) {

    string resname = txtpath.leaf() + ".out";
    opath = txtpath.branch_path() / resname;
  }
  
  
  ling_def lingdef(lingdefpath);

  scoped_ptr<itext_fsa> p_itext(new_itext_fsa(txtpath, & lingdef));
  itext_fsa & itext = *p_itext;

  int total = itext.size();

  bin_otext_fsa otext(opath, total);

  wrtn_grammar wrtn(wrtnpath, & lingdef);
  wrtn_parser parser(wrtn, PARSER_FLAGS);

  string axiom = wrtn.start_name();

  sentence_fsa fsa;

  int sentenceno = 0, nbmatches = 0;
  progress_display show_progress(total, cout);
  boost::timer tmr;

  while (itext >> fsa) {

    //cerr << "sentence #" << sentenceno <<  "/" << total << ": " << fsa.text <<  endl;

    if (fsa.empty()) {

      cerr << "warning: sentence #" << sentenceno << " is empty\n";

    } else {

      /* create a new lexic for the sentence */
      mutable_lexic lexic;
      fsa.strip_lexic(lexic);

      wrtn_chart chart;
      chart.eat(fsa);
      parser.parse(chart);

      chart.cleanup(axiom, longest_match);

      nbmatches += decore_fsa(chart, axiom, lexic, & lingdef);

      fsa.swap(chart.fsa);
      fsa.set_lexic(lexic);
      fsa.determinize();
      fsa.topological_sort();
    }

    otext << fsa;

    ++sentenceno;
    ++show_progress;
  }

  cout << "done. " << sentenceno << " sentences.\n"
     << nbmatches << " new transitions. " << tmr.elapsed() << "s.\n";

  cout << "result text in " << opath.string() << endl;

  return 0;

} catch (exception & e) {

  cerr << "fatal error :" << e.what() << endl; exit(1);

} catch (...) { cerr << "caught an OVNI?\n"; exit(1); }

