#include <string>
#include <stdexcept>

#include <boost/filesystem/path.hpp>
#include <boost/filesystem/fstream.hpp>

#include <outilex/stringtok.h>
#include <outilex/lg_table.h>
#include <outilex/unitex_grf.h>
#include <outilex/generic_fst.h>
#include <outilex/flatten.h>
#include <outilex/fsa-union.h>
#include <outilex/fsa-determinize.h>
#include <outilex/fsa-prune.h>
#include <outilex/fsa-minimize.h>
#include <outilex/epsilon_removal.h>


using namespace std;
using namespace boost;
namespace fs = boost::filesystem;

namespace {

/* Predicate handed to fsa_remove_epsilon(): a transition counts as an
 * epsilon when its input label is either the literal "<E>" or empty, and
 * its output label is empty as well. */
bool is_epsilon(const generic_fst::transition & tr) {

  const bool blank_input = (tr.in() == "<E>") || tr.in().empty();
  if (! blank_input) { return false; }

  return tr.out().empty();
}


/* Name the program was invoked with; main() stores argv[0] here before
 * parsing the command line. */
char * progname;

/* Writes the one-line command synopsis to standard output, then terminates
 * the process with exit status 0. Never returns. */
void usage() {
  cout << "usage: ";
  cout << progname;
  cout << " -table <tab> -o <res> [-grf | -list] <ref>\n";
  exit(0);
}


bool lexicalize(const string & in, string & res, const lg_table & table, int e) {

  if (in.empty()) { res.clear(); return true; }

  vector<string> split;
  stringtok(in, "@", back_inserter(split));

  if (split.empty()) { return false; }


  /* if input string starts with a '@', parameters are even indiced */

  int param = (in[0] == '@') ? 0 : 1;

  for (int i = 0; i < split.size(); ++i) {

    if ((i % 2) == param) {

     string val;
     if (! table.resolve(e, split[i], val)) { return false; }

     res += val;

    } else { res += split[i]; }
  }

  return true;
}


void lexicalize(const generic_fst & reference, generic_fst & res, const lg_table & table, int i) {

  res.clear();
  res.resize(reference.size());

  for (int q = 0; q < reference.size(); ++q) {
 
    res.set_final(q, reference.final(q));

    for (generic_fst::const_trans_iterator tr = reference.trans_begin(q);
         tr != reference.trans_end(q); ++tr) {
    
      string in, out;

      if (lexicalize(tr->in(), in, table, i) && lexicalize(tr->out(), out, table, i)) {
        res.add_trans(q, in, out, tr->to());
      }
    }
  }
}


/* Loads the graph file at `grfpath` as a unitex_grf and converts it into
 * `res` through grf_to_generic_fst(). */
void load_grf(const fs::path & grfpath, generic_fst & res) {

  unitex_grf graph(grfpath);

  grf_to_generic_fst(graph, res);
}


/* Reads a list of graph names from `listpath` and unions the graphs into `res`.
 *
 * Each line of the list names a graph without its extension; the file
 * actually loaded is "<line>.grf8", looked up in the directory that holds
 * the list file. Empty lines and lines starting with '#' are skipped.
 * `res` is not reset first: every loaded graph is merged into it with
 * fsa_union().
 *
 * Throws runtime_error when the list file cannot be opened.
 */
void load_list(const fs::path & listpath, generic_fst & res) {

  fs::ifstream list(listpath);
  if (! list) { throw runtime_error("file not found : " + listpath.string()); }

  const fs::path dir = listpath.branch_path();

  for (string name; getline(list, name); ) {

    const bool skipped = name.empty() || name[0] == '#';
    if (skipped) { continue; }

    generic_fst graph;
    load_grf(dir / (name + ".grf8"), graph);
    fsa_union(res, graph);
  }
}


/* Tells whether entry `e` of `tab` must be skipped: true only when the
 * "FORGET" parameter resolves for that entry and its value is exactly "X". */
bool forget(lg_table & tab, int e) {
  string flag;
  return tab.resolve(e, "FORGET", flag) && flag == "X";
}


} // namespace ""


int main(int argc, char ** argv) try {
  
  fs::path tablepath, outpath, grfpath, lstpath;
  bool do_clean = true;

  progname = *argv;
  argv++, argc--;

  if (! argc) { usage(); }

  while (argc) {

    string arg = *argv;
  
    if (arg == "-table") {
     
      argv++, argc--;
      if (! argc) { cerr << "bad args\n"; }

      tablepath = fs::path(*argv, fs::native);

    } else if (arg == "-o") {

      argv++, argc--;
      if (! argc) { cerr << "bad args\n"; }

      outpath = fs::path(*argv, fs::native);

    } else if (arg == "-grf") {

      argv++, argc--;
      if (! argc) { cerr << "bad args\n"; }

      grfpath = fs::path(*argv, fs::native);

    } else if (arg == "-lst" || arg == "-list") {

      argv++, argc--;
      if (! argc) { cerr << "bad args\n"; }

      lstpath = fs::path(*argv, fs::native);

    } else if (arg == "-dontclean") {

      do_clean = false;

    } else if (arg == "-h") {
    
      usage();

    } else {
      cerr << "unknow argument: " << arg << endl;
      exit(1);
    }

    argv++, argc--;
  }
  
  if (tablepath.empty() || outpath.empty() || (grfpath.empty() && lstpath.empty())) {
    cerr << "missing some args\n";
    exit(1);
  }

  cout << "loading table " << tablepath.string() << "...\n";
  lg_table table(tablepath);

  generic_fst reffst;

  fs::path root;
  if (! grfpath.empty()) {
    cout << "loading ref graph " << grfpath.string() << "...\n";
    load_grf(grfpath, reffst);
    root == grfpath.branch_path();
  } else {
    cout << "loading ref graph list " << lstpath.string() << "...\n";
    load_list(lstpath, reffst);
    root = lstpath.branch_path();
  }

  cout << "ref size = " << reffst.size() << endl;

  cout << "flatten ...\n";
  flatten(reffst, root);
  cout << "ref size = " << reffst.size() << endl;

  if (do_clean) {

    cout << "pruning ...\n";
    fsa_prune(reffst);
    cout << "done. (" << reffst.size() << " states).\n";

    cout << "epsilon removal\n";
    fsa_remove_epsilon(reffst, is_epsilon);
    cout << "done. (" << reffst.size() << " states).\n";

    cout << "determinization ...\n";
    fsa_determinize(reffst);
    cout << "done. (" << reffst.size() << " states).\n";

    cout << "minimisation ...\n";
    fsa_minimize(reffst);
    cout << "done. (" << reffst.size() << " states).\n";
  }


  generic_fst res;

  cout << "lexicalisation ...\n";

  for (int e = 0; e < table.size(); ++e) {

    if (forget(table, e)) { continue; }

    generic_fst specialized;
    lexicalize(reffst, specialized, table, e);
    fsa_union(res, specialized);
  }

  if (do_clean) {
    cout << "done. (" << res.size() << " states).\n";
    cout << "done.\n";

    cout << "pruning ...\n";
    fsa_prune(res);
    cout << "done. (" << res.size() << " states).\n";

    cout << "epsilon removal\n";
    fsa_remove_epsilon(res, is_epsilon);
    cout << "done. (" << res.size() << " states).\n";

    cout << "determinization ...\n";
    fsa_determinize(res);
    cout << "done. (" << res.size() << " states).\n";

    cout << "minimisation ...\n";
    fsa_minimize(res);
    cout << "done. (" << res.size() << " states).\n";
  }

  cout << "done. storing result in " << outpath.string() << "\n";

  res.write(outpath);

  cout << "ok\n";
  
} catch (exception & e) {

  cerr << "fatal error: " << e.what() << endl;
  exit(1);
}

