#include <iostream>

#include <boost/filesystem/path.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/progress.hpp>
#include <boost/timer.hpp>
#include <boost/scoped_ptr.hpp>

#include <outilex/text_fsa.h>
#include <outilex/sentence_fsa.h>


using namespace std;
using namespace boost;
namespace fs = boost::filesystem;

// Name the program was invoked with (argv[0]); set at the start of main()
// and printed by usage().
char * progname;


// Print the command-line synopsis to `os`, then terminate the process
// with exit code `status`. This function never returns.
//
// With the default arguments the synopsis goes to cerr and the exit code
// is 1 (the error case).
void usage(ostream & os = cerr, int status = 1) {
  os << "usage: ";
  os << progname;
  os << " [-addpos][-gz][-l <lingdef>][-f format][-o <output>] <txtfsa>\n";
  exit(status);
}


// Entry point: reads the sentence automata of a text FSA one by one,
// determinizes, prunes and topologically sorts each of them, and writes
// the result to an output text FSA.
//
// Command line:
//   -h            print the usage message on stdout and exit with status 0
//   -l <lingdef>  path of the linguistic definition file; overrides the
//                 LINGDEF environment variable
//   -o <output>   output path; by default the input file name with a
//                 trailing ".gz" removed and ".clean" appended, placed in
//                 the same directory as the input
//   -addpos       store a running position index in every state
//   -gz           pass 6 (instead of 0) as the compression argument of
//                 new_otext_fsa
//   -f <format>   output format passed to new_otext_fsa (default "bin")
//   <txtfsa>      input path. Any argument that is not one of the options
//                 above is taken as the input path (unrecognized options
//                 included); if several are given, the last one wins.
//
// Exit status: 0 on normal completion; 1 (through usage()) when the
// lingdef or input path is missing or an option lacks its value; 1 when a
// std::exception reaches the handler at the bottom.
int main(int argc, char ** argv) try {

  fs::path lingdefpath, ipath, opath;
  bool addpos = false;
  int compression = 0;
  string format = "bin";

  // The environment supplies the default lingdef path; -l may override it.
  char * text = getenv("LINGDEF");

  if (text) {
    lingdefpath = fs::path(text, fs::native);
  }

  // Remember the program name for usage(), then step past argv[0].
  progname = *argv;
  argv++, argc--;

  while (argc) {

    string arg = *argv;

    if (arg == "-h") {

      usage(cout, 0);

    } else if (arg == "-l") {

      // Options taking a value: advance to the value, bail out if missing.
      argv++, argc--;
      if (! argc) { usage(); }
      lingdefpath = fs::path(*argv, fs::native);
    
    } else if (arg == "-o") {
    
      argv++, argc--;
      if (! argc) { usage(); }
      opath = fs::path(*argv, fs::native);
 
    } else if (arg == "-addpos") {

      addpos = true;

    } else if (arg == "-gz") {

      compression = 6;

    } else if (arg == "-f") {

      argv++, argc--;
      if (! argc) { usage(); }
      format = *argv;

    } else {
      // Everything else is taken as the input path.
      ipath = fs::path(*argv, fs::native);
    }
    argv++, argc--;    
  }

  // Both the linguistic definition and the input file are mandatory.
  if (lingdefpath.empty() || ipath.empty()) { usage(); }

  if (opath.empty()) {
    // Derive the output name from the input name: drop a final ".gz"
    // extension if there is one, then append ".clean".
    string fname = ipath.leaf();
    string::size_type dot = fname.rfind('.');
    
    if (dot != string::npos && fname.substr(dot) == ".gz") {
      fname.erase(dot);
    }

    fname += ".clean";
  
    // if (compression) { fname += ".gz"; }

    opath = ipath.branch_path() / fname;
  }


  // NOTE(review): lingdef is never deleted in this function. Whether the
  // reader created below takes ownership of it is not visible from this
  // file -- confirm before wrapping it in a smart pointer.
  ling_def * lingdef = new ling_def(lingdefpath);

  //itext_fsa itext(ipath, lingdef);
  scoped_ptr<itext_fsa> p_itext(new_itext_fsa(ipath, lingdef));
  itext_fsa & itext = *p_itext;

  // Sentence count announced by the input; used to size the output and
  // the progress bar.
  int total = itext.size();

  scoped_ptr<otext_fsa> p_otext(new_otext_fsa(opath, format, total, compression));
  otext_fsa & otext = *p_otext;


  cout << "cleaning " << ipath.leaf() << ", " << total << " sentences expected...\n";

  // sentenceno counts the sentences read so far; pos is the running
  // position counter across all non-empty sentences.
  int sentenceno = 0, pos = 0;
  sentence_fsa fsa;
  timer tmr;
  progress_display show_progress(total, cout);

  while (itext >> fsa) {

    //cerr << "sentence #" << sentenceno << endl;
    //cerr << "text = " << fsa.text << endl;

    if (fsa.empty()) {

      // An empty automaton is still written out, so the output keeps one
      // entry per input sentence.
      cerr << "\nerror sentence #" << sentenceno << " is empty\n";
      otext << fsa;

      // Restart the progress display and advance it back to the current
      // sentence -- presumably to redraw the bar after the error message.
      show_progress.restart(total);
      show_progress += sentenceno;

      ++show_progress; ++sentenceno;
      continue;
    }

    //fsa_dump_dot(fsa, "first.dot");

    //cerr << "determinize ...\n";
    fsa.determinize();

    //fsa_dump_dot(fsa, "deter.dot");

    //cerr << "prune ...\n";
    fsa.prune();

    //fsa_dump_dot(fsa, "prune.dot");

    // The automaton may have become empty after pruning; handle it the
    // same way as an automaton that was empty on input.
    if (fsa.empty()) {

      cerr << "error: sentence #" << sentenceno << " is empty (after prune)\n";
      otext << fsa;

      show_progress.restart(total);
      show_progress += sentenceno;

      ++show_progress; ++sentenceno;
      continue;
    }

    //cerr << "topo sort...\n";
    fsa.topological_sort();

    // set pos tag in states
    if (addpos) {
      // States are numbered consecutively across sentences, in state
      // index order (which follows the topological sort done just above).
      for (int q = 0; q < fsa.size(); ++q) {
        fsa.set_pos(q, pos++);
      }
    } else { pos += fsa.size(); } // keep the position total without tagging

    //cerr << "done.\n";
    otext << fsa;

    ++show_progress; ++sentenceno;
  }

  cout << "done, " << sentenceno << "/" << total << " sentences proceed (" << pos << " positions)\n"
    << tmr.elapsed() << "s.\n";

} catch (const std::exception & e) {
  // std::exception is spelled out on purpose: this file has both
  // `using namespace std` and `using namespace boost` in effect, so an
  // unqualified `exception` is ambiguous whenever boost::exception is
  // visible through the Boost headers. Caught by const reference since
  // the handler only reads what().
  cerr << "error: exception caught: " << e.what() << endl;
  exit(1);
}

