#include <fstream>
#include <sstream>
#include <cassert>

#include <boost/progress.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/filesystem/convenience.hpp>
#include <boost/lexical_cast.hpp>

#include <outilex/DELAcorresp.h>
#include <outilex/dic_lex_entry.h>
#include <outilex/xml.h>


using namespace std;
using namespace boost;

namespace fs = boost::filesystem;

// Name the program was invoked with; set from argv[0] in main() and printed
// by usage().
char * progname;

// Correspondence table: read from the -c file in main(), then used by
// load_FST_symbols() to decode each textual symbol label.
DELAcorresp corresp;
// Symbol table of the FST file being translated: FST_symbols[label] holds the
// lexical entries decoded for symbol number `label`. Filled by
// load_FST_symbols() and indexed by the transition labels in fst2xml().
vector<vector<dic_lex_entry> > FST_symbols;


/**
 * Loads the symbol table found at the end of a text FST file into the global
 * FST_symbols (which is cleared first).
 *
 * Layout as read by this function:
 *   - the first line holds the number of sentence automata;
 *   - each automaton is closed by a line starting with 'f' (those lines are
 *     only counted here, their content is skipped);
 *   - the symbol table follows: one line per symbol, starting with '%',
 *     closed by a line starting with 'f'.
 *
 * The text after the leading '%' of each symbol line is handed to
 * corresp.load_text_label(), which appends the resulting entries to
 * FST_symbols[n], n being the 0-based rank of the symbol line.
 *
 * @param fstpath path of the text FST file
 * @return always true; every failure is reported by exception
 * @throws runtime_error if the file cannot be opened, ends prematurely, has
 *         an unreadable sentence count or contains a malformed symbol line
 */
bool load_FST_symbols(const fs::path & fstpath) {

  FST_symbols.clear();

  fs::ifstream is(fstpath);

  if (! is) { throw runtime_error("load_FST_symbols : unable to open " + fstpath.string()); }

  try {

    // from here on, any failed read (including hitting end of file) throws
    is.exceptions(ios::badbit | ios::failbit);

    string line;

    getline(is, line);

    int nbsentences = 0;
    istringstream header(line);
    if (! (header >> nbsentences) || nbsentences < 0) {
      throw runtime_error("FST_symbol: bad sentence count in " + fstpath.string());
    }

    // skip the sentence automata: each one ends with an 'f' line
    int skipped = 0;
    while (skipped < nbsentences) {
      getline(is, line);
      if (! line.empty() && line[0] == 'f') { ++skipped; }
    }

    // read the symbol table up to its closing 'f' line
    while (getline(is, line)) {

      // tolerate DOS line endings
      if (! line.empty() && line[line.size() - 1] == '\r') { line.resize(line.size() - 1); }

      if (! line.empty() && line[0] == 'f') { break; }

      // checked at run time: an assert would vanish in NDEBUG builds and let
      // an empty or malformed line through
      if (line.empty() || line[0] != '%') {
        throw runtime_error("FST_symbol: malformed symbol line in " + fstpath.string());
      }

      FST_symbols.push_back(vector<dic_lex_entry>());
      corresp.load_text_label(line.substr(1), back_inserter(FST_symbols.back()));
    }
  } catch (const ios::failure &) {
    throw runtime_error("FST_symbol: parsing error in " + fstpath.string());
  }

  return true;
}

void dump_FST_symbols() {
  for (int i = 0; i < FST_symbols.size(); ++i) {
    vector<dic_lex_entry> & v = FST_symbols[i];
    for (int j = 0; j < v.size(); ++j) {
      cerr << v[j] << ", ";
    }
    cerr << endl;
  }
}


/**
 * Translates the sentence automata of a text FST file into XML.
 *
 * Output structure: one <text_fsa size="..."> element containing, for each
 * sentence, a <sentence_fsa> with a <text> child (the sentence text taken
 * from the '-' header line) followed by one <state no="..." final="0|1"> per
 * state line. Each (label, destination) pair of a state line produces one
 * <transition to="..."> per lexical entry stored in FST_symbols[label]; the
 * entry writes its own content through write_XML().
 *
 * FST_symbols must already hold the symbol table of the same file
 * (see load_FST_symbols()). Progress is displayed on stdout and a summary is
 * written to stderr.
 *
 * @param fstpath     path of the text FST file
 * @param writer      XML writer receiving the elements
 * @param nbsentences number of sentences to translate, or -1 to use the count
 *                    found on the first line of the file
 * @throws runtime_error if the file cannot be opened, ends prematurely or is
 *         malformed (bad count, missing sentence header, incomplete
 *         transition, label outside the symbol table)
 */
void fst2xml(const fs::path & fstpath, xmlwriter & writer, int nbsentences) {

  fs::ifstream is(fstpath);

  if (! is) { throw runtime_error("fst2xml : unable to open " + fstpath.string()); }

  try {

    // from here on, any failed read (including hitting end of file) throws
    is.exceptions(ios::badbit | ios::failbit);

    timer tmr;

    writer.start_element("text_fsa");

    string line, flags;

    // the first line is always consumed, even when the count is given
    getline(is, line);

    if (nbsentences == -1) {
      istringstream header(line);
      if (! (header >> nbsentences)) {
        throw runtime_error("fst2xml : bad sentence count in " + fstpath.string());
      }
    }
    if (nbsentences < 0) {
      throw runtime_error("fst2xml : negative sentence count for " + fstpath.string());
    }

    progress_display show_progress(nbsentences, cout);

    writer.write_attribute("size", boost::lexical_cast<string>(nbsentences));

    for (int i = 0; i < nbsentences; ++i) {

      // sentence header: '-', optional digits, blanks, then the sentence text
      getline(is, line);
      if (line.empty() || line[0] != '-') {
        throw runtime_error("fst2xml : missing sentence header in " + fstpath.string());
      }

      writer.start_element("sentence_fsa");

      // strip trailing blanks; the casts keep isspace/isdigit defined for
      // bytes above 0x7f (accented characters)
      string::size_type end = line.size() - 1;
      while (end > 0 && isspace(static_cast<unsigned char>(line[end]))) { end--; }
      line.resize(end + 1);

      string::size_type begin = 1;
      while (begin < line.size() && isdigit(static_cast<unsigned char>(line[begin]))) { begin++; }
      while (begin < line.size() && isspace(static_cast<unsigned char>(line[begin]))) { begin++; }

      writer.write_element("text", line.substr(begin));

      // state lines, numbered from 0, up to the 'f' line closing the automaton
      int qno = 0;
      while (1) {

        if (! getline(is, line) || line.empty()) {
          throw runtime_error("fst2xml: " + fstpath.string() + ": bad file format");
        }
        if (line[0] == 'f') { break; }

        istringstream iss(line);
        iss >> flags;

        writer.start_element("state");
        writer.write_attribute("no", boost::lexical_cast<string>(qno));
        // a state whose flag field starts with 't' is final
        writer.write_attribute("final", (! flags.empty() && flags[0] == 't') ? "1" : "0");

        int label, dest;
        while (iss >> label) {

          if (! (iss >> dest)) {
            throw runtime_error("fst2xml: " + fstpath.string()
                                + ": transition without destination state");
          }

          // the label comes from the file: never index the table blindly
          if (label < 0
              || static_cast<vector<vector<dic_lex_entry> >::size_type>(label) >= FST_symbols.size()) {
            throw runtime_error("fst2xml: " + fstpath.string() + ": unknown symbol label "
                                + boost::lexical_cast<string>(label));
          }

          const vector<dic_lex_entry> & entries = FST_symbols[label];
          const string to = boost::lexical_cast<string>(dest);

          // one transition per lexical entry attached to the symbol
          for (vector<dic_lex_entry>::const_iterator it = entries.begin();
               it != entries.end(); ++it) {
            writer.start_element("transition");
            writer.write_attribute("to", to);
            it->write_XML(writer);
            writer.end_element();
          }
        }
        writer.end_element(); // state
        qno++;
      }
      writer.end_element(); // sentence_fsa

      ++show_progress;
    }

    writer.end_element(); // text_fsa

    cerr << " done. " << nbsentences << " translated. " << tmr.elapsed() << "s.\n";

  } catch (const ios::failure &) {
    throw runtime_error("fst2xml: parsing error in file " + fstpath.string());
  }

}


/**
 * Prints the command-line synopsis to stderr and terminates the program
 * with exit status 1. Does not return.
 */
void usage() {
  cerr << "usage: " << progname;
  cerr << " -c <coresp> [ -gz ] [ -o <outfname> ] [ -n <nbsentences>] <fsttext>";
  cerr << endl;
  exit(1);
}


int main(int argc, char ** argv) try {

  fs::path fst2path, corresppath, outpath;

  int nbsentences = -1;
  int compression = 0;

  progname = *argv;
  argv++, argc--;

  while (argc) {

    string arg = *argv;

    if (arg == "-c") {

      argv++, argc--;
      if (! argc) { usage(); }
      //correspname = *argv;
      corresppath = fs::path(*argv, fs::native);

    } else if (arg == "-o") {

      argv++, argc--;
      if (! argc) { usage(); }
      //      outfname = *argv;
      outpath = fs::path(*argv, fs::native);

    } else if (arg == "-n") {

      argv++, argc--;
      if (! argc) { usage(); }
      istringstream(*argv) >> nbsentences;

    } else if (arg == "-gz") {

      compression = 6;

    } else if (arg == "-h") {

      usage();
 
    } else { fst2path = fs::path(*argv, fs::native); }

    argv++, argc--;
  }

  if (fst2path.empty() || corresppath.empty()) { usage(); }

  if (outpath.empty()) {
    outpath = fs::change_extension(fst2path, ".fsa");
    if (compression) {
      outpath = outpath.branch_path() / (outpath.leaf() + ".gz");
    }
  }

  corresp.read(corresppath);
  xmlwriter writer(outpath, compression);
  writer.set_indent(true);

  cout << "translating " << fst2path.string() << " into " << outpath.string() << endl;
  load_FST_symbols(fst2path);

  writer.start_document();
  fst2xml(fst2path, writer, nbsentences);
  writer.end_document();

  return 0;

} catch (exception & e) {

  cerr << "fatal error : " << e.what() << endl;
  exit(1);
}
