#include <fstream>
#include <sstream>
#include <cassert>

#include <boost/progress.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/filesystem/convenience.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/function_output_iterator.hpp>


#include <outilex/DELAcorresp.h>
#include <outilex/dic_lex_entry.h>
#include <outilex/lexico_hash_table.h>
#include <outilex/xml.h>
#include <outilex/xml-names.h>
#include <outilex/xml_format_lexmask.h>
#include <outilex/lexical_mask.h>


using namespace std;
using namespace boost;
using namespace xmlnames;

namespace fs = boost::filesystem;


// Bit OR-ed into the per-state transition count stored in the flat "core"
// array (set in fst2xml, tested and masked out in core_write_xml) to mark a
// state as final.
const int FINAL = 0x80000000;

// Name the program was invoked with; assigned in main() and printed by usage().
char * progname;

// Correspondence table, read in main() from the file given with -c and used by
// load_FST_symbols() to expand each text label.
DELAcorresp corresp;
//vector<vector<dic_lex_entry> > FST_symbols;
// Symbol table filled by load_FST_symbols(): FST_symbols[n] holds the lexical
// masks obtained from the n-th label line; fst2xml() indexes it by the label
// numbers read on the transitions.
vector<vector<lexical_mask> > FST_symbols;


template<typename OutputIterator>
struct lexmask_transformer {

  OutputIterator out;
  ling_def * lingdef;

  lexmask_transformer(const OutputIterator & it, ling_def * ldef) : out(it), lingdef(ldef) {}

  void operator()(const dic_lex_entry & entry) {
    try {
      *out = lexical_mask(entry, lingdef);
      ++out;
    } catch (exception & e) {
      cerr << "error with dic entry : " << entry << ": " << e.what() << "\n";
    }
  }
};


/**
 * Convenience factory: wraps `it` in a lexmask_transformer and returns the
 * boost function_output_iterator built on it, so that dic_lex_entry values
 * assigned through the result reach `it` as lexical_mask values.
 */
template<typename OutputIterator>
function_output_iterator<lexmask_transformer<OutputIterator> >
lexmask_inserter(const OutputIterator & it, ling_def * ldef) {
  typedef lexmask_transformer<OutputIterator> transformer_type;
  return make_function_output_iterator(transformer_type(it, ldef));
}


void dump_symbols(const vector<lexical_mask> & v) {
  for (int j = 0; j < v.size(); ++j) {
    cerr << v[j] << ", ";
  }
  cerr << endl;
}


void dump_FST_symbols() {
  for (int i = 0; i < FST_symbols.size(); ++i) {
    dump_symbols(FST_symbols[i]);
  }
}


/**
 * Loads the symbol section of the text automaton file `fstpath` into the
 * global FST_symbols (which is cleared first).
 *
 * The first line of the file gives the number of sentence automata.  They are
 * skipped by counting the lines that start with 'f'.  Every following line, up
 * to the next line starting with 'f', is a label: its first character (expected
 * to be '%') is dropped, a trailing '\r' is removed, and the rest is expanded
 * by corresp.load_text_label() into lexical masks.  The masks of the n-th label
 * line end up in FST_symbols[n].
 *
 * @param fstpath  file to read
 * @param lingdef  linguistic definitions used to build the lexical masks
 * @return always true; failures are reported by exception
 * @throws runtime_error if the file cannot be opened, if the sentence count
 *         cannot be parsed, if a label line is empty, or if reading fails
 *         (including a premature end of file)
 */
bool load_FST_symbols(const fs::path & fstpath, ling_def * lingdef) {

  FST_symbols.clear();

  fs::ifstream is(fstpath);

  if (! is) { throw runtime_error("load_FST_symbols : unable to open " + fstpath.string()); }

  try {

    // from here on every failed read (end of file included) raises ios::failure
    is.exceptions(ios::badbit | ios::failbit);

    string line;
    getline(is, line);

    int nbsentences = 0;
    if (! (istringstream(line) >> nbsentences)) {
      throw runtime_error("load_FST_symbols : bad sentence count in " + fstpath.string());
    }

    // skip the sentence automata: each one is closed by a line starting with 'f'
    for (int skipped = 0; skipped < nbsentences; ) {
      getline(is, line);
      if (! line.empty() && line[0] == 'f') { ++skipped; }
    }

    // label lines, up to the closing 'f' line
    while (getline(is, line) && (line.empty() || line[0] != 'f')) {

      // an empty line would make line.size() - 1 wrap around below
      if (line.empty()) {
        throw runtime_error("load_FST_symbols : empty symbol line in " + fstpath.string());
      }

      assert(line[0] == '%');
      if (line[line.size() - 1] == '\r') { line.resize(line.size() - 1); }

      FST_symbols.push_back(vector<lexical_mask>());

      corresp.load_text_label(line.substr(1),
                              lexmask_inserter(back_inserter(FST_symbols.back()), lingdef));
    }
  } catch (const ios::failure &) {
    throw runtime_error("FST_symbol: parsing error in " + fstpath.string());
  }

  return true;
}



// Disabled code (compiled out by the #if 0 below): writes one dic_lex_entry as
// a LEXMASK_ELEM element containing its form, lemma, POS and one FEAT_ELEM per
// feature.  Nothing in this file calls it.
#if 0
void dic_entry_write_xml(xmlwriter & writer, const dic_lex_entry & e) {

  writer.start_element(LEXMASK_ELEM);
  
  writer.start_element(FORM_ELEM);
  writer.write_element(FEAT_ELEM, e.form);
  writer.end_element();

  writer.start_element(LEMMA_ELEM);
  writer.write_element(FEAT_ELEM, e.lemma);
  writer.end_element();

  writer.start_element(POS_ELEM);
  writer.write_attribute(VAL_ATTR, e.POS);
  writer.end_element();

  for (map<string, string>::const_iterator it = e.feats.begin();
       it != e.feats.end(); ++it) {
    writer.start_element(FEAT_ELEM);
    writer.write_attribute(NAME_ATTR, it->first);
    writer.write_attribute(VAL_ATTR, it->second);
    writer.end_element();
  }
  writer.end_element();
}
#endif


/**
 * Writes a block of sentence automata as XML.
 *
 * `core` is a flat encoding of the automata, one after the other:
 *   - the number of states of the automaton,
 *   - then, for each state, its number of transitions (OR-ed with FINAL when
 *     the state is final) followed by one (label, destination) pair per
 *     transition.
 *
 * @param writer  XML writer receiving one SENTENCE_FSA_ELEM per automaton
 * @param core    flat encoding described above; nothing is written if empty
 * @param txt     points to the sentence texts, one per automaton encoded in
 *                `core`, in the same order
 */
void core_write_xml(xmlwriter & writer, const vector<int> & core, const string * txt) {

  // & core[0] is undefined on an empty vector, and there is nothing to write
  if (core.empty()) { return; }

  const int * curr = & core[0];
  const int * const end = curr + core.size();

  while (curr < end) {

    const int nstates = *curr;
    ++curr;

    writer.start_element(SENTENCE_FSA_ELEM);
    writer.write_attribute(SIZE_ATTR, lexical_cast<string>(nstates));

    writer.write_element(TEXT_ELEM, *txt);
    ++txt;

    for (int q = 0; q < nstates; ++q) {

      // state header: transition count, plus the FINAL bit for final states
      const int header = *curr;
      ++curr;

      writer.start_element(STATE_ELEM);

      if (header & FINAL) {
        writer.write_attribute(FINAL_ATTR, "1");
      }

      const int ntrans = header & ~FINAL;

      for (int t = 0; t < ntrans; ++t) {
        writer.start_element(TRANS_ELEM);
        writer.write_attribute(LABEL_ATTR, lexical_cast<string>(curr[0]));
        writer.write_attribute(TO_ATTR, lexical_cast<string>(curr[1]));
        writer.end_element();
        curr += 2;
      }
      writer.end_element(); // STATE_ELEM
    }
    writer.end_element(); // sentence fsa
  }
}


/**
 * Translates the sentence automata of the text automaton file `fstpath` into
 * XML, written through `writer` inside a single TEXT_FSA_ELEM element.
 *
 * The sentences are processed by blocks of at most `block_size` sentences.
 * For each block the function writes a LEXIC_ELEM listing the distinct lexical
 * masks used by the block, followed by the automata themselves (see
 * core_write_xml), whose transition labels are indices into that lexic.
 *
 * The transition labels read from the file are indices into the global
 * FST_symbols, which must therefore be loaded (load_FST_symbols) beforehand.
 * The lexic stores pointers to the masks held by FST_symbols.
 *
 * @param fstpath      file to read
 * @param writer       XML writer receiving the output
 * @param block_size   maximal number of sentences per block; a value <= 0 puts
 *                     all the sentences in a single block
 * @param nbsentences  number of sentences to translate, or -1 to use the count
 *                     found on the first line of the file
 * @throws runtime_error if the file cannot be opened, is malformed, or if
 *         reading fails (including a premature end of file)
 */
void fst2xml(const fs::path & fstpath, xmlwriter & writer, int block_size,
             int nbsentences = -1) try {

  fs::ifstream is(fstpath);

  if (! is) { throw runtime_error("fst2xml : unable to open " + fstpath.string()); }

  // from here on every failed read (end of file included) raises ios::failure
  is.exceptions(ios::badbit | ios::failbit);

  string line;

  getline(is, line);

  if (nbsentences == -1) {
    if (! (istringstream(line) >> nbsentences)) {
      throw runtime_error("fst2xml : bad sentence count in " + fstpath.string());
    }
  }

  if (nbsentences < 0) {
    throw runtime_error("fst2xml : invalid number of sentences for " + fstpath.string());
  }

  // a non-positive block size would never make progress in the loop below
  if (block_size <= 0 || nbsentences < block_size) {
    block_size = nbsentences;
  }

  timer tmr;

  writer.start_element(TEXT_FSA_ELEM);
  writer.write_attribute(SIZE_ATTR, boost::lexical_cast<string>(nbsentences));

  progress_display show_progress(nbsentences, cout);

  vector<string> text(block_size);

  // labels read from the file must index an existing entry of FST_symbols
  const int nbsymbols = FST_symbols.size();

  int no = 0;
  lexico_hash_table<const lexical_mask *> lexic;
  vector<int> core;

  while (no < nbsentences) {

    lexic.clear();
    core.clear();

    for (int i = 0; no < nbsentences && i < block_size; ++i, ++no, ++show_progress) { // readafsa

      // slot for the number of states, filled once the automaton is read
      const int start = core.size();
      core.push_back(0);

      getline(is, line);
      if (line.empty() || line[0] != '-') {
        throw runtime_error("fst2xml : bad header line in " + fstpath.string());
      }

      // header line: '-', digits, blanks, then the sentence text.
      // The casts keep isspace/isdigit defined for non-ASCII (negative) chars.
      string::size_type end = line.size() - 1;
      while (end > 0 && isspace(static_cast<unsigned char>(line[end]))) { end--; }
      line.resize(end + 1);

      string::size_type begin = 1;
      while (begin < line.size() && isdigit(static_cast<unsigned char>(line[begin]))) { begin++; }
      while (begin < line.size() && isspace(static_cast<unsigned char>(line[begin]))) { begin++; }

      text[i] = line.substr(begin);

      int qno = 0;
      while (true) { // readastate

        if (! getline(is, line) || line.empty()) {
          throw runtime_error("fst2xml: " + fstpath.string() + ": bad file format");
        }
        if (line[0] == 'f') { break; } // end of this automaton

        // slot for the transition count (and FINAL bit) of this state
        const int currq = core.size();
        core.push_back(0);

        istringstream iss(line);
        string flags;
        iss >> flags;

        int ntrans = 0;
        int label;
        while (iss >> label) {

          int dest;
          if (! (iss >> dest)) {
            throw runtime_error("fst2xml: " + fstpath.string()
                                + ": transition without destination state");
          }

          if (label < 0 || label >= nbsymbols) {
            throw runtime_error("fst2xml: " + fstpath.string() + ": unknown label "
                                + lexical_cast<string>(label));
          }

          // one output transition per lexical mask of the symbol
          const vector<lexical_mask> & masks = FST_symbols[label];
          for (vector<lexical_mask>::const_iterator it = masks.begin();
               it != masks.end(); ++it) {
            int labno = lexic.add_if_not_here(lexical_cast<string>(*it), & (*it));
            core.push_back(labno);
            core.push_back(dest);
            ++ntrans;
          }
        }
        core[currq] = ntrans;
        if (! flags.empty() && flags[0] == 't') { core[currq] |= FINAL; }
        ++qno;
      }
      core[start] = qno;
    }

    /* dump lexic */
    writer.start_element(LEXIC_ELEM);
    writer.write_attribute(SIZE_ATTR, lexical_cast<string>(lexic.size()));
    for (int i = 0; i < lexic.size(); ++i) {
      lexmask_write_xml(writer, *(lexic[i]));
    }
    writer.end_element();

    core_write_xml(writer, core, & text[0]);
  }

  writer.end_element();

  cout << "done. " << nbsentences << " sentence(s) translated, " << tmr.elapsed() << "s.\n";

} catch (const ios::failure &) {
  throw runtime_error("fst2xml: parsing error in file " + fstpath.string());
}
  




/**
 * Prints the command-line synopsis on cerr and terminates the program with
 * exit status 1.  Never returns.
 *
 * The linguistic definition file is mandatory: main() takes it from -l or,
 * failing that, from the LINGDEF environment variable.
 */
void usage() {
  cerr << "usage: " << progname <<
    " -c <coresp> [-l <lingdef>][-gz][-b <blocsize>][-o <outfname>][-n <nbsentences>] <fsttext>"
       << endl
       << "(without -l, the lingdef file is taken from the LINGDEF environment variable)"
       << endl;
  exit(1);
}


int main(int argc, char ** argv) try {

  fs::path fst2path, corresppath, lingdefpath, outpath;

  int nbsentences = -1;
  int compression = 0;
  int block_size = 1000;

  char * text = getenv("LINGDEF");
  if (text) { lingdefpath = fs::path(text); }

  progname = *argv;
  argv++, argc--;

  while (argc) {

    string arg = *argv;

    if (arg == "-c") {

      argv++, argc--;
      if (! argc) { usage(); }
      corresppath = fs::path(*argv);

    } else if (arg == "-o") {

      argv++, argc--;
      if (! argc) { usage(); }
      outpath = fs::path(*argv);

    } else if (arg == "-l") {

      argv++, argc--;
      if (! argc) { usage(); }
      lingdefpath = fs::path(*argv);

    } else if (arg == "-n") {

      argv++, argc--;
      if (! argc) { usage(); }
      nbsentences = lexical_cast<int>(*argv);

    } else if (arg == "-b") {

      argv++, argc--;
      if (! argc) { usage(); }
      block_size = lexical_cast<int>(*argv);

    } else if (arg == "-gz") {

      compression = 6;

    } else if (arg == "-h") {

      usage();
 
    } else { fst2path = fs::path(*argv); }

    argv++, argc--;
  }

  if (fst2path.empty() || corresppath.empty() || lingdefpath.empty()) { usage(); }

  if (outpath.empty()) {
    outpath = fs::change_extension(fst2path, ".fsa");
    if (compression) {
      outpath = outpath.branch_path() / (outpath.leaf() + ".gz");
    }
  }

  corresp.read(corresppath);

#warning "we have to allocate lingdef on the heap (and not free it) because we still need it"
#warning "on FST_symbol deallocation which happen after main"

  //ling_def lingdef(lingdefpath);
  ling_def * lingdef = new ling_def(lingdefpath);

  xmlwriter writer(outpath, compression);
  writer.set_indent(true);

  cout << "translating " << fst2path.string() << " into " << outpath.string() << endl;

  if (block_size <= 0) {
    block_size = numeric_limits<int>::max();
  }
  cout << "bloc size = " << block_size << " sentences.\n";

  cout << "loading symbols ...\n";
  load_FST_symbols(fst2path, lingdef);
  cout << "done.\n";

  writer.start_document();
  fst2xml(fst2path, writer, block_size, nbsentences);
  writer.end_document();

  return 0;

} catch (exception & e) {

  cerr << "fatal error : " << e.what() << endl;
  exit(1);
}

