#include <iostream>
#include <sstream>
#include <set>
#include <string>

#include <boost/filesystem/path.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/filesystem/convenience.hpp>
#include <boost/progress.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/scoped_ptr.hpp>

#include <outilex/text_fsa.h>
#include <outilex/lingdef.h>
#include <outilex/sentence_fsa.h>
#include <outilex/foreach.h>
#include <outilex/usage.h>



using namespace std;
using namespace boost;
namespace fs = boost::filesystem;



// Command-line synopsis of this tool.
// Presumably picked up by the usage()/bad_args()/arg_needed() helpers declared
// in <outilex/usage.h> -- TODO confirm against that header.
// NOTE(review): main() below also accepts "-h" and reads the LINGDEF
// environment variable as the default for "-l"; neither is mentioned here.
const char * USAGE_DESCRIPTION =
"usage: tfsa-dump-lexic [-l <lingdef>][-o <out>] <txtfsa>\n";



namespace {

/**
 * Collect the entries of a lexicon into a set of strings.
 *
 * Every lexical mask in lex.lex->tab is visited. Masks whose pos is one of
 * the punc / number / unknown / epsilon / lex pos of ldef are ignored. Each
 * remaining mask is converted to text with lexical_cast; that text must have
 * the form "<...>" with at least one character between the brackets, and the
 * part between the brackets is inserted into dic.
 *
 * @param dic   set receiving the extracted entries (duplicates collapse)
 * @param ldef  linguistic definition providing the pos values to ignore
 * @param lex   lexicon whose masks are scanned
 * @throws runtime_error ("bad lexmask : <text>") when a retained mask does
 *         not have the expected bracketed textual form
 */
void add_lexic(set<string> & dic, ling_def & ldef, const LEXIC & lex) {

  foreach_(const lexical_mask & mask, lex.lex->tab) {

    // Masks carrying one of these pos values are not dumped.
    const bool excluded_pos =
         mask.pos == ldef.punc_pos()
      || mask.pos == ldef.number_pos()
      || mask.pos == ldef.unknown_pos()
      || mask.pos == ldef.epsilon_pos()
      || mask.pos == ldef.lex_pos();

    if (excluded_pos) { continue; }

    const string repr = lexical_cast<string>(mask);
    const string::size_type len = repr.size();

    // Expected textual form: '<' + non-empty body + '>'.
    const bool bracketed = len >= 3 && repr[0] == '<' && repr[len - 1] == '>';

    if (! bracketed) {
      throw runtime_error("bad lexmask : " + repr);
    }

    // Keep only what lies between the angle brackets.
    dic.insert(repr.substr(1, len - 2));
  }
}


/**
 * Write every entry of dic to os, one entry per line.
 *
 * Entries are emitted in the iteration order of the set, each followed by a
 * single "\n". The stream is neither flushed nor checked for errors here.
 *
 * @param dic  entries to write
 * @param os   destination stream
 */
void dump_dic(const set<string> & dic, ostream & os) {

  for (set<string>::const_iterator entry = dic.begin(); entry != dic.end(); ++entry) {
    os << *entry << "\n";
  }
}

} // namespace ""


/**
 * Entry point of tfsa-dump-lexic.
 *
 * Reads the sentence automata of a text FSA one after the other, gathers the
 * entries of their lexicons with add_lexic() and writes the resulting set,
 * one entry per line, to the output file with dump_dic().
 *
 * Command line:
 *   -h          calls usage()
 *   -l <path>   lingdef path; overrides the LINGDEF environment variable
 *   -o <path>   output path; when absent, the text FSA path with its
 *               extension changed to ".dic" is used
 *   <txtfsa>    input text FSA; if several are given the last one is kept
 *
 * bad_args() is called when no lingdef path or no text FSA path is known
 * after parsing. Any exception derived from `exception` that escapes the
 * body is reported on cerr and the process exits with status 1.
 */
int main(int argc, char ** argv) try {

  fs::path lingdefpath, textpath, opath;

  // Default lingdef location comes from the environment; "-l" overrides it.
  {
    char * text = getenv("LINGDEF");

    if (text) {
      lingdefpath = fs::path(text, fs::native);
    }
  }

  // Skip the program name.
  argv++, argc--;
  if (argc == 0) { usage(); }

  while (argc) {

    string arg = *argv;

    if (arg == "-h") {

      usage();

    } else if (arg == "-l") {

      argv++, argc--;
      if (! argc) { arg_needed("-l"); }
      lingdefpath = fs::path(*argv, fs::native);

    } else if (arg == "-o") {

      argv++, argc--;
      // Report the option that is actually missing its argument
      // (this used to name "-l" by mistake).
      if (! argc) { arg_needed("-o"); }
      opath = fs::path(*argv, fs::native);

    } else {
      // Anything that is not a recognised option is taken as the input path.
      textpath = fs::path(*argv, fs::native);
    }
    argv++, argc--;
  }

  if (lingdefpath.empty() || textpath.empty()) { bad_args(); }

  if (opath.empty()) {
    opath = fs::change_extension(textpath, ".dic");
  }


  ling_def lingdef(lingdefpath);

  // scoped_ptr takes ownership of the reader returned by new_itext_fsa().
  scoped_ptr<itext_fsa> p_itext(new_itext_fsa(textpath, & lingdef));
  itext_fsa & itext = *p_itext;

  // Open the output before the scan so that an unwritable destination is
  // reported before any processing is done.
  fs::ofstream os(opath);
  if (! os) { cerr << "unable to open " << opath.string() << endl; exit(1); }


  sentence_fsa fsa;
  set<string> dic;
  LEXIC lex;

  progress_display show_progress(itext.size(), cout);

  while  (itext >> fsa) {
    // Only rescan when this sentence's lexicon differs from the one
    // processed last.
    if (lex != fsa.lexic) {
      lex = fsa.lexic;
      add_lexic(dic, lingdef, lex);
    }
    ++show_progress;
  }

  dump_dic(dic, os);

  cout << "done. " << dic.size() << " dic entries. result in " << opath.string() << ".\n";

} catch (const exception & e) {
  cerr << "fatal error: exception caught: " << e.what() << endl;
  exit(1);
}
