#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <boost/progress.hpp>
#include <boost/scoped_ptr.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/filesystem/convenience.hpp>
#include <boost/filesystem/fstream.hpp>

#include <outilex/text_fsa.h>
#include <outilex/sentence_fsa.h>
#include <outilex/wrtn_grammar.h>

#include <outilex/wparsing_helper.h>
#include <outilex/concordances.h>

#include <outilex/usage.h>


using namespace std;
using namespace boost;
namespace fs = boost::filesystem;



/*
 * Command line help text of the concordancer tool.
 *
 * It lists every option recognised by the argument loop of main(); keep both
 * in sync when an option is added or removed.
 * NOTE(review): presumably printed by usage()/bad_args() from
 * <outilex/usage.h> -- those helpers are not visible in this file.
 */
const char * USAGE_DESCRIPTION =
"usage: concordancer  -l <lingdef> -gram <gram> [-v][-longest-match][-tags][-tree][-w][-m][-all]"
"[-ipath][-iout][-dontsurf][-timeout <s>][-o <concord>] <txtfsa>\n"
"\n"
"with options :\n"
" <txtfsa>       : input text fsa\n"
" -l <lingdef>   : linguistic definition file (defaults to the LINGDEF environment variable)\n"
" -gram <gram>   : wrtn grammar to apply\n"
" -o <concord>   : name of the resulting concordance index file\n"
"                  (defaults to concord.idx in the directory of <txtfsa>)\n"
" -longest-match : keep only longest matching sequences\n"
" -tags          : display morpho-syntactic tags\n"
" -tree          : display syntactic tree\n"
" -w             : display weights of matching sequences\n"
" -m             : merge grammar's outputs into concordances\n"
" -all           : shortcut for : -tags -tree -w -m\n"
" -timeout <s>   : specify a maximum amount of time (in seconds) to spend to parse a sentence\n"
" -ipath         : keep only one concordance for the same text segment\n"
"                  (can be a lot faster for ambiguous grammars)\n"
" -iout          : ignore differences between grammar outputs while parsing\n"
" -dontsurf      : disable surf parsing (enabled by default)\n"
" -v             : verbose mode (for debugging)\n"
" -h             : display this help message\n"
"\n"
"apply a wrtn grammar to a text fsa and saves the matching sequences index into\n"
"a file (default to concord.idx), which can be processed by make-concord-html.\n\n";


int main(int argc, char ** argv) try {

  fs::path txtpath, lingdefpath, grampath, opath;
  bool longest_match = false;
  bool verbose = false;
  double timeout = 0.;

  char * text = getenv("LINGDEF");
  if (text) {
    lingdefpath = fs::path(text, fs::native);
  }


  /* defaut to surf parsing */
  int PARSER_FLAGS  = wrtn_parser::SURF_PARSING;
  int CONCORD_FLAGS = 0;

  argv++, argc--;

  if (argc == 0) { usage(); }

  while (argc) {
    
    string arg = *argv;
    
    if (arg == "-l") {
      
      argv++, argc--;
      if (argc == 0) { bad_args(); }
      lingdefpath = fs::path(*argv, fs::native);
    
    } else if (arg == "-o") {
      
      argv++, argc--;
      if (argc == 0) { bad_args(); }
      opath = fs::path(*argv, fs::native);
    
    } else if (arg == "-gram") {
 
      argv++, argc--;
      if (argc == 0) { bad_args(); }
      grampath = fs::path(*argv, fs::native);
 
    } else if (arg == "-ipath") {
    
      PARSER_FLAGS |= wrtn_parser::IGNORE_DIFF_PATHS;

    } else if (arg == "-iout") {
    
      PARSER_FLAGS |= wrtn_parser::IGNORE_DIFF_OUTPUTS;

    } else if (arg == "-timeout") {

      argv++, argc--;
      if (argc == 0) { bad_args(); }
      timeout = lexical_cast<double>(*argv);

    } else if (arg == "-dontsurf") {
    
      PARSER_FLAGS &= ~(wrtn_parser::SURF_PARSING);

    } else if (arg == "-longest-match") {
 
      CONCORD_FLAGS |= CONCORD_LONGEST_MATCH;
      longest_match = true;

    } else if (arg == "-tags") {
 
      CONCORD_FLAGS |= CONCORD_SHOW_TAGS;

    } else if (arg == "-tree") {
 
      CONCORD_FLAGS |= CONCORD_SHOW_TREE;

    } else if (arg == "-w") {
 
      CONCORD_FLAGS |= CONCORD_SHOW_WEIGHTS;

    } else if (arg == "-m") {
 
      CONCORD_FLAGS |= CONCORD_SHOW_OUTPUTS;
    
    } else if (arg == "-all") {

      CONCORD_FLAGS |= CONCORD_SHOW_OUTPUTS
        |CONCORD_SHOW_WEIGHTS
        |CONCORD_SHOW_TREE
        |CONCORD_SHOW_TAGS;

    } else if (arg == "-v") {
 
      verbose = true;
    
    } else if (arg == "-h") {
    
      usage();
    
    } else {

      txtpath = fs::path(arg, fs::native);
    }

    argv++, argc--;
  }

  if (txtpath.empty() || lingdefpath.empty() || grampath.empty()) { bad_args(); }

  if (opath.empty()) { opath = txtpath.branch_path() / "concord.idx"; }


  ling_def lingdef(lingdefpath);

  scoped_ptr<itext_fsa> p_itext(new_itext_fsa(txtpath, & lingdef));
  itext_fsa & itext = *p_itext;

  int total = itext.size();

  wrtn_grammar gram(grampath, & lingdef);


  fs::ofstream out(opath);

  wchart_concordancer concordancer(out, gram.start_name(), CONCORD_FLAGS);
  progress_displayer<wchart_concordancer> displayer(concordancer, total);


  boost::timer tmr;

  wrtn_parse(itext, gram, displayer, PARSER_FLAGS, longest_match, timeout, verbose);

  cout << "done. " << displayer.count() << "/" << total << " sentences parsed. "
    << concordancer.nbmatch << " matching sequences. " << tmr.elapsed() << "s.\n"
    << "resulting concordances index in " << opath.string() << "\n";

  return 0;

} catch (exception & e) {

  cerr << "fatal error :" << e.what() << endl; exit(1);

} catch (...) { cerr << "caught an OVNI?\n"; exit(1); }

