

#include <iostream>
#include <stdexcept>

#include <boost/progress.hpp>
#include <boost/scoped_ptr.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/filesystem/convenience.hpp>
#include <boost/filesystem/fstream.hpp>

#include <outilex/text_fsa.h>
#include <outilex/wrtn_grammar.h>

#include <outilex/information_extraction.h>
#include <outilex/concordances.h>
#include <outilex/wparsing_helper.h>

#include <outilex/usage.h>



using namespace std;
using namespace boost;
namespace fs = boost::filesystem;



/* Command-line synopsis for this tool. It lists every option recognised by
 * the argument loop in main(), including -longest-match and -h.
 */
const char * USAGE_DESCRIPTION =
"usage: extract-infos -l <lingdef> -gram <fst> [-ipath][-iout][-longest-match][-o <outputres>][-h] <txtfsa>\n";



int main(int argc, char ** argv) try {

  fs::path txtpath, lingdefpath, grampath, infopath, concordpath;
  bool longest_match = true;

  char * text = getenv("LINGDEF");
  if (text) {
    lingdefpath = fs::path(text, fs::native);
  }


  /* defaut to surf parsing */
  int PARSER_FLAGS = wrtn_parser::SURF_PARSING;
  int CONCORD_FLAGS  = CONCORD_LONGEST_MATCH|CONCORD_SHOW_OUTPUTS;

  argv++, argc--;

  if (argc == 0) { usage(); }

  while (argc) {
    
    string arg = *argv;
    
    if (arg == "-l") {
      
      argv++, argc--;
      if (argc == 0) { bad_args(); }
      lingdefpath = fs::path(*argv, fs::native);
    
    } else if (arg == "-o") {
      
      argv++, argc--;
      if (argc == 0) { bad_args(); }
      infopath = fs::path(*argv, fs::native);
    
    } else if (arg == "-gram") {
      
      argv++, argc--;
      if (argc == 0) { bad_args(); }
      grampath = fs::path(*argv, fs::native);
 
    } else if (arg == "-ipath") {
    
      PARSER_FLAGS |= wrtn_parser::IGNORE_DIFF_PATHS;

    } else if (arg == "-iout") {
    
      PARSER_FLAGS |= wrtn_parser::IGNORE_DIFF_OUTPUTS;

    } else if (arg == "-longest-match") {
 
      longest_match = true;

    } else if (arg == "-h") {
    
      usage();
    
    } else {

      txtpath = fs::path(arg, fs::native);
    }

    argv++, argc--;
  }

  if (txtpath.empty() || lingdefpath.empty() || grampath.empty()) { bad_args(); }

  if (infopath.empty()) {
    infopath = txtpath.branch_path() / (txtpath.leaf() + ".infos");
  }
  if (concordpath.empty()) {
    concordpath = txtpath.branch_path() / "concord.idx";
  }


  ling_def lingdef(lingdefpath);

  scoped_ptr<itext_fsa> p_itext(new_itext_fsa(txtpath, & lingdef));
  itext_fsa & itext = *p_itext;

  wrtn_grammar gram(grampath, & lingdef);


  int total = itext.size();

  infos_extractor extractor(infopath, gram.start_name());

  fs::ofstream concordout(concordpath);
  wchart_concordancer concordancer(concordout, gram.start_name(), CONCORD_FLAGS);

  typedef tee_functor<infos_extractor, wchart_concordancer> dispatcher_type;
  dispatcher_type dispatcher(extractor,concordancer);

  progress_displayer<dispatcher_type> displayer(dispatcher, total);


  boost::timer tmr;

  /* this is where the black magic lies.
   */
  int nbsentence = wrtn_parse(itext, gram, displayer, PARSER_FLAGS, longest_match);


  cout << "done. " << nbsentence << "/" << total << " sentences parsed.\n"
    << extractor.nbmatch << " information extracted. "
    << concordancer.nbmatch << " matching sequences. " << tmr.elapsed() << "s.\n"
    << "infos in " << infopath.string()
    << " and concordance index in " << concordpath.string() << ".\n";

  return 0;

} catch (exception & e) {

  cerr << "fatal error :" << e.what() << endl; exit(1);

} catch (...) { cerr << "caught an OVNI?\n"; exit(1); }

