#include <iostream>
#include <iterator>
#include <set>
#include <stdexcept>
#include <string>
#include <vector>

#include <boost/filesystem/path.hpp>
#include <boost/filesystem/fstream.hpp>

#include <outilex/DELAentry.h>
#include <outilex/DELAcorresp.h>

#include <outilex/stringtok.h>
#include <outilex/usage.h>

using namespace std;
using namespace boost;
namespace fs = boost::filesystem;

/* Help text for the delaf-light command line.
 * NOTE(review): presumably consumed by the usage() helpers declared in
 * <outilex/usage.h>, which would explain the external linkage -- confirm. */
const char * USAGE_DESCRIPTION =
"usage: delaf-light -c <delafcorresp> <delaf>\n"
"\n"
"\"lighten\" a delaf by suppressing syntactic and POS codes\n"
"not specified in the specified delafcorresp file.\n";

namespace {

/* Codes already reported on stderr, so that each one is reported only once.
 * Holds either a bare POS name (unknown POS) or "<POS>+<feature>" (syntactic
 * feature not listed for that POS). main() reads its size for the summary. */
set<string> unknown_codes;

/**
 * Copy DELA entries from `in` to `out`, keeping only the known codes.
 *
 * Entries are read with getDELAline() until it reports failure. For each one:
 *  - the POS (`cat`) is looked up in `corresp`; when the returned description
 *    has an empty name the POS is unknown and the whole entry is dropped;
 *  - "form,lemma.cat" is written, followed by "+feature" for every '+'
 *    separated syntactic feature found in the POS description's SYNTs;
 *  - the inflectional part is written unchanged, followed by a newline.
 *
 * Each dropped POS or feature is reported once on stderr and recorded in
 * `unknown_codes`. Progress is shown on stdout: one '.' every 10000 written
 * entries, preceded by a space every 100000.
 *
 * @param in      stream the entries are read from
 * @param out     stream the lightened entries are written to
 * @param corresp POS table, queried with operator[] on the POS name
 * @return number of entries written to `out` (dropped entries not counted)
 */
int delaf_light(istream & in, ostream & out, DELAcorresp & corresp) {

  int nbentries = 0;

  string form, lemma, cat, synt, flex;

  // Reused for every entry (cleared before each tokenization) so that its
  // storage is not reallocated for each line.
  vector<string> feats;

  while (getDELAline(in, form, lemma, cat, synt, flex)) {

    const DELAcorresp::POS_inf & inf = corresp[cat];

    if (inf.name.empty()) { // unknown POS: drop the entry
      // insert().second is true only the first time the code is seen,
      // which gives "report once" with a single lookup.
      if (unknown_codes.insert(cat).second) {
        cerr << "unknown POS : " << cat << "\n";
      }
      continue;
    }


    /* syntactic features */

    out << form << "," << lemma << "." << cat;

    feats.clear();
    stringtok(synt, "+", back_inserter(feats));

    // Iterator loop: avoids comparing a signed index with the unsigned size().
    for (vector<string>::const_iterator it = feats.begin(); it != feats.end(); ++it) {
      const string & feat = *it;
      if (inf.SYNTs.find(feat) != inf.SYNTs.end()) {
        out << "+" << feat;
      } else if (unknown_codes.insert(cat + "+" + feat).second) {
        // the key is prefixed with the POS: a feature is unknown per POS
        cerr << "unknown code '" << feat << "' in " << cat << " POS\n";
      }
    }


    /* inflexional */

    out << flex << "\n";

    ++nbentries;
    if ((nbentries % 10000) == 0) {
      if ((nbentries % 100000) == 0) { cout << ' ' << flush; }
      cout << '.' << flush;
    }
  }

  return nbentries;
}


} // namespace ""

int main(int argc, char ** argv) try {
  
  fs::path ipath, opath, corresppath;

  argv++, argc--;
  if (argc == 0) { usage(); }

  
  while (argc) {
    string arg = *argv;

    if (arg[0] == '-') {
    
      if (arg == "-h") {
        usage();
      } else if (arg == "-c") {
        argv++, argc--;
        if (argc == 0) { arg_needed(arg); }
        corresppath = fs::path(*argv);
      } else {
        unknown_arg(arg);
      }
    } else {
      ipath = fs::path(arg);
    }
    argv++, argc--;
  }

  if (corresppath.empty() || ipath.empty()) { bad_args("argument missing"); }

  if (opath.empty()) {
    opath = ipath.branch_path() / (ipath.leaf() + ".light");
  }


  DELAcorresp corresp(corresppath);

  fs::ifstream in(ipath);
  fs::ofstream out(opath);

  int nbentries = delaf_light(in, out, corresp);

  cout << "\ndone. " << nbentries << " entries. " << unknown_codes.size() << " codes suprressed.\n"
    << "result in " << opath.string() << ".\n";

} catch (exception & e) {
  cerr << "fatal error: " << e.what() << "\n";
  exit(1);
}

