#include <iostream>
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <set>
#include <map>
#include <vector>

#include <outilex/DELAcorresp.h>
#include <outilex/stringtok.h>

using namespace std;

// Disabled (#if 0): none of these tables is compiled. The code in this file
// reads the corresponding data through DELAcorresp instead (corresp.garbage,
// corresp.nolose, posinf.SYNTs, posinf.FLEXs) -- presumably these globals
// predate that class; confirm before deleting.
#if 0
map<string, string> POSs;
map<char, vector<string> > FLEXs;
map<string, vector<string> > SYNTs;

set<string> garbage;
set<string> nolose;
set<char>   nolosef;
#endif

// When true, codes found in corresp.garbage and codes without a known mapping
// are also written to the XML output (as <garbage .../> and <nolose .../>
// elements). It is never modified in the code visible here, so those elements
// are currently not produced.
bool output_unknow_feats = false;

// Correspondence tables used by open_entry() and output_inflected();
// filled in main() by corresp.read(argv[1]).
DELAcorresp corresp;



// Read one "lemma#inflected" record from IS.
//
// A trailing '\r' (DOS line ending) is removed first. The text before the
// first '#' is stored in LEMMA and everything after it in INFLEX.
//
// Returns true when a well-formed record was read. Returns false at end of
// input, and also when the line contains no '#' (the line is then reported on
// cerr); the caller cannot tell these two cases apart, so a malformed line
// (including an empty one) ends the read loop in main().
bool getAline(std::istream & is, std::string & lemma, std::string & inflex) {

  std::string line;

  if (! std::getline(is, line)) { return false; }

  // an empty line has no last character: line[line.size() - 1] would be out of bounds
  if (! line.empty() && line[line.size() - 1] == '\r') { line.resize(line.size() - 1); }

  // keep the full size_type width so that the comparison with npos is exact
  const std::string::size_type pos = line.find('#');
  if (pos == std::string::npos) { std::cerr << "bad line : "<< line << "\n"; return false; }

  lemma.assign(line, 0, pos);
  inflex.assign(line, pos + 1, std::string::npos);
  return true;
}


// Strip the first and the last character of S, i.e. the apostrophes around a
// quoted code ('PCN' -> PCN): the codes are written inside single-quoted XML
// attributes, where a literal apostrophe is not welcome.
// Throws std::out_of_range when S is empty.

static inline std::string chomp_ap(const std::string & s) {
  const std::string::size_type inner_len = s.size() - 2;
  return std::string(s, 1, inner_len);
}


// Write the DOCTYPE declaration that binds the document to dico.dtd.
void output_dtd(std::ostream & os) {
  static const char doctype[] = "<!DOCTYPE dico SYSTEM \"dico.dtd\">\n";
  os << doctype;
}

// Write the opening tag of the <dico> root element (to cout by default).
void open_dic(std::ostream & os = std::cout) {
  static const char root_open[] = "<dico>\n";
  os << root_open;
}

// Write the closing tag of the <dico> root element.
void close_dic(std::ostream & os) {
  static const char root_close[] = "</dico>\n";
  os << root_close;
}


void open_entry(ostream & os, const string & lemmasynt, DELAcorresp::POS_inf & posinf) {
 
  string lemma, synt;

  os << "<entry>\n";

  int dot = lemmasynt.find('.');

  if (dot == lemmasynt.npos) { throw runtime_error("no POS?"); } 

  lemma.assign(lemmasynt, 0, dot);
  synt.assign(lemmasynt, dot + 1, lemmasynt.npos);

  vector<string> feats;
  stringtok(synt, "+", back_inserter(feats));

  // POS in feats[0], feats in feats[1..n]

  posinf = corresp[feats[0]];

  if (posinf.name.empty()) {
    cerr << "unknow POS " << feats[0] << '\n';
    posinf.name = feats[0];
  }

  os
    << "  <lemma>" << lemma << "</lemma>\n"
    << "  <pos name='" << posinf.name << "'/>\n";


  for (int i = 1; i < feats.size(); ++i) {

    DELAcorresp::synt_code_map::iterator it;

    if (corresp.garbage.find(feats[i]) != corresp.garbage.end()) {

      if (output_unknow_feats) {
        os << "  <garbage code='" << (feats[i][0] == '\'' ? chomp_ap(feats[i]) : feats[i]) << "'/>\n";
      }

    } else if ((it = posinf.SYNTs.find(feats[i])) != posinf.SYNTs.end()) {

      vector<DELAcorresp::feat_def> & v = (*it).second;

      for (int j = 0; j < v.size(); ++j) {
        const DELAcorresp::feat_def & f = v[j]; 
        if (v[j].type == DELAcorresp::LEMMA_FEAT) {
          os << "  " << "<feat name='" << f.attr << "' value='" << f.val << "'/>\n";
        }
      }

    } else { 

      if (output_unknow_feats) {
        os << "  <nolose code='" << (feats[i][0] == '\'' ? chomp_ap(feats[i]) : feats[i]) << "'/>\n";
      }
      if (corresp.nolose.find(feats[i]) == corresp.nolose.end()) {
        cerr << "unknow synt code : " << feats[i] << '\n';
        corresp.nolose.insert(feats[i]);
      }
    }
  }
}


// Write one line per FORM_FEAT definition attached, in posinf.SYNTs, to the
// codes SFEATS[1..n] (SFEATS[0] is the form itself and is skipped). Codes that
// are not in posinf.SYNTs are silently ignored.
static void output_form_feats(std::ostream & os, const DELAcorresp::POS_inf & posinf,
                              const std::vector<std::string> & sfeats) {

  for (std::size_t i = 1; i < sfeats.size(); ++i) {

    const DELAcorresp::synt_code_map::const_iterator it = posinf.SYNTs.find(sfeats[i]);

    if (it == posinf.SYNTs.end()) { continue; }

    const std::vector<DELAcorresp::feat_def> & v = (*it).second;

    for (std::size_t k = 0; k < v.size(); ++k) {

      const DELAcorresp::feat_def & f = v[k];

      if (f.type == DELAcorresp::FORM_FEAT) {
        os << "     " << f.to_XML() << '\n';
      }
    }
  }
}


// Write the <inflected> element(s) for INFLEX, whose layout is
//   form[+code]*[:flexcodes]*
//
// Without any ':' a single <inflected> element is written, holding the form
// and the form-level features of its codes.
// Otherwise one <inflected> element is written per ':'-separated group; each
// holds the form, the form-level features, and the features that
// posinf.FLEXs attaches to every character of the group. A character without
// mapping is written as <nolose code='.'/> when output_unknow_feats is set and
// reported once on cerr (it is recorded in corresp.nolose).
// If a ':' is present but no group follows it, nothing is written.
void output_inflected(std::ostream & os, const DELAcorresp::POS_inf & posinf, const std::string & inflex) {

  // size_type rather than int: no truncation before the comparison with npos
  const std::string::size_type colon = inflex.find(':');

  const std::string head = inflex.substr(0, colon);

  std::vector<std::string> sfeats;
  stringtok(head, "+", std::back_inserter(sfeats));

  // sfeats[0] == form, sfeats[1,...,n] = form feats
  //
  // The tokenizer may yield nothing (e.g. INFLEX is ":ms"), in which case
  // sfeats[0] does not exist; fall back on the raw text before the ':'.
  const std::string form = sfeats.empty() ? head : sfeats[0];

  if (colon == std::string::npos) { // no flexional codes

    // the form is sfeats[0], as in the branch below; printing INFLEX as a
    // whole would leave the "+code" suffixes inside <form>
    os << "  <inflected>\n"
       << "    <form>" << form << "</form>\n";

    output_form_feats(os, posinf, sfeats);

    os << "  </inflected>\n";
    return;
  }

  // the leading ':' is kept on purpose: the tokenizer drops it as a delimiter
  std::vector<std::string> ffeats;
  stringtok(inflex.substr(colon), ":", std::back_inserter(ffeats));

  for (std::size_t i = 0; i < ffeats.size(); ++i) {

    os
      << "  <inflected>\n"
      << "    <form>" << form << "</form>\n";

    output_form_feats(os, posinf, sfeats);

    // each character of the group is an independent flexional code
    const std::string & flexs = ffeats[i];

    for (std::size_t j = 0; j < flexs.size(); ++j) {

      const DELAcorresp::flex_code_map::const_iterator it = posinf.FLEXs.find(flexs[j]);

      if (it != posinf.FLEXs.end()) {

        const std::vector<DELAcorresp::feat_def> & v = (*it).second;

        for (std::size_t k = 0; k < v.size(); ++k) {
          os << "    " << v[k].to_XML() << '\n';
        }

      } else {

        if (output_unknow_feats) {
          os << "    <nolose code='" <<  flexs[j] << "'/>\n";
        }

        // warn only once per unknown flexional code
        const std::string code(1, flexs[j]);

        if (corresp.nolose.find(code) == corresp.nolose.end()) {
          std::cerr << "unknow inflexionnal feature : " << flexs[j]
            << " (in POS " << posinf.name << ")\n";
          corresp.nolose.insert(code);
        }
      }
    }
    os << "  </inflected>\n";
  }
}

// Write the closing tag of the current <entry> element.
void close_entry(std::ostream & os) {
  static const char entry_close[] = "</entry>\n";
  os << entry_close;
}


// Usage: <prog> <corresp-file>
//
// Loads the correspondence tables from argv[1], reads "lemma#inflected"
// records from standard input and writes the XML dictionary to standard
// output. Consecutive records carrying the same text before '#' are grouped
// into one <entry>. A progress dot is printed on cerr every 10000 records and
// the number of entries is reported at the end.
//
// Exit status: 0 on success; 1 on a usage error, when the first record cannot
// be read, or when an exception interrupted the conversion.
int main(int argc, char ** argv) {

  int nbentries = 0;
  int status = 0;

  if (argc < 2) { std::cerr << "usage: " << *argv << " <corresp-file>\n";  return 1; }


  try {

    corresp.read(argv[1]);

    std::string lemma, inflex, oldlemma;
    DELAcorresp::POS_inf posinf;

    std::ostream & os = std::cout;

    //  output_dtd(os);
    open_dic(os);

    if (! getAline(std::cin, lemma, inflex)) { std::cerr << "bad input\n"; return 1; }

    open_entry(os, lemma, posinf);
    output_inflected(os, posinf, inflex);

    oldlemma = lemma;

    int nbline = 0;
    while (getAline(std::cin, lemma, inflex)) {

      if (lemma == oldlemma) { // same entry

        output_inflected(os, posinf, inflex);

      } else { // new entry

        close_entry(os);

        nbentries++;

        open_entry(os, lemma, posinf);
        output_inflected(os, posinf, inflex);
        oldlemma = lemma;
      }

      // progress indicator: a dot every 10000 records, a space every 100000
      nbline++;
      if ((nbline % 10000) == 0) {
        std::cerr << '.';
        if ((nbline % 100000) == 0) { std::cerr << ' '; }
      }
    }
    close_entry(os);
    nbentries++;
    close_dic(os);

  } catch (const std::exception & e) {
    std::cerr << "caught an exception : " << e.what() << '\n';
    // the output is incomplete: do not report success to the caller
    status = 1;
  }

  std::cerr << nbentries << " lexical entries\n";
  return status;
}


