#ifndef _DELAF_CORRESP_H_
#define _DELAF_CORRESP_H_

#include <iostream>
#include <string>
#include <map>
#include <set>
#include <vector>

#include <boost/filesystem/path.hpp>

#include <outilex/DELAentry.h>
#include <outilex/dic_lex_entry.h>
#include <outilex/stringtok.h>
#include <outilex/unicode.h>

/**
 * Correspondence table between dictionary codes and attribute/value features.
 *
 * For every part-of-speech key the table stores a POS_inf holding two maps:
 * one from syntactic code strings to feature definitions and one from
 * single-character inflectional codes to feature definitions.
 * load_text_label() uses these maps to turn a text label into
 * dic_lex_entry objects.
 *
 * All data members are public and may be filled directly or through read().
 */
class DELAcorresp {

public:

  /// Kind of a feature definition.
  /// NOTE(review): presumably tells whether the feature is attached to the
  /// lemma or to the inflected form -- confirm against read().
  enum feat_type { LEMMA_FEAT, FORM_FEAT };

  /// One attribute/value pair produced by a dictionary code.
  struct feat_def {

    feat_type type;
    std::string attr;
    std::string val;

    /// Default: LEMMA_FEAT with empty attribute and value.
    feat_def() : type(LEMMA_FEAT) {}

    feat_def(feat_type t, const std::string & a, const std::string & v)
      : type(t), attr(a), val(v) {}

    /// Render as "<feat name='ATTR' value='VAL'/>".
    /// attr and val are inserted verbatim (no XML escaping is done here).
    std::string to_XML() const {
      std::string xml("<feat name='");
      xml += attr;
      xml += "' value='";
      xml += val;
      xml += "'/>";
      return xml;
    }
  };

  /// Syntactic code (string) -> feature definitions it expands to.
  typedef std::map<std::string, std::vector<feat_def> > synt_code_map;
  /// Inflectional code (single char) -> feature definitions it expands to.
  typedef std::map<char, std::vector<feat_def> > flex_code_map;


  /// Everything recorded for one part of speech.
  struct POS_inf {

    std::string name;     ///< name copied into dic_lex_entry::POS by load_text_label()

    synt_code_map SYNTs;  ///< known syntactic codes
    flex_code_map FLEXs;  ///< known inflectional codes

    POS_inf() {}
    POS_inf(const std::string & n) : name(n) {}

    /// Append fdef to the definitions registered for inflectional code `code`
    /// (the map entry is created when it does not exist yet).
    void add_flex_code(char code, const feat_def & fdef) {
      std::vector<feat_def> & defs = FLEXs[code];
      defs.push_back(fdef);
    }

    /// Append fdef to the definitions registered for syntactic code `code`
    /// (the map entry is created when it does not exist yet).
    void add_synt_code(const std::string & code, const feat_def & fdef) {
      std::vector<feat_def> & defs = SYNTs[code];
      defs.push_back(fdef);
    }
  };


  typedef std::map<std::string, POS_inf> POS_map;

  POS_map POSs;                   ///< part-of-speech key -> POS_inf
  std::set<std::string> nolose;   ///< NOTE(review): not used in this header; meaning to be confirmed in read()
  std::set<std::string> garbage;  ///< syntactic codes for which load_text_label() prints no "unknow feat" warning
  std::set<std::string> unknow;   ///< unknown POS keys already reported by load_text_label()

  /// Build an empty table.
  DELAcorresp() {}

  /// Build a table and fill it with read(is).
  DELAcorresp(std::istream & is) { read(is); }

  /// Build a table and fill it with read(fname).
  DELAcorresp(const boost::filesystem::path & fname) { read(fname); }

  /// Access the information stored for POS key `name`.
  /// Goes through std::map::operator[], so a missing key is inserted with a
  /// default-constructed POS_inf.
  const POS_inf & operator[](const std::string & name) { return POSs[name]; }

  /// Convert a text label into dic_lex_entry objects written to `out`
  /// (defined below, outside the class body).
  template<typename OutputIterator>
  bool load_text_label(const std::string & txt, OutputIterator out);


  /// Load the table from a stream / from a file (defined elsewhere).
  void read(std::istream & is);
  void read(const boost::filesystem::path & fname);

  /// Empty the POS table and the three bookkeeping sets.
  void clear() {
    unknow.clear();
    garbage.clear();
    nolose.clear();
    POSs.clear();
  }
};


template<typename OutputIterator>
bool DELAcorresp::load_text_label(const std::string & txt, OutputIterator out) {

  int size = txt.size();

  if (txt.empty()) {
    std::cerr << "empty text entry?\n";
    return false;
  }

  if (txt[0] == '{' && size > 1) { // dic entry

    assert(txt[size - 1] == '}');

    std::string form, lemma, POS, sfeats, ffeats;

    if (cutDELAentry(txt.substr(1, size - 2), form, lemma, POS, sfeats, ffeats) == false) {
      std::cerr << "bad text entry : " << txt << std::endl;
      return false;
    }


    POS_map::iterator it = POSs.find(POS);
  
    if (it == POSs.end()) {
      if (unknow.find(POS) == unknow.end()) {
        std::cerr << "in text entry: " << txt << ": unknown POS " << POS << std::endl;
        unknow.insert(POS);
      }
      return false;
    }
  
    POS_inf & inf = it->second;
    dic_lex_entry e;

    e.form = form, e.lemma = lemma, e.POS = inf.name;
 
    std::vector<std::string> vec;
    stringtok(sfeats, "+", back_inserter(vec));
 
    for (int i = 0; i < vec.size(); ++i) {

      synt_code_map::iterator it = inf.SYNTs.find(vec[i]);

      if (it != inf.SYNTs.end()) {
       
        std::vector<feat_def> & fs = it->second; //inf.SYNTs[vec[i]];
 
        for (int j = 0; j < fs.size(); ++j) {
          
          const std::string & attr = fs[j].attr, & val = fs[j].val;
          
          if (e.feats.find(attr) == e.feats.end()) {
            e.feats[attr] = val;
          } else {
            e.feats[attr] += "|" + val;
          }
        }
      } else {
        if (garbage.find(vec[i]) == garbage.end()) {
          std::cerr << "in text entry: " << txt
            << ": unknow feat : " << vec[i] << " (in " << POS << " POS)\n";
        }
        inf.SYNTs[vec[i]] = std::vector<feat_def>();
      }
    }

    vec.clear();
    stringtok(ffeats, ":", back_inserter(vec));

    if (vec.size()) {

      for (int i = 0; i < vec.size(); ++i) {
        
        dic_lex_entry e2 = e;
        const std::string & flex = vec[i];
 
        for (int j = 0; j < flex.size(); ++j) {

          flex_code_map::iterator it = inf.FLEXs.find(flex[j]);

          if (it != inf.FLEXs.end()) {

            const std::vector<feat_def> & fs = it->second; //inf.FLEXs[flex[j]];

            for (std::vector<feat_def>::const_iterator it = fs.begin(); it != fs.end(); ++it) {
              const std::string & attr = it->attr, & val = it->val;
              if (e2.feats.find(attr) == e2.feats.end()) {
                e2.feats[attr] = val;
              } else {
                e2.feats[attr] += "|" + val; 
              }
            }

          } else {
            std::cerr << "in text entry : " << txt
            << " : unknow inflex feat '" << flex[j] << "' (in POS " << POS << ")\n";
            inf.FLEXs[flex[j]] = std::vector<feat_def>();
          }
        }

        *out = e2;
        ++out;
      }
    } else { // no inflex feats
      *out = e;
      ++out;
    }
    return true;
  }

  dic_lex_entry e;

  unicode::utf8_iterator<std::string::const_iterator> it(txt.begin());
  unicode::code_point c = *it;

  
  if (unicode::is_digit(c)) { // number

    e.lemma = e.form = txt;
    e.POS = "number";

  } else if (unicode::is_alpha(c)) { /* unknow word */

    e.form = txt;
    e.POS = "unknow";

  } else { // assume punctuation symbol
    ++it;
    
    if (it != unicode::make_utf8_iterator(txt.end())) {
      std::cerr << "warning: " << txt << " wierd punc symbol?" << std::endl;
    }

    // translate XML special symbols
    if (txt == "&") {
      e.lemma = e.form = "%";
    } else if (txt == "<") {
      e.lemma = e.form = "[";
    } else if (txt == ">") {
      e.lemma = e.form = "]";
    } else { 
      e.lemma = e.form = txt;
    }
    e.POS = "punc";
  }

  *out = e;
  ++out;
  return true;
}
#endif

