#include <iostream>
#include <stdexcept>
#include <string>

#include <boost/lexical_cast.hpp>

#include <outilex/dico.h>
#include <outilex/serialize.h>

using namespace std;
using namespace boost;

/**
 * Load state number `no` of the automaton from `is`.
 *
 * The state starts with one unsigned short header: bit 0x8000 marks the
 * state as final, the remaining bits give the number of outgoing
 * transitions.  Each transition is then stored as a label (read_ushort)
 * followed by a target state number (read_int3).
 *
 * @param is  stream positioned at the beginning of the state record
 * @param no  index of the state to fill in fsa.states
 */
void dico::read_state(istream & is, int no) {

  FSA::state & state = fsa.states[no];

  unsigned short header;
  read_ushort(is, header);

  // split the header into the final flag and the transition count
  state.final = ((header & 0x8000) != 0);
  const int ntrans = header & 0x7FFF;

  unsigned short label;
  int target;
  for (int left = ntrans; left > 0; --left) {
    read_ushort(is, label);
    read_int3(is, target);
    state.trans[label] = target;
  }
}


/**
 * Load the value table of the automaton from `is`.
 *
 * The table starts with its number of slots (read_int).  Each slot is
 * stored as a pair count (read_int2) followed by that many pairs of
 * numbers (read_int3 each); every pair is wrapped in a dic_entry_value
 * and inserted into fsa.values[slot].
 *
 * @param is  stream positioned at the beginning of the value table
 */
void dico::read_values(istream & is) {

  int nslots;
  read_int(is, nslots);

  fsa.values.resize(nslots);

  int npairs, entry_no, inflex_no;

  for (int slot = 0; slot < nslots; ++slot) {

    read_int2(is, npairs);

    int k = 0;
    while (k < npairs) {
      read_int3(is, entry_no);
      read_int3(is, inflex_no);
      fsa.values[slot].insert(dic_entry_value(entry_no, inflex_no));
      ++k;
    }
  }
}


void dico::read_feats(istream & is) {

  //cerr << "read_feats\n";

  string line;
  getline(is, line);
  assert(line.compare(0, 2, "%%") == 0 && 1);


  getline(is, line);
  int size = lexical_cast<int>(line);
  entries_feats.resize(size);

  for (int i = 0; i < size; i++) {
    getline(is, entries_feats[i]);
  }

  getline(is, line);
  assert(line.compare(0, 2, "%%") == 0 && 2);

  getline(is, line);
  size = lexical_cast<int>(line);
  inflex_feats.resize(size);
  for (int i = 0; i < size; ++i) {
    getline(is, inflex_feats[i]);
  }

  getline(is, line);
  assert(line.compare(0, 2, "%%") == 0 && 3);
}

/**
 * Load the automaton part of a dictionary from `is`.
 *
 * Reads the number of states (read_int), sizes fsa.states accordingly and
 * loads every state in order with read_state().  Once all states are in
 * place, fsa.compute_card() is invoked and the value table is loaded with
 * read_values().
 *
 * @param is  stream positioned at the beginning of the automaton
 */
void dico::read_fsa(istream & is) {

  int nstates;
  read_int(is, nstates);

  fsa.states.resize(nstates);

  int q = 0;
  while (q < nstates) {
    read_state(is, q);
    ++q;
  }

  // must run after every state has been loaded and before the values are read
  fsa.compute_card();
  read_values(is);
}


/**
 * Load a compiled dictionary from the file `fname`.
 *
 * The file must start with the DICOMAGIC number, followed by the automaton
 * (see read_fsa) and the feature tables (see read_feats).
 *
 * If the file cannot be opened, a message is printed on cerr and the
 * process exits with status 1 (historical behaviour, kept for callers).
 *
 * @param fname  path of the dictionary file
 * @throws runtime_error  when the magic number cannot be read or does not
 *                        match DICOMAGIC
 */
void dico::read(const std::string & fname) {

  //cerr << "read(" << fname << ")\n";

  // The payload is binary: without ios::binary, platforms that translate
  // line endings (or stop at 0x1A) would corrupt the numbers being read.
  ifstream bin(fname.c_str(), ios::in | ios::binary);

  if (! bin) { cerr << "unable to open " << fname << '\n'; exit(1); }

  int magic = 0;
  read_int(bin, magic);

  // a failed read (e.g. empty file) must not be mistaken for a valid magic
  if (! bin || magic != DICOMAGIC) {
    throw runtime_error("dico::read : " + fname + ": bad file format\n");
  }

  read_fsa(bin);
  read_feats(bin);
}



/**
 * Fill the lemma and the features of the lexical entry `e` from the
 * dictionary value `v` found for the inflected form `form`.
 *
 * The inflection string inflex_feats[v.feats] has one of these shapes:
 *   - no '.'          : the lemma is e.form, the whole string is features;
 *   - "<feats>.<N><suffix>" with N a decimal number : the lemma is `form`
 *                       with its last N code units removed and <suffix>
 *                       (UTF-8) appended;
 *   - "<feats>.<lemma>" : the lemma is given literally.
 * In the last two cases only <feats> is appended to entries_feats[v.idx]
 * before being handed to e.read_pos_n_feats().
 *
 * NOTE(review): v.idx and v.feats are used as indexes without any check;
 * presumably they always come from this dictionary's value table.
 *
 * @param e     entry to complete (e.form is read when there is no '.')
 * @param form  inflected form as UTF-16 code units
 * @param v     (entry, inflection) pair looked up in the dictionary
 * @throws runtime_error  when the compressed lemma asks to strip more code
 *                        units than `form` contains
 */
void dico::make_lex_entry(dic_lex_entry & e, const vector<UChar> & form,
                          const dic_entry_value & v) const {

  const string & sfeats = entries_feats[v.idx];

  const string & ifeats = inflex_feats[v.feats];


  /* looking for '.' */

  // size_type, not int: npos does not fit in an int
  const string::size_type dot = ifeats.find('.');

  if (dot == string::npos) { // '.' not found, lemma == form

    e.lemma = e.form;
    e.read_pos_n_feats(sfeats + ifeats);

    return;
  }

  const string::size_type lemmcode = dot + 1;

  // isdigit() is undefined for negative values, and ifeats may contain
  // UTF-8 bytes >= 0x80: always go through unsigned char.
  if (lemmcode < ifeats.size()
      && isdigit(static_cast<unsigned char>(ifeats[lemmcode]))) { // uncompress lemma

    UErrorCode uerror = U_ZERO_ERROR;

    string::size_type idx = lemmcode;
    vector<UChar>::size_type no = 0; // number of trailing code units to strip

    for (; idx < ifeats.size() && isdigit(static_cast<unsigned char>(ifeats[idx])); ++idx) {
      no = (no * 10) + (ifeats[idx] - '0');
      // checked on every digit so that `no` can never overflow
      if (no > form.size()) {
        throw runtime_error("dico::make_lex_entry : bad lemma code '" + ifeats
                            + "': form is too short");
      }
    }

    vector<UChar> suff(ifeats.size());
    int lens = unicode::u_str_from_utf8(suff, ifeats.substr(idx), uerror);
    suff.resize(lens);
    unicode::normalize(suff, UNORM_NFD, uerror);

    vector<UChar> lemma(form.begin(), form.begin() + (form.size() - no));
    copy(suff.begin(), suff.end(), back_inserter(lemma));

    unicode::normalize(lemma, UNORM_NFC, uerror);
    unicode::utf8_from_u_str(e.lemma, lemma, uerror);

    unicode_check(uerror);

  } else { // copy lemma
    e.lemma.assign(ifeats, lemmcode, ifeats.size() - lemmcode);
    unicode::normalize(e.lemma, UNORM_NFC);
  }

  e.read_pos_n_feats(sfeats + ifeats.substr(0, dot));
}



struct dump_entries {

  ostream & os;
  dico & dic;

  dump_entries(ostream & _os, dico & _dic) : os(_os), dic(_dic) {}

  void operator()(const vector<UChar> & _form, const set<dic_entry_value> & vals) {
    
    UErrorCode uerror = U_ZERO_ERROR;
    string form = unicode::utf8_from_u_str(_form, uerror);

    os << form << ":\n";
    for (set<dic_entry_value>::const_iterator it = vals.begin(); it != vals.end(); ++it) {
      os << " -> " << dic.entries_feats[(*it).idx] << ' ' << dic.inflex_feats[(*it).feats] << '\n';
    }
  }
};

/**
 * Write a textual listing of the dictionary on `os`: fsa.apply_lexic() is
 * run with a dump_entries functor bound to `os` and to this dictionary.
 */
void dico::dump(ostream & os) {
  dump_entries printer(os, *this);
  fsa.apply_lexic(printer);
}



#if 0
/*
 * DISABLED CODE: everything up to the matching #endif is never compiled.
 *
 * It is an unfinished draft of a tolerant (cost-bounded) dictionary lookup
 * together with the character matching policies it was meant to use.
 *
 * NOTE(review): it cannot be enabled as-is. Among other things:
 *   - `Uchar` is used where the rest of the file spells the type `UChar`;
 *   - the template parameter is declared `MatchTune` but used as `Matchtune`;
 *   - the recursive calls to lookup() do not pass the `dic` and `tune`
 *     arguments required by its signature;
 *   - `states` and `state` are used unqualified, as if lookup() were a
 *     member of the automaton;
 *   - lookup() calls tune.match_char(), while the policy classes declare
 *     match();
 *   - unicode_match::match() has no return statement;
 *   - the last recursive call reads `pos +,` and lacks its final ';'.
 */

/* Matching policy: a dictionary character matches only the identical
 * character, and nothing is ever ignored. */
class exact_match {
public:
  static bool match(UChar indic, Uchar inform) { return indic == inform; }
  static bool ignore_in_dic(UChar indic, UChar inform) { return false; }
  static bool ignore_in_form(UChar inform) { return false; }
};



/* Lookup type constants; IGNORE_MAJ and IGNORE_MARKS are distinct bits.
 * NOTE(review): presumably meant to be or-ed together -- nothing here uses them. */
#define EXACT_MATCH  0
#define IGNORE_CASE  1
#define IGNORE_MAJ   2
#define IGNORE_MARKS 4


/* Signature shared by the character comparison functions below. */
typedef bool match_f(wchar_t indic, wchar_t inform);

/* True when both characters are identical. */
static bool match_exact(wchar_t indic, wchar_t inform) { return indic == inform; }

/* True when the characters are identical, or when the dictionary character
 * equals the lower-cased character of the form. */
static bool match_ignore_maj(wchar_t indic, wchar_t inform) {
  return (indic == inform) || (indic == unicode::tolower(inform));
}

/* True when both characters are equal once lower-cased. */
static bool match_ignore_case(wchar_t indic, wchar_t inform) {
  return (unicode::tolower(indic) == unicode::tolower(inform));
}


/* NOTE(review): despite its name, match_all() always returns false. */
inline bool match_all(wchar_t indic, wchar_t inform) { return false; }
/* True when the dictionary character is a mark (unicode::is_mark). */
inline bool ignore_marks(wchar_t indic, wchar_t inform) { return unicode::is_mark(indic); }

/* Unfinished policy class: the constructor ignores its argument and
 * match() has an empty body. */
class unicode_match {

  int matchtype;

public:

  unicode_match(int lookup_type) {
  }
  static bool match(UChar indic, UChar inform) {
  }
};


/*
 * Recursive cost-bounded lookup of the input range [pos, end) starting from
 * state number `qno`.
 *
 * `form` holds the dictionary characters followed so far, `idx` is
 * incremented on final states and advanced by the `card` of the targets
 * already explored, and `cost` is the cost accumulated so far. A branch is
 * abandoned (returning 0) as soon as `cost` exceeds `maxcost`.
 *
 * When the input is exhausted on a final state, lookup_result(form, idx, cost)
 * is inserted into `res`. The function returns `nb`, the sum of the values
 * returned by the recursive calls plus one for each result inserted here.
 *
 * Each non-matching step (substitution, deletion, addition) recurses with
 * cost + 1, unless the policy `tune` says the character can be ignored.
 */
template<typename Dic, typename Iterator, typename MatchTune>
int lookup(const Dic & dic, int qno, Iterator pos, Iterator end, int idx, vector<UChar> & form,
           set<lookup_result> & res, Matchtune & tune, int cost, int maxcost) {

  if (cost > maxcost) { return 0; }

  int nb = 0;
  const state & q = states[qno];
  const_trans_iterator it;

  if (pos == end) {

    if (q.final) {
      res.insert(lookup_result(form, idx, cost));
      nb++, idx++;
    }

    for (it = q.trans.begin(); it != q.trans.end(); ++it) {
      form.push_back((*it).first);
      if (tune.ignore_in_dic((*it).first, 0)) {
        nb += lookup((*it).second, pos, end, idx, form, res, cost, maxcost);
      } else { // deletion
        nb += lookup((*it).second, pos, end, idx, form, res, cost + 1, maxcost);
      }
      form.pop_back();
    }

    return nb;
  }

  if (q.final) { idx++; }

  for (it = q.trans.begin(); it != q.trans.end(); ++it) {
    
    form.push_back((*it).first);
    if (tune.match_char((*it).first, *pos)) {
      nb += lookup((*it).second, pos + 1, end, idx, form, res, cost, maxcost);
    } else { // substitution
      nb += lookup((*it).second, pos + 1, end, idx, form, res, cost + 1, maxcost);
    }


    if (tune.ignore_in_dic((*it).first, *pos)) {
      nb += lookup((*it).second, pos, end, idx, form, res, cost, maxcost);
    } else { // character deletion
      nb += lookup((*it).second, pos, end, idx, form, res, cost + 1, maxcost);
    }

    form.pop_back();
    idx += states[(*it).second].card;
  }

  if (tune.ignore_in_form(*pos)) {
    nb += lookup(qno, pos + 1, end, idx, form, res, cost, maxcost);
  } else { // character addition
    nb += lookup(qno, pos +, end, idx, form, res, cost + 1, maxcost)
  }

  return nb;
}

#endif


