#include <iterator>

#include <outilex/unicode.h>
#include <outilex/ulookup.h>
#include <outilex/foreach.h>


using namespace std;


// Signature shared by the character predicates in this file.
// `indic` is the label of a dictionary transition and `inform` is the current
// character of the form being looked up (0 once the form is exhausted).
// Functions of this type are passed as template arguments to ulookup().
typedef bool match_f(UChar indic, UChar inform);

// Strict comparison: the dictionary character must be identical to the
// character of the looked-up form.
inline bool match_exact(UChar indic, UChar inform) {
  const bool same = (inform == indic);
  return same;
}

// Asymmetric case-insensitive comparison: succeeds when both characters are
// identical, or when the dictionary character equals the lowercased form
// character. The dictionary character itself is never case-folded.
inline bool match_ignore_maj(UChar indic, UChar inform) {
  if (indic == inform) { return true; }
  return unicode::tolower(inform) == indic;
}

// Symmetric case-insensitive comparison: both characters are lowercased with
// unicode::tolower() before being compared.
inline bool match_ignore_case(UChar indic, UChar inform) {
  const bool same_folded = (unicode::tolower(indic) == unicode::tolower(inform));
  return same_folded;
}


// shell-like wildcard extensions: a '*' or '?' in the looked-up form matches any dictionary character

// Exact comparison extended with wildcards: a '*' or '?' in the looked-up
// form is accepted against any dictionary character.
inline bool match_regexp(UChar indic, UChar inform) {
  if (inform == '*') { return true; }
  if (inform == '?') { return true; }
  return indic == inform;
}

// Same wildcard handling as the exact variant ('*' and '?' in the form accept
// any dictionary character), falling back to match_ignore_case() otherwise.
inline bool match_ignore_case_regexp(UChar indic, UChar inform) {
  if (inform == '*') { return true; }
  if (inform == '?') { return true; }
  return match_ignore_case(indic, inform);
}

// Same wildcard handling as the exact variant ('*' and '?' in the form accept
// any dictionary character), falling back to match_ignore_maj() otherwise.
inline bool match_ignore_maj_regexp(UChar indic, UChar inform) {
  if (inform == '*') { return true; }
  if (inform == '?') { return true; }
  return match_ignore_maj(indic, inform);
}


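// "ignore in dictionary" predicates: when one returns true, the dictionary
// character is skipped without consuming a character of the looked-up form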
// Skip predicate that never allows a dictionary character to be skipped.
// Despite its name it always returns false; both arguments are unused.
inline bool match_all(UChar indic, UChar inform) {
  (void) indic;
  (void) inform;
  return false;
}
// Skip predicate: the dictionary character may be skipped when
// unicode::is_mark() reports it as a mark. The form character is unused.
inline bool ignore_marks(UChar indic, UChar inform) {
  (void) inform;
  return unicode::is_mark(indic);
}

// Skip predicate for wildcard lookups: any dictionary character may be
// skipped while the current form character is '*'. The dictionary character
// itself is not inspected.
inline bool ignore_regexp(UChar indic, UChar inform) {
  (void) indic;
  const bool on_star = ('*' == inform);
  return on_star;
}

// Skip predicate combining both rules: the dictionary character may be
// skipped when the current form character is '*', or when ignore_marks()
// accepts the dictionary character.
inline bool ignore_regexp_n_marks(UChar indic, UChar inform) {
  if (inform == '*') { return true; }
  return ignore_marks(indic, inform);
}



// One raw match produced by the recursive lookup: the sequence of dictionary
// characters that was followed, plus the index stored at the final position.
struct ulookup_result {

  std::vector<UChar> form; // dictionary characters collected along the path
  int idx;                 // index later handed to make_lex_entries()

  ulookup_result(const std::vector<UChar> & chars, int index) : form(chars), idx(index) {}

  // Strict ordering used by std::set: by idx first, then by form
  // (lexicographic vector comparison) when both indices are equal.
  friend bool operator<(const ulookup_result & a, const ulookup_result & b) {
    if (a.idx < b.idx) { return true; }
    if (b.idx < a.idx) { return false; }
    return a.form < b.form;
  }
};



// Recursively follows every chain of ' ' and '#' transitions leaving dic_pos.
// A ' ' transition appends a space to `form` for the duration of the
// recursive call; a '#' transition is followed without touching `form`.
// A position that has neither kind of outgoing transition is recorded in
// `left` together with the current content of `form`.
void advance_next_token(const dico::position_ & dic_pos, vector<UChar> & form,
                        set<dico::position> & left) {

  bool advanced = false;

  for (dico::trans_iterator tr = dic_pos.trans_begin(), end = dic_pos.trans_end();
       tr != end; ++tr) {

    const bool is_space = (tr.label() == ' ');

    // only separators (' ' and '#') are crossed here
    if (!is_space && !(tr.label() == '#')) { continue; }

    advanced = true;

    if (is_space) { form.push_back(' '); }
    advance_next_token(tr.follow(), form, left);
    if (is_space) { form.pop_back(); }
  }

  if (!advanced) {
    // nothing more to skip: this is where the next token starts
    left.insert(dico::position(form, dic_pos));
  }
}



// Moves every position of `pos` past its end-of-token marker.
// For each position, every outgoing '#' transition is followed and the
// recursive overload then skips any further ' ' / '#' transitions. Positions
// without a '#' transition contribute nothing. On return `pos` holds the new
// set of positions.
void advance_next_token(set<dico::position> & pos) {

  set<dico::position> advanced;

  for (set<dico::position>::const_iterator it = pos.begin(); it != pos.end(); ++it) {

    // work on copies so the elements of the set are never modified
    vector<UChar> form = it->form;
    const dico::position_ start = it->pos;

    dico::trans_iterator tr = start.trans_begin();
    const dico::trans_iterator last = start.trans_end();

    while (tr != last) {
      if (tr.label() == '#') { // end of token
        advance_next_token(tr.follow(), form, advanced);
      }
      ++tr;
    }
  }

  pos.swap(advanced);
}



// Recursive matcher walking the dictionary from dic_pos while consuming the
// form characters in [curr, end).
//
//   match          decides whether a transition label accepts *curr; the
//                  recursion then continues with the next form character.
//   ignore_in_dic  decides whether a transition may be crossed without
//                  consuming any form character (called with 0 as form
//                  character once the form is exhausted).
//
// `form` accumulates the labels of the path being explored and is restored
// before returning. When the whole input has been consumed, the current
// position is added to `left`, and to `res` as well if it is final.
template<match_f match, match_f ignore_in_dic, typename Iterator>
void ulookup(const dico::position_ & dic_pos, Iterator curr, Iterator end,
            vector<UChar> & form, set<ulookup_result> & res, set<dico::position> & left) {

  if (!(curr == end)) {

    for (dico::trans_iterator tr = dic_pos.trans_begin(), last = dic_pos.trans_end();
         tr != last; ++tr) {

      const bool consumes  = match(tr.label(), *curr);
      const bool skippable = ignore_in_dic(tr.label(), *curr);

      if (!consumes && !skippable) { continue; }

      form.push_back(tr.label());

      if (consumes) { // label accepted: move on to the next form character
        ulookup<match, ignore_in_dic>(tr.follow(), curr + 1, end, form, res, left);
      }

      if (skippable) { // label skipped: stay on the same form character
        ulookup<match, ignore_in_dic>(tr.follow(), curr, end, form, res, left);
      }

      form.pop_back();
    }
    return;
  }

  // end of word

  left.insert(dico::position(form, dic_pos));

  if (dic_pos.final()) {
    res.insert(ulookup_result(form, dic_pos.idx));
  }

  // trailing dictionary characters may still be skippable (e.g. diacritics)

  for (dico::trans_iterator tr = dic_pos.trans_begin(), last = dic_pos.trans_end();
       tr != last; ++tr) {

    if (ignore_in_dic(tr.label(), 0)) {
      form.push_back(tr.label());
      ulookup<match, ignore_in_dic>(tr.follow(), curr, end, form, res, left);
      form.pop_back();
    }
  }
}


// Runtime-to-compile-time dispatcher: selects the (match, ignore) predicate
// pair corresponding to the MATCH_* flag combination in `type` and runs the
// recursive matcher from `pos` over the characters in [begin, end).
// The search starts with a copy of pos.form as the accumulated form.
// Throws std::runtime_error for an unsupported combination of flags.
template<typename Iterator>
void ulookup(const dico::position & pos, int type, Iterator begin, Iterator end,
             set<ulookup_result> & res, set<dico::position> & left) {

  vector<UChar> seed(pos.form);
  const dico::position_ & start = pos.pos;

  switch (type) {

  // plain comparison, nothing skipped in the dictionary
  case MATCH_EXACT:
    ulookup<match_exact, match_all>(start, begin, end, seed, res, left);
    return;
  case MATCH_IGNORE_CASE:
    ulookup<match_ignore_case, match_all>(start, begin, end, seed, res, left);
    return;
  case MATCH_IGNORE_MAJ:
    ulookup<match_ignore_maj, match_all>(start, begin, end, seed, res, left);
    return;

  // marks in the dictionary may be skipped
  case MATCH_EXACT|MATCH_IGNORE_MARKS:
    ulookup<match_exact, ignore_marks>(start, begin, end, seed, res, left);
    return;
  case MATCH_IGNORE_CASE|MATCH_IGNORE_MARKS:
    ulookup<match_ignore_case, ignore_marks>(start, begin, end, seed, res, left);
    return;
  case MATCH_IGNORE_MAJ|MATCH_IGNORE_MARKS:
    ulookup<match_ignore_maj, ignore_marks>(start, begin, end, seed, res, left);
    return;

  // shell-like wildcards in the form
  case MATCH_EXACT|MATCH_SHELL_REGEXP:
    ulookup<match_regexp, ignore_regexp>(start, begin, end, seed, res, left);
    return;
  case MATCH_IGNORE_CASE|MATCH_SHELL_REGEXP:
    ulookup<match_ignore_case_regexp, ignore_regexp>(start, begin, end, seed, res, left);
    return;
  case MATCH_IGNORE_MAJ|MATCH_SHELL_REGEXP:
    ulookup<match_ignore_maj_regexp, ignore_regexp>(start, begin, end, seed, res, left);
    return;

  // wildcards in the form and skippable marks in the dictionary
  case MATCH_EXACT|MATCH_IGNORE_MARKS|MATCH_SHELL_REGEXP:
    ulookup<match_regexp, ignore_regexp_n_marks>(start, begin, end, seed, res, left);
    return;
  case MATCH_IGNORE_CASE|MATCH_IGNORE_MARKS|MATCH_SHELL_REGEXP:
    ulookup<match_ignore_case_regexp, ignore_regexp_n_marks>(start, begin, end, seed, res, left);
    return;
  case MATCH_IGNORE_MAJ|MATCH_IGNORE_MARKS|MATCH_SHELL_REGEXP:
    ulookup<match_ignore_maj_regexp, ignore_regexp_n_marks>(start, begin, end, seed, res, left);
    return;
  }

  // no case matched
  throw std::runtime_error("ulookup : bad matching type");
}





// Looks up a UTF-8 encoded word in the dictionary, starting from `pos`.
//
// The word is converted to UTF-16 and normalized to NFD, matched against the
// dictionary according to `lookup_type` (a combination of MATCH_* flags), and
// every raw match is expanded into lexical entries with make_lex_entries().
//
//   pos          starting position (dictionary state + form read so far)
//   lookup_type  MATCH_* flag combination; an unsupported combination makes
//                the inner lookup throw std::runtime_error
//   word         form to look up, UTF-8 encoded
//   res          receives the lexical entries that were found
//   left         receives the positions reached once the word is consumed
//
// Returns the sum of the values returned by make_lex_entries().
int ulookup(const dico::position & pos, int lookup_type, const string & word,
             set<dic_lex_entry> & res, set<dico::position> & left) {

  /* convert to UTF-16 normalized decomposition form */

  vector<UChar> form;

  // An empty word needs no conversion; skipping it also avoids taking
  // &buf[0] on an empty vector, which is undefined behaviour.
  if (!word.empty()) {

    // NOTE(review): uerror is never inspected (the unicode_check call was
    // disabled in the original code); conversion failures go unnoticed.
    UErrorCode uerror = U_ZERO_ERROR;

    // a UTF-16 string never has more code units than its UTF-8 form has bytes
    vector<UChar> buf(word.size());
    int lenb = unicode::u_str_from_utf8(buf, word, uerror);

    // NFD can expand a UTF-16 string up to 4 times (e.g. U+1F82 decomposes
    // into 4 code units); a 2x buffer is too small for such input.
    // Presumably unicode::normalize uses the vector's size as the output
    // capacity - TODO confirm against the wrapper.
    form.resize(lenb * 4);
    int lenf = unicode::normalize(form, UNORM_NFD, & buf[0], lenb, uerror);

    form.resize(lenf);
  }

  set<ulookup_result> _res;

  ulookup(pos, lookup_type, form.begin(), form.end(), _res, left);

  // expand every raw match into full lexical entries

  int nblex = 0;
  for (set<ulookup_result>::iterator it = _res.begin(); it != _res.end(); ++it) {
    nblex += pos.pos.dic->make_lex_entries(it->form, it->idx, inserter(res, res.begin()));
  }

  return nblex;
}


int ulookup(const dico & dic, int lookup_type, const string & form, set<dic_lex_entry> & res,
             set<dico::position> & left) {

  return ulookup(dico::position(dic), lookup_type, form, res, left);
}


int ulookup(const dico & dic, int lookup_type, const string & form, set<dic_lex_entry> & res) {

  set<dico::position> left;

  return ulookup(dico::position(dic), lookup_type, form, res, left);
}


