#include <iostream>
#include <set>
#include <vector>

#include <outilex/sentence_fsa.h>
#include <outilex/rtn_grammar.h>

#include <outilex/rtn_parser.h>


using namespace std;
using namespace boost;

// Writes a one-line, bracketed description of this item to os:
// the name of its pattern, its state q, its from/to bounds and its path.
// Nothing is appended after the closing bracket (no newline).
void rtn_parser::e_item::dump(ostream & os) const {
  os << "[ fst=" << pfst->name;
  os << ", q=" << q;
  os << ", from=" << from;
  os << ", to=" << to;
  os << ", path=" << path;
  os << " ]";
}


// Adds item to the agenda stack associated with position pos.
//
// pos  : index into agenda; it is used unchecked, so the caller must pass an
//        index for which agenda has a stack.
// item : the item handed to that stack's add().
void rtn_parser::ENQUEUE(int pos, const e_item & item) {

  agenda[pos].add(item);

  // NOTE(review): an earlier, now disabled, version of this function did the
  // insertion by hand: starting from s.lower_bound(item) it walked the entries
  // for which seems_like(item, entry) held, returned without inserting when
  // one of them had an equal fs member, and otherwise did push_back(item).
  // Presumably add() now takes care of that duplicate filtering -- confirm
  // against earley_stack before relying on it.
}


void rtn_parser::parse(const sentence_fsa & txt, rtn_chart & chart, bool surf) {

  //cerr << "earley parse\n";
 
  /* initialization stuffs */

  ptxt = & txt;
  pchart = & chart;

  agenda.clear();
  agenda.resize(txt.size());

  if (surf) {

    const string & axiom = gram.start_name();

    for (int pos = 0; pos < txt.size(); ++pos) { PREDICTOR(pos, axiom); }

  } else { PREDICTOR(0, gram.start_name()); }


  for (int pos = 0; pos < txt.size(); ++pos) {

    //cerr << "POS = " << pos << endl;

    earley_stack & stack = agenda[pos];

    for (int i = 0; i < stack.size(); ++i) {
    
      //cerr << "\npos = " << pos << " " << i+1 << "/" << stack.size() << endl;

      const rtn_pattern & fst = *(stack[i].pfst);
      int q = stack[i].q;

      //cerr << "fst = " << fst.name << ", path = " << stack[i].path << endl;

      if (q == -1) { // TERMINAL
        //cerr << "TERMINAL (" << fst.name << ")\n";
        COMPLETER(pos, stack[i]);
        continue;
      }

      if (fst.final(q)) { // item is completed, add a completed itm (q == -1)
        //cerr << "FINAL! q is final\n";
        const e_item & item = stack[i];
        ENQUEUE(pos, e_item(fst, -1, item.from, item.to, item.out, item.path));
      } 


      for (rtn_pattern::const_trans_iterator str = fst.trans_begin(q),
           end = fst.trans_end(q); str != end; ++str) {

        if (str->in().type == input_type::SUBCALL) {

          const string & syntname = boost::get<string>(str->in().v);
 
          //cerr << "SUBCALL: " << syntname << endl;

          PREDICTOR(pos, syntname);
 
        } else if (str->in().type == input_type::LEXMASK) {

          const lexical_mask & m = boost::get<lexical_mask>(str->in().v);

          //cerr << "LEXMASK: " << m << endl;

          for (int transno = 0; transno < txt.trans_size(pos); ++transno) {
 
            const sentence_fsa::transition tr = txt.get_trans(pos, transno); 
            const lexical_mask & e = tr.in();

            //cerr << "  in text: " << e << endl;

            if (e.intersect(m)) { // match

              // cerr << "MATCH!\n";
              const e_item & item = stack[i];

              synt_path_type npath(item.path);
              npath.push_back(syntref(pos, transno));

              vector<string> out(item.out);
              out.back() += str->out();
              out.push_back(string());

              ENQUEUE(tr.to(), e_item(fst, str->to(), item.from, tr.to(), out, npath));
            } 
            /*
            else {
              cerr  << "no match\n";
            }
            */
          }

        } else { // epsilon transition
 
          //cerr << "EPSILON\n";
          const e_item & item = stack[i];

          vector<string> out(item.out);
          out.back() += str->out();
          ENQUEUE(pos, e_item(fst, str->to(), item.from, item.to, out, item.path));
        }
      }
    }
  }
}


void rtn_parser::PREDICTOR(int pos, const std::string & syntname) {

  int syntno = gram.get_pat_idx(syntname);

  if (syntno == -1) {
    cerr << "warning: unknow syntagm: " << syntname << endl;
    return;
  }
  vector<string> out(1);
  ENQUEUE(pos, e_item(gram[syntno], 0, pos, pos, out, synt_path_type()));
}


void rtn_parser::COMPLETER(int pos, const e_item & item1) {

  // cerr << "COMPLETER pos = " << pos << "\n";
  // cerr << "item = " << item1 << endl;

  rtn_chart & chart = *pchart;

  const string & syntname = item1.pfst->name;

  //cerr << "syntname = " << syntname << " from=" << item1.from << " to=" << item1.to <<  endl;

  if (item1.from == item1.to) {
    cerr << "error: synt desc: " << syntname << " matches with empty word\n";
    return;
  }
 
  //if (syntname == gram.start_name()) { cerr << "find a match\n"; }

  int syntidx = - chart.add_match(item1.from,
                                  rtn_match(syntname, item1.to, item1.path, item1.out)) - 1;

  if (syntidx == 0) { // already present
    //cerr << "syntidx = 0\n";
    return;
  }


  int item1from = item1.from;
  int item1to   = item1.to;

  earley_stack & stack = agenda[item1.from];

  // warning: do not use item1 ref in the following block,
  // because its adress can change .... (call to ENQUEUE)
 
  //cerr << "searching stack...\n";
  for (earley_stack::iterator it = stack.begin(); it != stack.end(); ++it) {
 
    const e_item & item2 = *it;
    const rtn_pattern & fst = *item2.pfst;
    int q = item2.q;

    // cerr << "-> item = " << item2 << endl;

    if (q == -1) { continue; }

    for (rtn_pattern::const_trans_iterator str = fst.trans_begin(q);
         str != fst.trans_end(q); ++str) {

      //cerr << "     trans: " << str->in() << endl; 
      if (str->in().type == input_type::SUBCALL
          && boost::get<string>(str->in().v) == syntname) {

        //cerr << "find something to COMPLETE!\n";
        synt_path_type npath(item2.path);
        vector<string> nout(item2.out);

        // concat path
        npath.push_back(syntref(item1from, syntidx));

        // concat output
        nout.push_back(str->out());
        ENQUEUE(item1to, e_item(fst, str->to(), item2.from, item1to, nout, npath));
      }
    }
  }
}

