#include <iostream>
#include <set>
#include <vector>

#include <outilex/sentence_fsa.h>
#include <outilex/wrtn_grammar.h>
#include <outilex/wrtn_chart.h>
#include <outilex/wrtn_parser.h>

#include <boost/lexical_cast.hpp>
#include <boost/timer.hpp>


using namespace std;
using namespace boost;


// Write a one-line description of this match (its `to` and `w` fields) on
// `os`. The line is terminated with `endl`, so the stream is flushed.
void wrtn_match::dump(ostream & os) const {
  const string weight_text = lexical_cast<string>(w);
  os << "( to =" << to << ",weight=" << weight_text << ")" << endl;
}


// Combine two weights. Weights are combined by plain addition.
double wrtn_parser::add_weight(double a, double b) {
  const double combined = a + b;
  return combined;
}


// Write a bracketed, single-line description of this item on `os`:
// pattern name, state, [from, to] span, path and weight.
// No trailing newline is written.
void wrtn_parser::e_item::dump(ostream & os) const {
  const string weight_text = lexical_cast<string>(weight);

  os << "[ fst=" << pfst->name;
  os << ", q=" << q;
  os << ", from=" << from;
  os << ", to=" << to;
  os << ", path=" << path;
  os << " , weight =" << weight_text << " ]";
}


void wrtn_parser::ENQUEUE(int pos, const e_item & item) {

  if (agenda[pos].add(item) == -1) { // already present, keep the best match (best weight)

    earley_stack::val_iterator it = agenda[pos].find(item);

    assert(it != agenda[pos].val_end());

    if (it->weight < item.weight) { // replace item in place
      it->weight = item.weight;
      if (flags & IGNORE_DIFF_PATHS)   { it->path = item.path; }
      if (flags & IGNORE_DIFF_OUTPUTS) { it->out = item.out; }
    }
  }
}


// Run the Earley-style parsing loop over the sentence automaton held by
// `chart` (chart.fsa), using the grammar `gram` and the parser `flags`.
//
// The agenda is rebuilt from scratch, the start pattern is predicted, and
// then every agenda position is processed in increasing order. Completed
// items are handed to COMPLETER, which records matches into `chart`.
//
// Parameters:
//   chart   - provides the text automaton to parse; also remembered in
//             `pchart` so that COMPLETER can add matches to it.
//   timeout - maximum elapsed time as reported by boost::timer::elapsed()
//             (printed as seconds in the message below). A value of 0
//             disables the check. The check is performed only once per text
//             position, before the items of that position are processed.
//
// Returns 0 when the loop ran to the end (or the text automaton is empty),
// and -1 when the timeout was exceeded; in the latter case a message is
// written to cerr and the remaining positions are not processed.
int wrtn_parser::parse(wrtn_chart & chart, double timeout) {

  int res = 0;

  //cerr << "earley parse\n";
 
  /* initialization */

  // Remember the text automaton and the chart in member pointers; COMPLETER
  // reaches the chart through `pchart`.
  const sentence_fsa & txt = chart.fsa;
  ptxt = & txt;
  pchart = & chart;

  if (txt.empty()) { return 0; } // case of an empty text fsa ...

  {
    // Build a fresh agenda holding txt.size() empty stacks and swap it with
    // the member, so whatever a previous parse left behind is discarded.
    e_agenda(txt.size(), earley_stack(compare_items(flags))).swap(agenda);
  }

  // Seed the agenda with the start pattern: at every text position when
  // SURF_PARSING is set, otherwise only at position 0.
  if (flags & SURF_PARSING) {

    const string & axiom = gram.start_name();

    for (int pos = 0; pos < txt.size(); ++pos) { PREDICTOR(pos, axiom); }

  } else { PREDICTOR(0, gram.start_name()); }

  timer tmr;

  for (int pos = 0; pos < txt.size(); ++pos) {

    /*
    cerr << "POS = " << pos << "/" << txt.size() << " timer = " << tmr.elapsed() 
      << " timeout = "<< timeout << endl;
      */
    // Give up (with res = -1) once the elapsed time exceeds a non-zero timeout.
    if (timeout && timeout < tmr.elapsed()) {
      cerr << "parse too long!: " << tmr.elapsed() << "s.\n"
        << "text = " << txt.text << "\n";
      res = -1;
      break;
    }

    earley_stack & stack = agenda[pos];

    // Index-based loop with the size re-read at every iteration: the calls to
    // ENQUEUE(pos, ...) below can add items to this very stack, and those new
    // items have to be processed as well.
    for (int i = 0; i < stack.size(); ++i) {
    
      // cerr << "\npos = " << pos << " " << i+1 << "/" << stack.size() << endl;
      // cerr << "item = " << stack[i] << endl;

      const wrtn_pattern & fst = *(stack[i].pfst);
      int q = stack[i].q;

      //cerr << "fst = " << fst.name << ", path = " << stack[i].path << endl;

      // q == -1 marks a completed item (see the FINAL case below): hand it to
      // COMPLETER and move on to the next item.
      if (q == -1) { // TERMINAL
        //cerr << "TERMINAL (" << fst.name << ")\n";
        COMPLETER(pos, stack[i]);
        continue;
      }

      // No `continue` here: after queuing the completed copy, the outgoing
      // transitions of q are still explored below.
      if (fst.final(q)) { // item is completed, add a completed item (q == -1)
        //cerr << "FINAL! q is final\n";
        const e_item & item = stack[i];
        ENQUEUE(pos, e_item(fst, -1, item.from, item.to, item.out, item.path,item.weight));
      } 


      // Explore every transition leaving state q of the pattern.
      for (wrtn_pattern::const_trans_iterator str = fst.trans_begin(q),
           end = fst.trans_end(q); str != end; ++str) {

        if (str->in().type == input_type::SUBCALL) {

          // Call to another pattern: predict it at the current position.
          // The calling item itself is advanced later, by COMPLETER, once
          // the called pattern has produced a match.
          const string & syntname = boost::get<string>(str->in().v);
 
          //cerr << "SUBCALL: " << syntname << endl;

          PREDICTOR(pos, syntname);
 
        } else if (str->in().type == input_type::LEXMASK) {

          const lexical_mask & m = boost::get<lexical_mask>(str->in().v);

          //cerr << "LEXMASK: " << m << endl;

          // Compare the grammar mask with every text transition leaving `pos`.
          for (int transno = 0; transno < txt.trans_size(pos); ++transno) {
 
            const sentence_fsa::const_transition tr = txt.get_trans(pos, transno); 
            const lexical_mask & e = tr.in();

            //cerr << "  in text: " << e << endl;

            if (e.intersects(m)) { // match

              //cerr << "MATCH!\n";
              const e_item & item = stack[i];

              // Extend the path with the text transition that was consumed.
              synt_path_type npath(item.path);
              npath.push_back(syntref(pos, transno));
	      
              vector<string> out(item.out);
	      
              // Append the transition output to the current (last) output
              // segment, then open a new empty segment.
	      out.back() += str->out().label;
              out.push_back(string());

	      // accumulate the weight carried by the grammar transition
	      double w = add_weight(item.weight, str->out().weight);

              // The advanced item is queued at the destination state of the
              // text transition.
              ENQUEUE(tr.to(), e_item(fst, str->to(), item.from, tr.to(), out, npath, w));
            } 
            /*
            else {
              cerr  << "no match\n";
            }
            */
          }

        } else { // epsilon transition
 
          //cerr << "EPSILON\n";
          // NOTE(review): `item` refers into the stack that ENQUEUE(pos, ...)
          // may modify; it is only read while building the argument, i.e.
          // before the call. COMPLETER carries a warning that item addresses
          // can change on ENQUEUE, so keep it that way.
          const e_item & item = stack[i];

          // No text is consumed: same position, same path; only the output
          // and the weight of the transition are accumulated.
          vector<string> out(item.out);
          out.back() += str->out().label;
          double w = add_weight(item.weight,str->out().weight);
          ENQUEUE(pos, e_item(fst, str->to(), item.from, item.to, out, item.path,w));
        }
      }
    }
  }
  return res;
}


void wrtn_parser::PREDICTOR(int pos, const std::string & syntname) {

  int syntno = gram.get_pat_idx(syntname);

  if (syntno == -1) {
    cerr << "warning: unknow syntagm: " << syntname << endl;
    return;
  }
  vector<string> out(1);
  ENQUEUE(pos, e_item(gram[syntno], 0, pos, pos, out, synt_path_type(), 0.));
}


void wrtn_parser::COMPLETER(int pos, const e_item & item1) {

  // cerr << "COMPLETER pos = " << pos << "\n";
  // cerr << "item = " << item1 << endl;

  wrtn_chart & chart = *pchart;

  const string & syntname = item1.pfst->name;

  //cerr << "syntname = " << syntname << " from=" << item1.from << " to=" << item1.to <<  endl;

  if (item1.from == item1.to) {
    cerr << "error: synt desc: " << syntname << " matches with empty word\n";
    return;
  }
 
  //if (syntname == gram.start_name()) { cerr << "find a match\n"; }

  int syntidx = - chart.add_match(item1.from,
                                  wrtn_match(syntname, item1.to, item1.path, item1.out,
                                             item1.weight)) - 1;
  //item1.dump(cout);
  //cout<<endl;

  if (syntidx == 0) { // already present
    //cerr << "syntidx = 0\n";
    return;
  }


  int item1from = item1.from;
  int item1to   = item1.to;
  double item1weight = item1.weight;

  earley_stack & stack = agenda[item1.from];

  // warning: do not use item1 ref in the following block,
  // because its adress can change .... (call to ENQUEUE)
 
  //cerr << "searching stack...\n";
  for (earley_stack::iterator it = stack.begin(); it != stack.end(); ++it) {
 
    const e_item & item2 = *it;
    const wrtn_pattern & fst = *item2.pfst;
    int q = item2.q;

    // cerr << "-> item = " << item2 << endl;

    if (q == -1) { continue; }

    for (wrtn_pattern::const_trans_iterator str = fst.trans_begin(q);
         str != fst.trans_end(q); ++str) {

      //cerr << "     trans: " << str->in() << endl; 
      if (str->in().type == input_type::SUBCALL
          && boost::get<string>(str->in().v) == syntname) {

        //cerr << "find something to COMPLETE!\n";
        synt_path_type npath(item2.path);
        vector<string> nout(item2.out);

        // concat path
        npath.push_back(syntref(item1from, syntidx));

        // concat output	
        nout.push_back(str->out().label);
        double w = add_weight(item2.weight, add_weight(str->out().weight, item1weight));
        ENQUEUE(item1to, e_item(fst, str->to(), item2.from, item1to, nout, npath, w));
      }
    }
  }
}

