#include <sstream>
#include <set>
#include <stdexcept>


#include <outilex/stringtok.h>
#include <outilex/grammar_application.h>


using namespace std;

// Marking of cache states during exploration: a state starts out `white`
// (default set by cache_fst::state's constructor), is turned `gray` by
// inter_state while its successors are being explored, and `black` once
// that exploration has completed.
enum wgb_color {
  white = 0,
  gray,
  black
};


struct cache_fst {

public:

  typedef int input_type;
  typedef syntagm_fst::output_type output_type;

  typedef set<output_type> final_outputs_type;

  typedef map<pair<int, int>, int> states_map;

  struct transition {
    
    int to_;
    input_type in_;
    output_type out_;
    
    transition(const input_type & input, const output_type & output, int dest) : to_(dest), in_(input), out_(output) {}
 
    int to() const { return to_; }
    const input_type & in()   const { return in_; }
    const output_type & out() const { return out_; }
  };

  typedef vector<transition> transitions;
  typedef transitions::iterator trans_iterator;

  struct state {

    wgb_color color;
    int state_in_text;
 
    bool final;
    final_outputs_type final_outputs;

    transitions trans;

    state() : color(white), state_in_text(-1), final(false), final_outputs(), trans() {}
  };


  vector<state> states;
  states_map state_ids;

  vector<bool>  done;
  vector<pair<int, int> > result;

  cache_fst() : states(), state_ids(), done(), result() {}

  inline void init(int fsa_size) { done.resize(fsa_size, false); result.resize(fsa_size); }

  inline int size() const { return states.size(); }

  inline state & operator[](int q) { return states[q]; }

  int find_state(int q1, int q2) const;
  int add_state(int q1, int q2);

  bool & final(int q) { return states[q].final; }
  bool final(int q) const { return states[q].final; }
  void set_final(int q, bool v = true) { states[q].final = v; }

  wgb_color & color(int q) { return states[q].color; }
  const wgb_color & color(int q) const { return states[q].color; }

  inline int state_in_text(int q) const { return states[q].state_in_text; }

  inline final_outputs_type & final_outputs(int q) { return states[q].final_outputs; } 
  inline void add_final_output(int q, const output_type & output) {
    states[q].final_outputs.insert(states[q].final_outputs.begin(), output);
  }

  inline trans_iterator trans_begin(int q) { return states[q].trans.begin(); }
  inline trans_iterator trans_end(int q) { return states[q].trans.end(); }


  inline void add_trans(int q, const input_type & in, const output_type & out, int to) {
    states[q].trans.push_back(transition(in, out, to));
  }
};


int cache_fst::find_state(int q1, int q2) const {
  states_map::const_iterator it = state_ids.find(make_pair(q1, q2));
  if (it == state_ids.end()) { return -1; }
  return it->second;
}


int cache_fst::add_state(int q1, int q2) {
  
//  cerr << "CACHE add_state: " << q1 << ", " << q2 << endl;

  pair<int,int> id(q1, q2);
  
  if (state_ids.find(id) != state_ids.end()) {
//    cerr << "already HERE: " << state_ids[id] << endl;
    return state_ids[id];
  }

  int res = states.size();

  states.resize(res + 1);
  states[res].state_in_text = q1;

  state_ids[id] = res;

//  cerr << "making new state: " << res << endl;

  return res;
}


/**
 * One match produced while exploring a syntagm transducer:
 * the state reached (`to`), the sequence of references followed (`path`)
 * and the accumulated output (`output`).
 */
struct match_res {

  typedef syntagm_fst::output_type output_type;

  int to;                 // state reached at the end of the match
  vector<syntref> path;   // references followed to get there
  output_type output;     // output accumulated along the path

  match_res(int q, const vector<syntref> & p, const output_type & out)
    : to(q),
      path(p),
      output(out)
  {}
};

/**
 * Applies a grammar (a collection of syntagm transducers) to a sentence
 * automaton. One cache_fst is kept per syntagm transducer of the grammar so
 * that each (text state, syntagm) pair is only computed once.
 *
 * The object keeps references to the text and the grammar it was built with;
 * both must outlive it.
 */
class grammar_application {

public:
  typedef grammar::output_type output_type;


protected:

  /**
   * Replay an already explored cache state q: append to `res` one match per
   * final output reachable from q, extending `path` / `output` on the way.
   */
  // Note: a member declaration inside its own class must not be qualified
  // with the class name (rejected by GCC >= 4.1 and Clang).
  void travel(cache_fst & cache, int q, vector<syntref> & path, const output_type & output,
              vector<match_res> & res);

  /**
   * Explore the pair (text state q1, grammar state q2) of transducer `grm`,
   * filling `cache` and appending the matches found to `res`.
   *
   * @return the cache state associated with (q1, q2).
   * @throws runtime_error when the pair is reached again while still being explored.
   */
  int inter_state(int q1, syntagm_fst & grm, int q2, cache_fst & cache,
                  vector<syntref> & path, const output_type & output, vector<match_res> & res);

public:

  /** Build one cache per syntagm transducer, each sized to the number of text states. */
  grammar_application(sentence_fsa & txt, grammar & grm) : text(txt), gram(grm), caches(gram.size()) {
    for (vector<cache_fst>::size_type i = 0; i < caches.size(); ++i) {
      caches[i].init(text.size());
    }
  }


  /**
   * Look for the syntagm named `descname` from text state q1.
   * On return, [begin, end) is the range of syntagm transitions of state q1
   * that hold the matches.
   */
  void apply(int q1, const std::string & descname, int & begin, int & end);

  /** Same as above, the syntagm being designated by its index in the grammar. */
  void apply(int q1, int descno, int & beg, int & end);

  /** Apply the grammar's start syntagm from the text's start state; returns (begin, end). */
  pair<int, int> apply();


protected:

  sentence_fsa & text;
  grammar & gram;

  vector<cache_fst> caches;   // caches[i] belongs to gram[i]
};


/**
 * Replay the sub-graph already stored in `cache` below state q.
 *
 * For every final output of every final state reached, a match_res is
 * appended to `res`, built from the text state of that cache state, the
 * current `path`, and `output` multiplied by the final output.
 * `path` is extended while descending and restored before returning.
 */
void grammar_application::travel(cache_fst & cache, int q, vector<syntref> & path,
                                 const output_type & output, vector<match_res> & res) {

  assert(q < cache.size());

  const int text_state = cache.state_in_text(q);

  if (cache.final(q)) {
    cache_fst::final_outputs_type & finals = cache.final_outputs(q);
    for (cache_fst::final_outputs_type::iterator fo = finals.begin(); fo != finals.end(); ++fo) {
      res.push_back(match_res(text_state, path, output_type::mult(output, *fo)));
    }
  }

  cache_fst::trans_iterator edge = cache.trans_begin(q);
  while (edge != cache.trans_end(q)) {
    // follow the edge, then undo the path extension on the way back
    path.push_back(syntref(text_state, edge->in()));
    travel(cache, edge->to(), path, output_type::mult(output, edge->out()), res);
    path.pop_back();
    ++edge;
  }
}

/**
 * Explore the pair (text state q1, grammar state q2) of the syntagm
 * transducer `grm`, memoising the exploration in `cache`.
 *
 * - If the pair is already in the cache, its stored sub-graph is replayed with
 *   travel() (or a runtime_error is thrown if the pair is still gray, i.e. it
 *   was reached again while being explored).
 * - Otherwise a new cache state is created and marked gray; if q2 is final in
 *   `grm`, one match per final output is appended to `res`. Then every text
 *   transition leaving q1 is matched against the grammar transitions of q2,
 *   and every syntagm transition of q2 is matched against the syntagm
 *   transitions that apply() reports for q1. Each successful step recurses and
 *   is recorded as a cache transition. The state is finally marked black.
 *
 * @param path    references followed so far; extended during recursion and
 *                restored before returning
 * @param output  output accumulated so far
 * @param res     receives the matches found
 * @return the cache state associated with (q1, q2)
 * @throws runtime_error("inter_state: infinite loop") on a gray cache hit
 *
 * Debug traces are written to cerr.
 */
int grammar_application::inter_state(int q1, syntagm_fst & grm, int q2, cache_fst & cache,
                                     vector<syntref> & path, const output_type & output,
                                     vector<match_res> & res) {

  cerr <<  "inter_state(" << q1 << "," << q2 << ") syntdesc=" << grm.get_name() << " output=" << output << endl;

  int q = cache.find_state(q1, q2);

  if (q != -1) {
    // Known pair: a gray state means we came back to it before finishing it.
    if (cache.color(q) == gray) { throw runtime_error("inter_state: infinite loop"); }
    // Otherwise replay the cached sub-graph with the current path/output.
    travel(cache, q, path, output, res);
    return q;
  }

  q = cache.add_state(q1, q2);

  // Mark as "in progress" so that re-entering this pair is detected above.
  cache.color(q) = gray;

  if (grm.final(q2)) {

    cache.final(q) = true;

    // Record each final output in the cache and emit the corresponding match.
    for (set<output_type>::iterator it = grm.final_outputs(q2).begin();
         it != grm.final_outputs(q2).end(); ++it) {

      cache.add_final_output(q, *it);
      cerr << "ADDING new match res for " << grm.get_name() << endl;
      res.push_back(match_res(q1, path, output_type::mult(output, *it)));
    }
  }

 
  // Lexical matching: transno is the position of tr among q1's text transitions.
  int transno;
  sentence_fsa::trans_iterator tr;
  for (transno = 0, tr = text.trans_begin(q1); tr != text.trans_end(q1); ++transno, ++tr) {

    cerr << "looking for " << tr->in() << endl;
 
    syntagm_fst::const_trans_iterator gtr = grm.find_matching_trans(q2, tr->in()); 

    if (gtr != grm.trans_end(q2) && gtr->to() != -1) {

      cerr << "found a transition : in=" << gtr->in() << " out=" << gtr->out() << endl;

      // Copy the output now: the recursive inter_state call below invalidates gtr.
      output_type out = gtr->out();

      path.push_back(syntref(q1, transno));
      int to = inter_state(tr->to(), grm, gtr->to(), cache, path,
                           output_type::mult(output, gtr->out()), res);
      path.pop_back();
//      cerr << "inter_state: back to (" << q1 << ", " << q2 << ") in " << grm.get_name() << endl;
//      cerr << "add trans in cache: (" << q << ", " << transno << "," << out << ',' << to << endl;
      cache.add_trans(q, transno, out, to);
    }
  }

  // Syntagm matching

  cerr <<  "\ninter_state(" << q1 << "," << q2 << ") syntdesc=" << grm.get_name() << " output=" << output
    << ": syntagm matching\n";

/*
  for (syntagm_fst::const_synt_trans_iterator tr = grm.synt_trans_begin(q2); tr != grm.synt_trans_end(q2); ++tr) {
    cerr << "tr in = " << tr->in() << endl;
  }
*/


  for (int i = 0; i < grm.synt_trans_size(q2); ++i) {

    /* Work on a copy of the syntagm transition so that iterator/reference
     * invalidation during the nested calls is not a concern. */

    syntagm_fst::synt_transition tr = grm.get_synt_trans(q2, i);


    cerr << "inter_state: from state "<< q2 << " in " << grm.get_name()
      << " (" << q1 << " in text) looking for syntmask " << tr.in()
      << "out = " << tr.out() << endl;


    /*
    const synt_mask mask = tr->in();
    const output_type out = tr->out();
    int trto = tr->to();
    */

    // Ask for the syntagm named by the mask from q1; [begin, end) is the
    // range of syntagm transitions of q1 holding the results.
    int begin, end;
    apply(q1, tr.in().get_name(), begin, end);

/*
    cerr << "back to inter_state(" << q1 << ", " << q2 << ") in "<< grm.get_name()
      << " looking for " << tr.in() << " syntpath in text\n";
*/

    while (begin < end) {

      //const syntagm_path & synt_path = text[q].synt_trans[begin].in();
      // syntr is only read before the recursive call below.
      const sentence_fsa::synt_transition & syntr = text.get_synt_trans(q1, begin);

      if (synt_mask::match(syntr.in(), tr.in())) {

        cerr << syntr.in() << " matches with " << tr.in() << endl;

        // Syntagm transitions are referenced by the negated index.
        // NOTE(review): -begin is 0 when begin == 0, the same value as token
        // transition 0 — confirm how consumers tell the two apart.
        path.push_back(syntref(q1, -begin));
        int to = inter_state(syntr.to(), grm, tr.to(), cache, path,
                             output_type::mult(output, tr.out()), res);
        path.pop_back();

        /*
        cerr << "inter_state: back to (" << q1 << ", " << q2 << ") in " << grm.get_name() << endl;
        cerr << "adding cache syn trans in=" << syntr.in() << " out=" << tr.out() << endl;
        */
        cache.add_trans(q, -begin, tr.out(), to);
      }

      ++begin;
    }
  }

  // Exploration of (q1, q2) is complete.
  cache.color(q) = black;

  cerr << "out of inter_state\n";
  return q;
}

/**
 * Fill `path` from a match result.
 *
 * `path.path` receives the references of the match and `path.syntdef` the
 * syntagm definition named `syntname` (created in the linguistic definition
 * of `text_fsa` when it does not exist yet). Every element of the match
 * output is then read as an "attr=value" string:
 *   - a value that does not start with "$$." is a literal string feature;
 *   - a value "$$.a.b..." designates a feature of the path element found at
 *     position `pos` counted from the end of the match path; it is fetched
 *     with text_fsa.get_feat() and stored according to the declared type of
 *     `attr`, with a conversion to string where possible.
 * Malformed or ill-typed elements are reported on cerr and skipped.
 *
 * @throws runtime_error when the declared feature type is none of the
 *         handled ones.
 */
void make_syntagm_path(syntagm_path & path, sentence_fsa & text_fsa,
                       const std::string & syntname, match_res & res) {

  cerr << "make_syntagm_path\n";

  typedef match_res::output_type output_type;
  typedef output_type::elem elem;

  path.clear();
  path.path = res.path;

  ling_def * lingdef = text_fsa.lingdef;
  path.syntdef = lingdef->get_syntagm_def(syntname);

  if (path.syntdef == NULL) {
    // syntagm not declared yet: register it on the fly
    cerr << "creating new syntagm def for " << syntname << endl;
    path.syntdef = lingdef->add_syntagm_def(syntname);
  }

  assert(res.output != output_type::zero());

  const output_type & output = res.output;

  for (set<elem>::const_iterator it = output.v.begin(); it != output.v.end(); ++it) {

    // literal output string, as written in the local grammar (attr=val form)
    const string & literal = it->v;
    const string::size_type eq_pos = literal.find('=');

    // reject strings without '=' or with nothing after it
    if (eq_pos == string::npos || eq_pos + 1 == literal.size()) {
      cerr << "make_syntagm_path: invalid syntagm output string: '" << it->v << "'\n";
      continue;
    }

    string attrname = literal.substr(0, eq_pos);
    string val = literal.substr(eq_pos + 1);

    // declared type of the target attribute (filled together with its attr_def)
    attr_def * decl_attr;
    syntagm_feat_type feattype = path.syntdef->get_feat_type(attrname, decl_attr);

    const bool is_reference = (val.compare(0, 3, "$$.", 3) == 0);

    if (!is_reference) {
      // plain string, no reference to the recognised sequence
      if (feattype != STRING_FEAT_TYPE) {
        cerr << "make_syntagm_path: bad syntagm output: " << it->v << " type mismatch\n";
        continue;
      }
      path.string_feats[attrname] = val;
      continue;
    }

    assert(it->pos > -1 && it->pos < res.path.size());

    // position of the matching token|syntagm, counted back from the end of the path
    const syntref & ref = res.path[res.path.size() - 1 - it->pos];
    int q = ref.qno;
    int transno = ref.transno;

    // split the feature path that follows the "$$." prefix
    vector<string> featpath;
    stringtok(val.substr(3), ".", back_inserter(featpath));

    // value slots filled by get_feat
    attr_def * src_attr;
    int featval;
    string stringval;
    vector<syntref> syntrefs;

    syntagm_feat_type realtype = text_fsa.get_feat(q, transno, featpath, src_attr, featval,
                                                   stringval, syntrefs);

    if (feattype != realtype) {

      // only a conversion towards STRING is supported, and not from every type
      const bool castable = (feattype == STRING_FEAT_TYPE
                             && realtype != INVALID_FEAT_TYPE
                             && realtype != SYNTS_FEAT_TYPE);
      if (!castable) {
        cerr << "make_syntagm_path: invalid feattype for '" << literal << "'\n";
        continue;
      }

      if (realtype == DIC_FEAT_TYPE) {
        stringval = src_attr->get_value_name(featval);
      } else {
        assert(realtype == SYNT_FEAT_TYPE);
        ostringstream os;
        os << '(' << q << ',' << transno << ')';
        stringval = os.str();
      }
      // to do: cast SYNTS_FEAT_TYPE too
    }

    switch (feattype) {

    case STRING_FEAT_TYPE:
      path.string_feats[attrname] = stringval;
      break;

    case DIC_FEAT_TYPE:
      // NOTE(review): keyed by the attr_def returned by get_feat (source
      // feature), not by the one declared for attrname — confirm intended.
      path.dic_feats[src_attr] = featval;
      break;

    case SYNT_FEAT_TYPE:
      path.synt_feats[attrname].resize(1);
      path.synt_feats[attrname][0] = syntrefs[0];
      break;

    case SYNTS_FEAT_TYPE:
      path.synt_feats[attrname].swap(syntrefs);
      break;

    default:
      throw runtime_error("make_syntagm_path: internal error? with feattype\n");
    }
  }
}


void grammar_application::apply(int q1, int descno, int & begin, int & end) { 

  cache_fst & cache = caches[descno];
  syntagm_fst & desc = gram[descno];

  //cerr << "\napply:: look for " << desc.get_name() << " from state " << q1 << endl;

  if (cache.done[q1]) {
    //cerr << "already computed (in cache)\n";
    begin = cache.result[q1].first;
    end   = cache.result[q1].second;
    return;
  }

  vector<syntref> path;
  vector<match_res> res;

  inter_state(q1, desc, desc.start(), cache, path, output_type::one(), res);

/*
  cerr << "apply (" << desc.get_name() << " state=" << q1 
    << " out of inter_state " << res.size() << " matching sequences found\n";
*/

  sentence_fsa::synt_transitions & strans = text[q1].synt_trans;
  begin = cache.result[q1].first = strans.size();

//  cerr << "begin = " << begin << endl;

  for (vector<match_res>::iterator it = res.begin(); it != res.end(); ++it) {
    //syntagm_path p(text, desc.get_name(), it->path, it->output);

    syntagm_path p;
    make_syntagm_path(p, text, desc.get_name(), *it);
    strans.push_back(sentence_fsa::synt_transition(p, it->to));
  }

  end = cache.result[q1].second = strans.size();
  cache.done[q1] = true;

/*
  cerr << "end=" << end << endl;
  cerr << "out of apply " << desc.get_name() << " in state " << q1 << endl;
*/
}


void grammar_application::apply(int q1, const std::string & descname, int & begin, int & end) {
  int descno = gram.get_syntagm_idx(descname);
  apply(q1, descno, begin, end);
}

/**
 * Apply the grammar's start syntagm from the start state of the text.
 *
 * @return the (begin, end) pair reported by the underlying apply() call.
 */
pair<int, int> grammar_application::apply() {
  int first, last;
  apply(text.start(), gram.start(), first, last);
  return pair<int, int>(first, last);
}


/**
 * Convenience wrapper: build a grammar_application for (text, gram) and run
 * its parameterless apply().
 *
 * @return the (begin, end) pair it reports.
 */
std::pair<int, int> apply_grammar(sentence_fsa & text, grammar & gram) {
  grammar_application applier(text, gram);
  return applier.apply();
}

/**
 * Apply the start syntagm of `grm` from every state of `text` in turn
 * (states 0 .. text.size()-1), sharing one grammar_application between
 * the calls.
 *
 * @return the sum of (end - begin) over all states, i.e. the total number of
 *         syntagm transitions reported by the individual apply() calls.
 */
int apply_grammar_surf(sentence_fsa & text, grammar & grm) {

  const int axiom = grm.start();
  grammar_application applier(text, grm);

  int total = 0;
  int q = 0;
  while (q < text.size()) {
    int first, last;
    applier.apply(q, axiom, first, last);
    total += last - first;
    ++q;
  }

  return total;
}

