#include <iostream>
#include <string>
#include <vector>
#include <stdexcept>

#include <boost/regex.hpp>
#include <boost/regex/icu.hpp>
#include <outilex/sentence_fsa.h>

#include <outilex/wrtn_chart.h>
#include <outilex/fsa_transduction.h>


using namespace std;
using namespace boost;

namespace {


// Unicode-aware pattern used to split a transducer output string into symbols.
// Capture group 1 matches exactly one symbol, which is one of:
//   - a bracketed item           <...>
//   - a braced item              {...}
//   - a run of letters
//   - a run of decimal digits
//   - a single punctuation or symbol character
// Anything else (e.g. whitespace) is not matched and is therefore skipped by
// the token iterator.
//
// Note: the letter class has to be closed ("[[:L*:]]+") before the quantifier;
// with the closing bracket missing, the bracket set extends up to the end of
// the digit class and the two alternatives collapse into a single set that
// also contains the literal characters '+', '|' and '['.
u32regex fsa_token_regex(make_u32regex("(<[^>]*>"                 // <le.det+p>
                                       "|\\{[^}]*\\}"             // {la.le.det:fs}
                                       "|[[:L*:]]+"               // word form
                                       "|[[:Nd:]]+"               // 12321
                                       "|[[:P*:]]|[[:S*:]])"));   // .



template<typename OutputIterator>
void tokenize_output(const std::string & text, OutputIterator out) {

  u32regex_token_iterator<string::const_iterator>
    tok(make_u32regex_token_iterator(text, fsa_token_regex, 1)), end;

  while (tok != end) {
    *out = tok->str();
    ++out, ++tok;
  }
}


void tokenize_match_output(const wrtn_chart & chart, const wrtn_match & match,
                           vector<string> & res) {

  int size = match.path.size();

  for (int i = 0; i< size; ++i) {

    if (! match.out[i].empty()) { tokenize_output(match.out[i], back_inserter(res)); }
  
    const syntref & ref = match.path[i];

    if (ref.transno < 0) { //subcall
      tokenize_match_output(chart, chart[ref.qno][-ref.transno - 1], res);
    }
  }

  if (! match.out[size].empty()) { tokenize_output(match.out[size], back_inserter(res)); }
}


// Adds to the sentence automaton of `chart` a path labelled with the output
// symbols of `match`.
//
// The path starts at the state of the first path element of the match and
// ends at match.to; a fresh state is created after every symbol except the
// last one. Each symbol is turned into a lexical_mask, registered in `lexic`,
// and the resulting label is put on the transition.
//
// Returns 1 when the whole path was added, 0 when the match has no output or
// when a symbol could not be converted (a message is written to cerr in both
// cases). On a conversion error the function returns immediately; states and
// transitions already added for earlier symbols are left in place.
int process_match(wrtn_chart & chart, const wrtn_match & match, mutable_lexic & lexic) {

  sentence_fsa & fsa = chart.fsa;
  ling_def * lingdef = fsa.lingdef;

  vector<string> symbols;
  tokenize_match_output(chart, match, symbols);

  if (symbols.empty()) {
    cerr << "warning: match with no output\n";
    return 0;
  }

  const int nbsymbols = symbols.size();

  int from = match.path[0].qno;

  for (int k = 0; k < nbsymbols; ++k) {

    const bool is_last = (k + 1 == nbsymbols);

    try {

      lexical_mask mask(symbols[k], lingdef);
      if (! mask) { throw runtime_error("void symbol"); }

      // intermediate symbols lead to a new state, the last one to the
      // destination state of the match
      int dest;
      if (is_last) {
        dest = match.to;
      } else {
        dest = fsa.A.add_state();
      }

      int label = lexic.add(mask);
      fsa.A.add_trans(from, label, dest);
      from = dest;

    } catch (const std::exception & err) {
      cerr << "error with symbol : " << symbols[k] << ": " << err.what() << "\n";
      return 0;
    }
  }

  return 1;
}

} // namespace ""

// Applies the outputs of the matches named `axiom` found in `chart` to its
// sentence automaton.
//
// The lexic is first stripped from the automaton into a local mutable_lexic.
// Then, for every position q of the chart, the matches starting at the one
// returned by find_best_match(q, axiom) are handed to process_match for as
// long as their name equals `axiom`. Finally the automaton is pruned, given
// the updated lexic, determinized and topologically sorted.
//
// Returns the number of matches that process_match reported as successfully
// processed.
int wchart_fsa_transduct(wrtn_chart & chart, const std::string & axiom) {

  sentence_fsa & fsa = chart.fsa;

  mutable_lexic lexic;
  fsa.strip_lexic(lexic);

  int processed = 0;
  const int nbpos = chart.size();

  for (int q = 0; q < nbpos; ++q) {

    for (wrtn_chart::match_by_val_iterator it = chart.find_best_match(q, axiom);
         it != chart.by_val_end(q) && it->name == axiom;
         ++it) {
      processed += process_match(chart, *it, lexic);
    }
  }

  // clean up and normalize the rewritten automaton
  fsa.prune();
  fsa.set_lexic(lexic);
  fsa.determinize();
  fsa.topological_sort();

  return processed;
}

