#include <iostream>
#include <queue>
#include <boost/filesystem/path.hpp>
#include <boost/filesystem/convenience.hpp>
#include <boost/lexical_cast.hpp>

#include <cstdlib>

#include <outilex/lingdef.h>
#include <outilex/text_fsa.h>

#include <outilex/ugrammar.h>
#include <outilex/xml_text_uchart.h>
#include <outilex/uchart.h>
#include <outilex/featstruct.h>
#include <outilex/fs_node.h>

#include <outilex/usage.h>

using namespace std;
using namespace boost;

namespace fs = boost::filesystem;


/* Command-line synopsis. It lists every option that main() parses, including
 * -h and -l, which were previously missing from the text.
 */
const char * USAGE_DESCRIPTION =
"usage: chart-dump-all  [-h][-l <lingdef>][-n <sentenceno>][-form|-pos|-tags][-w][-fs][-prefix|-affix|-whole]\n"
" [-synt <syntname>][-id s:q:id] <chart>\n"
" (<lingdef> defaults to the value of the LINGDEF environment variable)\n";

/* Disabled (compiled out) usage() helper, kept for reference only.
 * The usage() that main() calls is not defined in this file (presumably it
 * comes from <outilex/usage.h> -- TODO confirm).
 * NOTE(review): several options listed below (-all, -init, -wellformed) are
 * not parsed by main() in this file, so this text does not describe the
 * current command line.
 */
#if 0
void usage() {
  cout << 
    "\n"
    "usage: " << progname << " [options] <chart>\n"
    "\n"
    "print the predicates and their arguments which have been identified in the text chart.\n"
    "\n"
    "options:\n"
    " -l <lingdef>  : specify the tagset description file (look for LINGDEF env var by default)\n"
    " -all|-affix   : print all matches (whatever their position in the sentence) this is the default\n"
    " -init|-prefix : print only matches which are prefix of a sentence\n"
    " -whole        : print only matches which cover a whole sentence\n"
    " -wellformed   : show only parse whose f-structure is well formed\n"
    " -w            : show weight of the analyses\n"
    "\n";
  exit(0);
}
#endif

namespace {

/* Which parses are reported, depending on where they start and end in the chart. */
enum match_type { 
  MATCH_ALL,     // report parses starting at any state of the chart (default)
  MATCH_PREFIX,  // report only parses starting at the first state (q == 0)
  MATCH_WHOLE    // like MATCH_PREFIX, and the parse must also end on a final state of chart.fsa
} match_mode = MATCH_ALL;

/* Settings read by dump_preds() only. No command-line option handled in the
 * visible part of this file changes them (main() sets SHOW_WEIGHT, not show_weight).
 */
bool check_wellformed = false; // skip parses whose feature structure fails is_wellformed()
bool factorize = true;         // group identical renderings and count them instead of printing each
bool show_weight = false;      // append "weight = <w>" to each rendering


/* Work list of feature-structure nodes still to be printed.
 * Filled by dump_fs() (set elements, "modifs" values) and by the dump_*
 * functions (entry nodes); drained by dump_queue().
 */
queue<const fs_node *> Q;



void dump_fs(const fs_node * node, ostream & os) try {

  node = follow(node);

  if (node->type == FS_UNSET) {
    os << "[bad unset FS]";
    return;
  }

  if (node->type == FS_STRING) { // dump string as-is
    os << node->str;
    /*
    if (node->str == "zero") {
      os << '_';
    } else {
      os << node->str;
    }
    */
    return;
  }

  if (node->type == FS_VAL) { // dump feat_set as-is
    os << node->feat;
    return;
  }

  if (node->type == FS_SET) { // push all elem in the queue
    const fs_node * n2 = node + node->next;
    while (n2 != node) {
      Q.push(n2 + n2->offset);
      n2 += n2->next;
    }
    return;
  }

  if (node->type != FS_FS) {
    cerr << "dump_fs: bad fs node type =" << node->type << endl;
    
    if (node->type == FS_EXIST) {
      os << "EXIST";
    } else if (node->type == FS_NOEXIST) {
      os << "~EXIST";  
    } else if (node->type == FS_CONSTR) {
      os << "constr " << node->str;
    } else {
      throw logic_error("dump_fs: invalid node type : " + lexical_cast<string>(node->type));
    }
    return;
  }
  assert(node->type == FS_FS);


  /* if there is some modifieurs push them into the stack */
  
  const fs_node * n2 = find_attr_val(node, "modifs");

  if (n2) { Q.push(n2); }


  n2 = find_attr_val(node, "Pred");

  if (n2) { // new predicate representation
 
    dump_fs(n2, os); os << '(';
 
    n2 = find_attr_val(node, "nArgs");
    if (! n2) {
      os << "?)";
      cerr << "mal-formed featstruct (Pred without nArgs attribute)\n";
      return;
    }

    if (n2->type != FS_STRING) {
      os << "?)\n";
      cerr << "bad nArgs attribute (not a string)\n";
      return;
    }
  
    if (n2->str == "*") { //nombre variable d'arguments
  
      n2 = find_attr_val(node, "args");

      if (n2 == 0) {
        cerr << "bad featstruct, no args found (nArgs='*')\n";
        return;
      }

      if (n2->type != FS_SET) {
        cerr << "bad Pred featstruct 'args != SET'\n";
        return;
      }


      const fs_node * n3 = n2 + n2->next;
      while (n3 != n2) {
        dump_fs(n3 + n3->offset, os);
        if (n3 + n3->next != n2) { os << ", "; }
        n3 = n3 + n3->next;
      }


    } else { // nombre d'arguments fixe
    
      int nArgs = lexical_cast<int>(n2->str);

      char argname[] = { 'N', '0', 0 };
      for (int i = 0; i < nArgs; ++i) {

        argname[0] = 'n';
        argname[1] = '0' + i;

        if (i > 0) { os << ", "; }

        n2 = find_attr_val(node, argname);

        if (! n2) {
          argname[0] = 'N';
          n2 = find_attr_val(node, argname);
        }

        if (n2 && n2->type != FS_UNSET) {
          dump_fs(n2, os);
        } else {
          os << '_';
        }
      }
    }
    os << ')';
    return;
  }



  n2 = find_attr_val(node, "PRED");

  if (n2) { // old predicate representation (will be obseleted)

    static bool warning = true;
    if (warning) {
      cerr << "warning PRED notation is deprecated (use Pred and nArgs instead)\n"; 
      warning = false;
    }

    dump_fs(n2, os); os << '(';
  
    int nargs = 3;

    n2 = find_attr_val(node, "ARGS");

    if (n2) {
      nargs = lexical_cast<int>(n2->str);
    }

    char argname[] = { 'N', '0', 0 };

    for (int i = 0; i < nargs; ++i) {

      argname[1] = '0' + i;

      if (i > 0) { os << ", "; }

      n2 = find_attr_val(node, argname);

      if (n2) {
        dump_fs(n2, os);
      } else {
        os << "_";
      }
    }

    os << ")";

    return;
  } 
  
  // not a Pred featstruct

  n2 = find_attr_val(node, "CAT");

  if (n2 && n2->str == "LocN") { // if LocN dump prep
    n2 = find_attr_val(node, "prep");
    if (n2) {
      dump_fs(n2, os); os << ' ';
    }
  }

  n2 = find_attr_val(node, "head");

  if (n2) { // dump head if there is one
 
    dump_fs(n2, os);

  } else if (n2 = find_attr_val(node, "form")) { // else dump form if there is one

    dump_fs(n2, os);
  
  } else { // don't know what to do
    os << "?";
#if 0
    cerr << "dump_fs: don't know what to do with fs = " << node << endl;
    n2 = find_attr_val(node, "CAT");
    if (n2) {
      cerr << "CAT = " << n2->str << endl;
    }
#endif
  }

} catch (exception & e) {
  cerr << "dump_fs: bad feature structure" << node << ": " << e.what() << '\n';
}


/**
 * Render every node waiting in the global queue Q, one per line, until the
 * queue is empty. dump_fs() may push further nodes while one is being
 * rendered; those are handled by the same loop.
 */
void dump_queue(ostream & os) {
  for (;;) {
    if (Q.empty()) { break; }

    const fs_node * const head = Q.front();
    dump_fs(head, os);
    Q.pop();

    os << '\n';
  }
}


int dump_preds(const uchart & chart, ostream & os) {

  int num = 0;

  /* if we want to factorize, different analyses
   * leading to the same predicate-argument structure
   */

  map<string, int> analyses;

  for (int q = 0; q < chart.size(); ++q) {

    for (uchart::const_synt_iterator it = chart.synt_begin(q); it != chart.synt_end(q); ++it) {

      if (it->name == "mainP") {

        /* check if synt ends to a final synt (if in MATCH_WHOLE mode) */
        if ((match_mode == MATCH_WHOLE) && ! chart.fsa.final(it->to)) { continue; }

        /* check if featstruct is well formed */
        if (check_wellformed && ! is_wellformed(it->fs)) { continue; }

        Q.push(it->fs.get_entry_node());
 
        if (! factorize) {
          dump_queue(os);
          if (show_weight) { os << "weight = " << it->w << "\n"; }
          os << "========\n\n";
        } else { // factorization of analyses
          ostringstream oss;
          dump_queue(oss);
          if (show_weight) { oss << "weight = " << it->w << "\n"; }
          analyses[oss.str()]++;
        }
        ++num;
      }
    }

    if (match_mode != MATCH_ALL) { // print synts only from initial state
      break;
    }
  }

  if (factorize && ! analyses.empty()) {
    for (map<string, int>::iterator it = analyses.begin(), end = analyses.end();
         it != end; ++it) {
      os << it->first << it->second << " time(s) ========\n\n";
    }
    os << analyses.size() << " differents pred/args analyses. ";
  }

  if (num == 0) {
    os << "FAIL\n";
  } else {
    os << "total of " << num << " analys(es).\n";
  }
  return num;
}


/* How dump_path() prints a lexical entry; selected by -form, -pos and -tags. */
enum anonymous1 {
  SHOW_FORM = 0, SHOW_POS, SHOW_ALL // form only / form '.' pos name / the whole entry
} OUTPUT_MODE = SHOW_FORM;

/* Superseded by match_type / match_mode; kept commented out.
enum anonymous2 {
  AFFIX, PREFIX, WHOLE
} MATCH_TYPE = AFFIX;
*/

bool SHOW_WEIGHT = false; // set by -w: dump_path() appends "/<weight>" to each syntagm
bool DUMP_FS = false;     // set by -fs: pretty-print the feature structure of each dumped syntagm



void dump_path(const syntagm & synt, const uchart & chart, ostream & os) {

  os << "(" << synt.name << ' ';
  const vector<syntref> & path = synt.path;

  for (int i = 0; i < path.size(); ++i) {

    if (path[i].transno < 0) { // syntagm path

      dump_path(chart.get_synt(path[i].qno, path[i].transno), chart, os);

    } else { // lexical entry
      
      switch (OUTPUT_MODE) {

      case SHOW_FORM:
        os << chart.get_lex(path[i].qno, path[i].transno).in().form;  
        break;

      case SHOW_POS:
        os << chart.get_lex(path[i].qno, path[i].transno).in().form  
          << '.' << chart.get_lex(path[i].qno, path[i].transno).in().pos->get_name();
        break;

      case SHOW_ALL:
        os << chart.get_lex(path[i].qno, path[i].transno).in();  
        break;
      }
    }
    os << ' ';
  }
  os << synt.name << ")";

  if (SHOW_WEIGHT) { os << "/" << synt.w; }
}



void dump_synt(const uchart & chart, int qno, int syntno, ostream & os) {

  const syntagm & synt = chart.get_synt(qno, syntno);

  dump_path(synt, chart, os);
  os << '\n';
  if (DUMP_FS) { 
    synt.fs.prettyprint(os);
  }
  os << '\n';
}




int dump_synts(const uchart & chart, const string & syntname, ostream & os) {

  int num = 0;

  for (int q = 0; q < chart.size(); ++q) {

    uchart::const_by_name_iterator begin, end;
    chart.find(q, syntname, begin, end);

    while (begin != end) {

      const syntagm & synt = chart.get_synt(q, begin->second);

      if (match_mode == MATCH_WHOLE && ! chart.fsa.final(synt.to)) { ++begin; continue; }

      os << "match:\n";
      dump_path(synt, chart, os);
      os << '\n';

      os << "=========\n";
      Q.push(synt.fs.get_entry_node());
      dump_queue(os);
      os << "=========\n\n";

      if (DUMP_FS) { 
        synt.fs.prettyprint(os);
      }

      ++num;
      ++begin;
      os << '\n';
    }

    if (match_mode == MATCH_PREFIX || match_mode == MATCH_WHOLE) {
      // print synts only from initial state
      break;
    }
  }

  return num;
}

} // namespace ""


/**
 * Entry point: parse the command line, load the tagset description and the
 * chart file, then dump the requested syntagms on standard output.
 *
 * Options:
 *   -h | -help       print the usage text
 *   -l <lingdef>     tagset description file (default: LINGDEF env variable)
 *   -n <sentenceno>  process only the given sentence
 *   -id s:q:id       dump the single syntagm `id` of state `q` in sentence `s`
 *   -synt <name>     name of the syntagms to dump (default "mainP")
 *   -affix | -prefix | -whole   select match_mode
 *   -fs              also pretty-print feature structures
 *   -w               show weights
 *   -form | -pos | -tags        select OUTPUT_MODE
 * Any other argument is taken as the chart path.
 *
 * Exits with status 1 on bad arguments, on an out-of-range -id state, when
 * the requested sentence cannot be read, or on any exception.
 */
int main(int argc, char ** argv) try {

  fs::path chartpath, lingdefpath;
  string syntname = "mainP";
  int sentenceno = -1, qno = -1, syntno = -1; // -1 means "not given"

  // the LINGDEF environment variable provides the default for -l
  char * text = getenv("LINGDEF");
  if (text) {
    lingdefpath = fs::path(text, fs::native);
  }


  argv++, argc--;

  if (argc == 0) { usage(); }

  while (argc) {

    string arg = *argv;

    if ((arg == "-h") || (arg == "-help")) {

      usage();

    } else if (arg == "-l") {

      argv++, argc--;
      if (! argc) { cerr << "bad args: '-l' needs an argument\n"; exit(1); }
      lingdefpath = fs::path(*argv, fs::native);

    } else if (arg == "-n") {

      argv++, argc--;
      if (! argc) { cerr << "bad args: '-n' needs an argument\n"; exit(1); }
      sentenceno = lexical_cast<int>(*argv);

    } else if (arg == "-id") {

      argv++, argc--;
      if (! argc) { cerr << "bad args: '-id' needs an argument\n"; exit(1); }
      if (sscanf(*argv, "%d:%d:%d", & sentenceno, & qno, & syntno) != 3) {
        cerr << "bad argument for id\n";
        exit(1);
      }

    } else if (arg == "-synt") {

      argv++, argc--;
      if (! argc) { cerr << "bad args: '-synt' needs an argument\n"; exit(1); }
      syntname = *argv;

    } else if (arg == "-affix") {

      match_mode = MATCH_ALL;

    } else if (arg == "-prefix") {

      match_mode = MATCH_PREFIX;

    } else if (arg == "-whole") {

      match_mode = MATCH_WHOLE;

    } else if (arg == "-fs") {

      DUMP_FS = true;

    } else if (arg == "-w") {

      SHOW_WEIGHT = true;

    } else if (arg == "-form") {

      OUTPUT_MODE = SHOW_FORM;

    } else if (arg == "-pos") {

      OUTPUT_MODE = SHOW_POS;

    } else if (arg == "-tags") {

      OUTPUT_MODE = SHOW_ALL;

    } else {
      // NOTE(review): unrecognized options also end up here and are taken
      // as the chart path.
      chartpath = fs::path(arg, fs::native);
    }
    argv++, argc--;
  }

  if (lingdefpath.empty() || chartpath.empty() || syntname.empty()) {
    bad_args();
    exit(1);
  }


  ling_def lingdef(lingdefpath);

  unification_init(& lingdef);

  xml_itext_uchart ichart(chartpath, & lingdef);

  uchart chart;

  int nbmatch = 0;

  if (syntno != -1) { // dump one syntagm, designated by -id

    ichart.seek(sentenceno);

    if (! (ichart >> chart)) {
      cerr << "unable to read sentence " << sentenceno << "\n";
      exit(1);
    }

    // valid states are 0 .. size-1; stop instead of reading past the chart
    if (qno < 0 || chart.size() <= qno) {
      cerr << "sentence " << sentenceno << " too small (size=" << chart.size() << ")\n";
      exit(1);
    }
    dump_synt(chart, qno, syntno, cout);

  } else if (sentenceno != -1) { // proceed only one sentence

    ichart.seek(sentenceno);

    if (! (ichart >> chart)) {
      cerr << "unable to read sentence " << sentenceno << "\n";
      exit(1);
    }

    cout << "sentence #" << sentenceno << "\n"
      << chart.fsa.text << "\n\n";

    nbmatch = dump_synts(chart, syntname, cout);

    cout << nbmatch << " match(es).\n";

  } else { // proceed every sentence of the chart file

    sentenceno = 0;
    while (ichart >> chart) {

      cout << "sentence #" << sentenceno << "\n"
        << chart.fsa.text << "\n\n";

      nbmatch += dump_synts(chart, syntname, cout);
      sentenceno++;
    }
    cout << nbmatch << " match(es).\n";
  }


  return 0;

} catch (const exception & e) {

  cerr << "fatal error: " << e.what() << endl;
  exit(1);

} catch (...) { cerr << "ouch!\n"; exit(1); }


