#include <string>
#include <iostream>

#include <cassert>

#include <outilex/lexical_entry.h>
#include <outilex/xml.h>
#include <outilex/lingdef.h>


using namespace std;


#if 0
/* Look up the feature named 'attrname' on this entry (currently compiled out).
 *
 * - "lemma" / "form": 'stringval' receives the corresponding string and
 *   STRING_FEAT_TYPE is returned ('attr' and 'featval' are left untouched).
 * - an attribute known to the entry's POS: 'attr' receives its definition,
 *   'featval' the value stored for it, and DIC_FEAT_TYPE is returned.
 * - anything else: INVALID_FEAT_TYPE, no output argument is modified.
 */
syntagm_feat_type lexical_entry::get_feat(const string & attrname,
                                          attr_def * & attr, int & featval,
                                          string & stringval) const {

  const bool wants_lemma = (attrname == "lemma");

  if (wants_lemma || attrname == "form") {
    stringval = wants_lemma ? lemma : form;
    return STRING_FEAT_TYPE;
  }

  const int attr_idx = pos->get_attr_idx(attrname);

  if (attr_idx != -1) {
    attr    = pos->get_attr(attr_idx);
    featval = feats[attr_idx];
    return DIC_FEAT_TYPE;
  }

  return INVALID_FEAT_TYPE;
}
#endif

/* Serialise this entry through 'writer' as a "lex_entry" element.
 *
 * Emitted in order: the "form" and "lemma" elements, a "pos" element carrying
 * the POS name in its "name" attribute, then one "feat" element per stored
 * feature with "name" (attribute name) and "value" (value name) attributes.
 */
void lexical_entry::write_XML(xmlwriter & writer) const {

  writer.start_element("lex_entry");

  writer.write_element("form", form);
  writer.write_element("lemma", lemma);

  /* part of speech */
  writer.start_element("pos");
  writer.write_attribute("name", pos->get_name());
  writer.end_element();

  /* features: feats[k] is the value of the k-th attribute of the POS */
  int k = 0;
  while (k < feats.size()) {
    attr_def * def = pos->get_attr(k);

    writer.start_element("feat");
    writer.write_attribute("name", def->get_name());
    writer.write_attribute("value", def->get_value_name(feats[k]));
    writer.end_element();

    ++k;
  }

  writer.end_element(); /* closes "lex_entry" */
}


void lexical_entry::read_XML(xmlNodePtr node, ling_def * lingdef) {

  clear();

  node = node->xmlChildrenNode;

  char * text;

  while (node) {
    
    if (xmlStrcmp(node->name, (const xmlChar *) "form") == 0) {
      
      text = (char *) xmlNodeGetContent(node);
      form = text;
      xmlFree(text);
    
    } else if (xmlStrcmp(node->name, (const xmlChar *) "lemma") == 0) {
      
      text = (char *) xmlNodeGetContent(node);
      lemma = text;
      xmlFree(text);
    
    } else if (xmlStrcmp(node->name, "pos") == 0) {
    
      text = xmlGetProp(node, "name");
      pos = lingdef->get_pos(text);
      if (pos == NULL) { throw xml_parse_error("lex_entry: unknow POS : '" + string(text) + "'"); }
      xmlFree(text);

      feats.resize(pos->nb_attrs());
 
      for (int i = 0; i < feats.size(); ++i) { 
        feats[i] = pos->get_attr(i)->get_default_value();
        /* if default value is 'unspec', we set it to 'unset'... */
        if (feats[i] == -1) { feats[i] = 0; } 
      }

    } else if (xmlStrcmp(node->name, "feat") == 0) {
      
      if (pos == NULL) { throw xml_parse_error("lex_entry: no POS\n"); }

      text = xmlGetProp(node, "name");
      int idx = pos->get_attr_idx(text);

      if (idx == -1) {
      
        if (unknow_attributes.find(text) == unknow_attributes.end()) {
          cerr << "lex_entry::read_XML: unknow attribute '" << text
            << "' in POS " << pos->get_name() << '\n';
          unknow_attributes.insert(text);
        }
        xmlFree(text);

      } else {

        char * val = xmlGetProp(node, "value");

        if (val == NULL) { throw xml_parse_error("lex_entry: feat with no value"); }

        int v = pos->get_attr(idx)->get_value(val);
 
        if (v == -1) {
          cerr << "warning: unknow value '" << val << "' for attribute '" << text << "'" 
            << " in POS '" << pos->get_name() << "'\n";
        } else {
          feats[idx] = v;
        }

        xmlFree(text);
        xmlFree(val);
      }
    }

    node = node->next;  
  }
}

void lexical_entry::dump_text(ostream & os) const {

  if (pos == pos->get_lingdef()->punc_pos() || pos == pos->get_lingdef()->number_pos()) {
    os << form; return;
  }

  os << '{' << form << ',' << lemma << '.' << pos->get_name();

  for (int i = 0; i < feats.size(); ++i) {
    attr_def * attr = pos->get_attr(i);
    if (feats[i] != attr->get_default_value()) { // output feat value when it's not the default value
      attr->dump_feat_val(feats[i], os);
    }
  }
  os << '}';
}

/* Split the "POS+feat-feat..." part of a textual entry into tokens.
 *
 * The tokens are appended to 'res' (which is not cleared first):
 *   - first the POS, i.e. everything before the first '+' or '-'
 *     (the whole text when it contains no sign, an empty string when the
 *     text starts with a sign);
 *   - then one token per feature, each one starting with its '+' or '-'
 *     sign and running up to the next sign or the end of the text.
 * Nothing is appended for an empty text.
 *
 * Example: "N+m-s" gives "N", "+m", "-s".
 */
static void feats_tokenize(std::vector<std::string> & res, const std::string & text) {

  if (text.empty()) { return; }

  const std::string::size_type npos = std::string::npos;

  /* position of the sign opening the current feature, npos when none is left */
  std::string::size_type sign = text.find_first_of("+-");

  /* push pos: substr(0, npos) is the whole text */
  res.push_back(text.substr(0, sign));

  while (sign != npos) {

    /* positions are kept as size_type: storing them in an int would truncate
     * large offsets and break the comparison with npos */
    const std::string::size_type next = text.find_first_of("+-", sign + 1);

    res.push_back(text.substr(sign, next == npos ? npos : next - sign));

    sign = next;
  }
}

/* Load this entry from its textual form "{form,lemma.POS+feat-feat...}".
 *
 * 'form' is the text between '{' and the first ',', 'lemma' the text between
 * that ',' and the first '.', and the rest (up to the closing '}') is split
 * by feats_tokenize() into a POS name followed by signed features. 'feats'
 * is sized from the POS, filled with the attributes' default values, then
 * each listed feature is resolved with pos_def's get_featval() and stored.
 *
 * NOTE(review): a text that does not start with '{' (or is just "{") is
 * accepted but leaves the entry untouched -- confirm this is intended.
 *
 * Throws runtime_error when the text is empty, is not closed by '}', has no
 * "form,lemma." prefix, names an unknown POS or carries a feature that the
 * POS cannot resolve. On error the entry may be partially updated.
 */
void lexical_entry::read_text(const string & text, ling_def * lingdef) {

  if (text.empty()) {
    throw runtime_error("lexentry::read_text: empty text");
  }

  const string::size_type size = text.size();

  if (text[0] != '{' || size <= 1) { return; } // not a dic entry

  /* checked at run time: an assert would vanish in NDEBUG builds and let a
   * truncated entry through */
  if (text[size - 1] != '}') {
    throw runtime_error("lexentry:read_text: bad text entry: " + text);
  }

  const string::size_type dot = text.find('.');
  const string::size_type comma = text.find(',');

  /* a missing comma yields npos, which is greater than any dot position */
  if (dot == string::npos || comma > dot) {
    throw runtime_error("lexentry:read_text: bad text entry: " + text);
  }

  form = text.substr(1, comma - 1);
  lemma = text.substr(comma + 1, dot - (comma + 1));

  /* everything between the dot and the closing brace: POS and features */
  vector<string> vec;
  feats_tokenize(vec, text.substr(dot + 1, size - 1 - (dot + 1)));

  if (vec.empty()) { throw runtime_error("entry:read_text: baad format: " + text); }

  pos = lingdef->get_pos(vec[0]);
  if (pos == NULL) { throw runtime_error("entry::read: unknow POS: " + vec[0]); }

  feats.resize(pos->nb_attrs());

  const int nbfeats = (int) feats.size();

  for (int i = 0; i < nbfeats; ++i) { feats[i] = pos->get_attr(i)->get_default_value(); }

  for (vector<string>::size_type i = 1; i < vec.size(); ++i) {

    /* start from "not found" in case get_featval() leaves them untouched */
    int idx = -1, val = -1;
    pos->get_featval(vec[i], idx, val);

    /* never write outside of 'feats', whatever get_featval() reported */
    if (val == -1 || idx < 0 || idx >= nbfeats) {
      throw runtime_error("entry::read: bad feat: " + vec[i] + " in " + text);
    }
    feats[idx] = val;
  }
}


