#include <iostream>
#include <string>
#include <set>

#include <outilex/xml.h>
#include <outilex/stringtok.h>
#include <outilex/feat_set.h>

#include <outilex/lingdef.h>

using namespace std;


// Keys (POS name followed by feature text) of unknown features that have
// already been reported. In this file it is only referenced by the disabled
// (#if 0) diagnostic inside pos_def::get_feat_set.
set<string> unknow_attributes;

// Sentinel pointer with the non-dereferenceable value -1.
// NOTE(review): presumably stands for "POS unspecified" -- confirm with callers.
pos_def * const pos_unspec = (pos_def *) -1;



/* Loads this attribute definition from an XML node.
 *
 * Properties read from 'node':
 *   name     (required) attribute name
 *   type     (required) name of an attribute type known to 'lingdef'
 *   default  (optional) default value; def_value is -1 (unspec) when absent
 *   shortcut (optional) shortcut is true only when the property equals "yes"
 *
 * Throws xml_parse_error when 'name' or 'type' is missing, when the type is
 * unknown to 'lingdef', or when the default value is unknown to the type.
 * Every buffer returned by xmlGetProp is released before an error is thrown.
 */
void attr_def::read_XML(xmlNodePtr node, ling_def * lingdef) {

  char * text = xmlGetProp(node, "name");
  if (text == NULL) {
    throw xml_parse_error("in attribute: missing 'name' property");
  }
  name = text;
  xmlFree(text);

  text = xmlGetProp(node, "type");
  if (text == NULL) {
    throw xml_parse_error("in attribute " + name + ": missing 'type' property");
  }

  type = lingdef->get_attr_type(text);

  if (type == 0) {
    // build the message first: it still needs 'text', which must be freed
    // before the exception leaves this function
    const string msg = "in attribute " + name + ": unknow attribute type '" + text + "'";
    xmlFree(text);
    throw xml_parse_error(msg);
  }
  xmlFree(text);

  text = xmlGetProp(node, "default");

  if (text) {

    def_value = type->get_value(text);
    if (def_value == -1) {
      const string msg = "in attribute " + name +
                         ": unknow default value '" + text + "'";
      xmlFree(text);
      throw xml_parse_error(msg);
    }
    xmlFree(text);

  } else { def_value = -1; } // -1 is for unspec

  text = xmlGetProp(node, "shortcut");

  shortcut = (text != NULL && xmlStrcmp(text, "yes") == 0);

  if (text != NULL) { xmlFree(text); }
}


/* Forwards to the attribute type, which registers shortcut values for this
 * attribute into 'pos'. */
void attr_def::register_shortcut_values(pos_def * pos) {
  (*type).register_shortcut_values(this, pos);
}


/* Builds a POS definition from an XML node and registers it in 'ldef'.
 *
 * Any exception raised by read_XML is propagated. Because the destructor does
 * not run for an object whose constructor throws, the attribute definitions
 * already stored in 'attrs' are released here before rethrowing.
 *
 * NOTE(review): read_XML registers this object in 'ldef' before it can fail;
 * that registration is not undone here.
 */
pos_def::pos_def(xmlNodePtr node, ling_def * ldef)
  : name(), lingdef(ldef), attrs(), attr_by_name() {
  try {
    read_XML(node);
  } catch (...) {
    for (std::size_t i = 0; i < attrs.size(); ++i) { delete attrs[i]; }
    throw;
  }
}

/* Releases the attribute definitions owned by this POS. */
pos_def::~pos_def() {

  // free attributes definitions (unsigned index: matches the type of size())
  for (std::size_t i = 0; i < attrs.size(); ++i) { delete attrs[i]; }
}


/* Associates 'shortcut' with the pair (attr, v) for this POS.
 * Throws xml_parse_error if the shortcut is already registered; the existing
 * entry is left untouched in that case. */
void pos_def::register_shortcut(const string & shortcut, attr_def * attr, int v) {

  // insert() reports through its bool whether the key was new
  const bool inserted =
    shortcut_values.insert(make_pair(shortcut, make_pair(attr, v))).second;

  if (! inserted) {
    throw xml_parse_error("in POS " + name + " : shortcut '" + shortcut + "' already present\n");
  }
}


bool pos_def::get_featval(const std::string & text, int & idx, int & val) const {

  string::size_type eq = text.find('=');

  attr_def * attr;

  if (eq == string::npos) {
    if (! find_shortcut_value(text, attr, val)) {
      return false;
    }
    if (val == -1) { return false; }
    idx = get_attr_idx(attr->get_name());
    return true;
  }

  // assume text is of the form +'attrname'='attrvalue'
 
  assert(text[0] == '+');
  attr = get_attr(text.substr(1, eq - 1));

  if (attr == NULL) { return false; }
  val = attr->get_value(text.substr(eq + 1));

  idx = get_attr_idx(attr->get_name());
  return true;
}


bool pos_def::get_feat_set(const std::string & text, attr_def * & attr, feat_set & fs) const {

  string::size_type eq = text.find('=');

  if (eq == string::npos) {
    int val;
    if (! find_shortcut_value(text, attr, val)) {
#if 0
      string unk = name + text;
      if (unknow_attributes.find(unk) == unknow_attributes.end()) {
        cerr << "pos::get_feat_set: in POS '" << name << "': unknown feat '" << text << "'\n";
        unknow_attributes.insert(unk);
      }
#endif
      return false;
    }
    fs.set(attr->get_type(), val);
    return true;
  }

  // assume text is of the form +'attrname'='attrvalue'
 
  assert(text[0] == '+');
  attr = get_attr(text.substr(1, eq - 1));

  if (attr == NULL) { 
    cerr << "pos::get_feat_set : in POS " << name << ": in feat " << text <<
      ": unknown attr " << text.substr(1, eq - 1) << "\n";
    return false; 
  }
  fs.set(attr->get_type(), text.substr(eq + 1));

  return true;
}


/* Loads this POS definition from an XML node.
 *
 * - reads the required 'name' property and registers the POS in lingdef;
 * - registers each comma-separated name of the 'alias' property (or of
 *   'cutename' when 'alias' is absent) as another name of this POS;
 * - for every child element named "attribute": builds an attr_def, records
 *   it under its name and under each of its comma-separated 'alias' names,
 *   registers the '+name' (set) and '-name' (unset) shortcuts and, when the
 *   attribute asks for it, its value shortcuts.
 *
 * Throws xml_parse_error when a 'name' property is missing or when an
 * attribute name or alias is defined twice; exceptions raised by the helpers
 * called here are propagated. Buffers returned by xmlGetProp are released
 * before any statement that is expected to throw.
 */
void pos_def::read_XML(xmlNodePtr node) {

  char * text = xmlGetProp(node, "name");
  if (text == NULL) {
    throw xml_parse_error("POS definition: missing 'name' property");
  }
  name = text;
  xmlFree(text);

  lingdef->register_pos(this, name);

  if ((text = xmlGetProp(node, "alias")) == NULL) { text = xmlGetProp(node, "cutename"); }

  if (text) {
    vector<string> cutenames;
    stringtok(text, ",", back_inserter(cutenames));
    xmlFree(text); // tokens are copied: release before register_pos can throw

    for (vector<string>::iterator it = cutenames.begin(); it != cutenames.end(); ++it) {
      lingdef->register_pos(this, *it);
    }
  }


  for (node = node->children; node; node = node->next) {

    if (xmlStrcmp(node->name, "attribute") != 0) { continue; }

    text = xmlGetProp(node, "name");
    if (text == NULL) {
      throw xml_parse_error("in " + name + " POS definition: attribute without 'name' property");
    }
    const string attr_name(text);
    xmlFree(text);

    if (attr_by_name.find(attr_name) != attr_by_name.end()) {
      throw xml_parse_error("in " + name + " POS definition: too much '"
                            + attr_name + "' attribute definitions");
    }

    attr_def * attr = new attr_def(node, lingdef);

    const int idx = static_cast<int>(attrs.size());
    attrs.push_back(attr);
    attr_by_name[attr->get_name()] = idx;


    /* register attribute aliases names ... */

    text = xmlGetProp(node, "alias");

    if (text) {
      vector<string> aliases;
      stringtok(text, ",", back_inserter(aliases));
      xmlFree(text); // released before the duplicate check below can throw

      for (vector<string>::iterator it = aliases.begin(); it != aliases.end(); ++it) {
        if (attr_by_name.find(*it) != attr_by_name.end()) {
          throw xml_parse_error("in " + name + " POS definition: too much "
                                + *it + " attr definition");
        }
        attr_by_name[*it] = idx;
      }
    }


    /* register shortcut set values */

    string v = '+' + attr->get_name();
    register_shortcut(v, attr, -1); // -1 means 'set' (all possible values except unset)

    v[0] = '-';
    register_shortcut(v, attr, 0); // 0 means 'unset'

    if (attr->do_shortcut()) {
      attr->register_shortcut_values(this);
    }
  }
}


int pos_def::get_attr_idx(const string & str) const {
  map<string, int>::const_iterator it = attr_by_name.find(str);
  if (it == attr_by_name.end()) { return -1; }
  return it->second;
}

/* Returns the attribute definition registered under 'str' (name or alias),
 * or NULL when get_attr_idx does not know that name. */
attr_def * pos_def::get_attr(const string & str) const {

  const int where = get_attr_idx(str);

  if (where == -1) { return NULL; }

  return attrs[where];
}


/* Tells whether 'text' is a shortcut registered for this POS. */
bool pos_def::is_shortcut_value(const string & text) const {
  return shortcut_values.count(text) != 0;
}


bool pos_def::find_shortcut_value(const string & text, attr_def * & attr, int & v) const {

  shortcut_map::const_iterator it = shortcut_values.find(text);

  if (it == shortcut_values.end()) { return false; }

  attr = (it->second).first;
  v = (it->second).second;
  return true;
}



/* syntagm_def */

/* Returns the attribute definition stored under 'str' in this syntagm
 * definition, or NULL when there is none. */
attr_def * syntagm_def::get_dic_attr(const string & str) const {

  const map<string, attr_def *>::const_iterator entry = attrs.find(str);

  return (entry != attrs.end()) ? entry->second : NULL;
}


bool syntagm_def::get_feat_set(const std::string & text, attr_def * & attr, feat_set & fs) const {

  string::size_type eq = text.find('=');

  if (eq == string::npos) { return false; }

  // assume text is of the form +'attrname'='attrvalue'
 
  assert(text[0] == '+');
  attr = get_dic_attr(text.substr(1, eq - 1));

  if (attr == NULL) { return false; }
  fs.set(attr->get_type(), text.substr(eq + 1));

  return true;
}

/* Returns the feature type recorded for 'attrname', or STRING_FEAT_TYPE when
 * the name has no recorded type. For a DIC_FEAT_TYPE feature, 'attr' is set
 * to the matching attribute definition (asserted to exist); in every other
 * case 'attr' is left untouched. */
syntagm_feat_type syntagm_def::get_feat_type(const string & attrname, attr_def * & attr) const {

  const map<string, syntagm_feat_type>::const_iterator entry = feats_type.find(attrname);

  if (entry == feats_type.end()) { return STRING_FEAT_TYPE; }

  const syntagm_feat_type kind = entry->second;

  if (kind == DIC_FEAT_TYPE) {
    attr = get_dic_attr(attrname);
    assert(attr != NULL);
  }

  return kind;
}


/* Records 'type' under its own name.
 * Throws xml_parse_error when an attribute type with that name is already
 * recorded. */
void ling_def::add_attr_type(attr_type * type) {

  const string & type_name = type->get_name();

  if (attr_types.count(type_name) != 0) {
    throw xml_parse_error("ling_def: add attr_type: too much definition of '" + type_name
                          + "' attribute type");
  }

  attr_types[type_name] = type;
}



namespace {

/* In-memory XML description of the predefined 'casse' attribute type.
 * Declared const: a string literal must not be bound to a non-const char *
 * (that conversion is ill-formed since C++11). */
const char * const casse_attribute_xml =
"<attrtype name='casse' type='enum'>"
"  <value name='min'/>"
"  <value name='maj' alias='upper'/>"
"  <value name='cap' alias='capit'/>"
"  <value name='other'/>"
"</attrtype>";

/* In-memory XML description of the predefined 'lex' POS. */
const char * const lex_pos_xml =
"<pos name='lex' cutename='LEX,MOT'>"
"  <attribute name='case' type='casse' shortcut='yes' default='min'/>"
"</pos>";

} // anonymous namespace


void ling_def::read_xml(xmlNodePtr node) {

  // create and register predefinite POSs

  pos_epsilon = new pos_def("empty", this);
  pos_punc    = new pos_def("punc", this);
  pos_number  = new pos_def("number", this);
  pos_unknow  = new pos_def("unknow", this);
  //pos_lex     = new pos_def("lex", this);

  register_pos(pos_epsilon);
  register_pos(pos_epsilon, "E");
  register_pos(pos_punc);
  register_pos(pos_punc, "PNC");
  register_pos(pos_number);
  register_pos(pos_number, "NB");
  register_pos(pos_number, "NUM");
  register_pos(pos_number, "numeric");
  register_pos(pos_number, "num");
  register_pos(pos_unknow);
  register_pos(pos_unknow, "?");
  register_pos(pos_unknow, "unknown");
  /*
  register_pos(pos_lex, "LEX");
  register_pos(pos_lex, "MOT");
  register_pos(pos_lex, "PRE");
  */

  /* always epsilon = 0, pnc = 1, nb = 2
   * ? = 3
   * POS at indexes 3 and more are word POS
   */

  if (xmlStrcmp(node->name, "lingdef")) {
    throw xml_parse_error("ling_def: wrong XML document type\n");
  }

  char * text = xmlGetProp(node, "name");
  name = text;
  xmlFree(text);


  /* load lex and cass description (in memory xml description) */
  {
    xmlDoc * memdoc = xmlParseDoc((xmlChar *) casse_attribute_xml);
    xmlNode * memnode = xmlDocGetRootElement(memdoc);

    assert(xmlStrcmp(memnode->name, "attrtype") == 0);

    char * aname = xmlGetProp(memnode, "name");
    char * type = xmlGetProp(memnode, "type");

    attr_type * atype = new_attr_type(type, memnode);

    if (atype == NULL) {
      throw xml_parse_error("ling_def: " + name + ": unable to load in memory casse attribute");
    }

    attr_types[aname] = atype;

    xmlFree(aname);
    xmlFree(type);

    xmlFreeDoc(memdoc);

  }
  
  {
    xmlDoc * memdoc = xmlParseDoc((xmlChar *) lex_pos_xml);
    xmlNode * memnode = xmlDocGetRootElement(memdoc);

    assert(xmlStrcmp(memnode->name, "pos") == 0);
    char * pname = xmlGetProp(memnode, "name");

    pos_def * pos = new pos_def(memnode, this);
    register_pos(pos, pname);

    pos_lex = pos;

    xmlFree(pname);
    xmlFreeDoc(memdoc);
  }

  /* parse XML file */

  node = node->children;

  while (node) {
  
    if (xmlStrcmp(node->name, "attrtype") == 0) {

      char * aname = xmlGetProp(node, "name");
      char * type  = xmlGetProp(node, "type");

      //  cerr << "got attrtype, name=" << aname << endl;

      if (strcmp(aname, "casse") == 0) {

        cerr << "warning: ignoring declaration of predefinite 'casse' attribute\n";

      } else {
      
        if (attr_types.find(aname) != attr_types.end()) {
          throw xml_parse_error("ling_def:" + name + ": too much definition of '" + aname
                                + "' attribute type");
        }

        attr_type * atype = new_attr_type(type, node); 

        if (atype == NULL) {
          throw xml_parse_error("ling_def: " + name + ": unable to load '" + aname
                                + "' attribute type (type='" + type + "')");
        }

        attr_types[aname] = atype;
      }

      xmlFree(aname);
      xmlFree(type);
 
    } else if (xmlStrcmp(node->name, "pos") == 0) {
 
      char * pname = xmlGetProp(node, "name");

      if (strcmp(pname, "lex") == 0) {
        
        cerr << "warning: ignoring definition of predefinite 'lex' pos\n";
      
      } else {
        
        if (name2pos.find(pname) != name2pos.end()) {
          throw xml_parse_error("ling_def: " + name + ": too much definitions for POS '"
                                + pname + "'");
        }

        pos_def * pos = new pos_def(node, this);
        register_pos(pos, pname);
      }

      xmlFree(pname);
    }

    node = node->next;
  }

  /*
  cerr << name << " lingdef: " << attr_types.size() << " attribute types and " << POSs.size()
    << " POS definitions loaded\n";
    */
}

/* Builds a linguistic definition from an already parsed XML node; all the
 * work is done by read_xml, whose exceptions are propagated. */
ling_def::ling_def(xmlNodePtr node) {
  this->read_xml(node);
}



/* Builds a linguistic definition from the XML file at 'path'.
 *
 * Throws xml_parse_error when the file cannot be parsed or has no root
 * element; exceptions raised by read_xml are propagated. The parsed document
 * is released on every path, including when read_xml throws.
 */
ling_def::ling_def(const boost::filesystem::path & path) {

  const std::string fname(path.native_file_string());

  xmlDocPtr doc = xmlParseFile(fname.c_str());

  if (doc == NULL) {
    throw xml_parse_error("ling_def: document " + fname + " not parsed successfully");
  }

  try {

    xmlNodePtr node = xmlDocGetRootElement(doc);

    if (node == NULL) {
      throw xml_parse_error("ling_def: document " + fname + " is empty");
    }

    read_xml(node);

  } catch (...) {
    // the document is owned by this constructor: release it before leaving
    xmlFreeDoc(doc);
    throw;
  }

  xmlFreeDoc(doc);
}

/* Releases the POS definitions and the attribute types owned by this object.
 *
 * NOTE(review): the syntagm definitions allocated by add_syntagm_def are not
 * released here -- confirm who owns them.
 */
ling_def::~ling_def() {

  // free POSs definitions (unsigned index: matches the type of size())
  for (std::size_t i = 0; i < POSs.size(); ++i) { delete POSs[i]; }

  // free attribute types definitions
  for (attr_map::iterator it = attr_types.begin(); it != attr_types.end(); ++it) {
    delete it->second;
  }
}


/* Returns the attribute type recorded under 'type_name', or NULL when no
 * attribute type has that name. */
attr_type * ling_def::get_attr_type(const string & type_name) {

  const attr_map::iterator entry = attr_types.find(type_name);

  return (entry != attr_types.end()) ? entry->second : NULL;
}

/* Returns the POS registered under 'pos_name' (any of its registered names),
 * or NULL when the name is unknown. */
pos_def * ling_def::get_pos(const string & pos_name) {

  const pos_map::iterator entry = name2pos.find(pos_name);

  if (entry != name2pos.end()) {
    return POSs[entry->second];
  }

  return NULL;
}

/* Makes 'name' designate 'pos'.
 *
 * If the POS is not yet known under its own name (pos->get_name()), it is
 * appended to POSs and recorded under that name first. 'name' is then mapped
 * to the index of the POS.
 *
 * Throws runtime_error when 'name' already designates a different POS.
 */
void ling_def::register_pos(pos_def * pos, const string & name) {

  int pos_idx;

  const pos_map::iterator own = name2pos.find(pos->get_name());

  if (own != name2pos.end()) {
    pos_idx = own->second;
  } else {
    // first time this POS is seen: give it the next free slot
    pos_idx = POSs.size();
    POSs.push_back(pos);
    name2pos[pos->get_name()] = pos_idx;
  }

  const pos_map::iterator other = name2pos.find(name);

  if (other != name2pos.end() && other->second != pos_idx) {
    throw runtime_error("lingdef:: too much POS definition for '" + name + "'");
  }

  name2pos[name] = pos_idx;
}


/* Returns the syntagm definition recorded under 'synt_name', or NULL when
 * there is none. */
syntagm_def * ling_def::get_syntagm_def(const string & synt_name) {

  const syntagm_map::iterator entry = syntagms.find(synt_name);

  return (entry != syntagms.end()) ? entry->second : NULL;
}

/* Returns the syntagm definition named 'syntname', creating and recording a
 * new one when none exists yet. */
syntagm_def * ling_def::add_syntagm_def(const string & syntname) {

  syntagm_def * def = get_syntagm_def(syntname);

  if (def == NULL) {
    def = new syntagm_def(syntname);
    syntagms[syntname] = def;
  }

  return def;
}


// Disabled code (compiled out by the '#if 0' guard): writes a <lingdef>
// element to 'os' by calling dump_XML on every attribute type and then on
// every POS definition.
#if 0
void ling_def::dump_XML(ostream & os) const {

  os << "<lingdef name='" << name << "'>";

  // attribute types first ...
  for (attr_map::const_iterator it = attr_types.begin(); it != attr_types.end(); ++it) {
    ((*it).second)->dump_XML(os);
  }

  // ... then the POS definitions, in POSs order
  for (vector<pos_def *>::const_iterator it = POSs.begin(); it != POSs.end(); ++it) {
    (*it)->dump_XML(os);
  }

  os << "</lingdef>";
}
#endif

