#include <string>
#include <outilex/token.h>

#include <outilex/xml.h>

using namespace std;

// Printable labels for the token case values. The order presumably mirrors
// the case enumeration used by read_XML (min, upper, capit, other_case) --
// confirm against the declaration in outilex/token.h.
char * token::case_names[] = {
  "min",
  "upper",
  "capit",
  "other"
};

/**
 * Fill this token from an XML element.
 *
 * Reads the mandatory "type" attribute and maps it to the token type:
 *   - "word"                                 -> word
 *   - "punctuation", "symbol", "unknown"     -> punctuation
 *     (a warning is printed on cerr for "unknown")
 *   - "numeric"                              -> numeric
 *   - "entity"                               -> punctuation (with a warning)
 *
 * For word tokens only, the optional "case" attribute selects the case:
 * "upper" -> upper, "capit" -> capit, any other value -> other_case,
 * attribute absent -> min. For non-word tokens case_ is left untouched.
 *
 * Finally the text content of the node is copied into `text`.
 *
 * @param node  XML element describing the token.
 * @throws xml_parse_error if the "type" attribute is missing or holds an
 *         unrecognized value.
 */
void token::read_XML(xmlNodePtr node) {

  char * txt = xmlGetProp(node, "type");

  if (! txt) { throw xml_parse_error("invalid xml : no type attribute in token"); }

  if (xmlStrcmp(txt, "word") == 0) {

    type = word;

  } else if ((xmlStrcmp(txt, "punctuation") == 0) || (xmlStrcmp(txt, "symbol") == 0)
             || xmlStrcmp(txt, "unknown") == 0) {

    // Look at the attribute just read (not at the token's previous text):
    // of the three accepted values only "unknown" starts with 'u'.
    if (txt[0] == 'u') {
      cerr << "warning: token with 'unknown' type?\n";
    }

    type = punctuation;

  } else if (xmlStrcmp(txt, "numeric") == 0) {

    type = numeric;

  } else if (xmlStrcmp(txt, "entity") == 0) {

    cerr << "warning: entity\n";
    type = punctuation;

  } else {

    // Build the message first, then release the attribute buffer so it is
    // not leaked when the exception propagates.
    const string msg = "invalid xml: invalid token type : " + string(txt);
    xmlFree(txt);
    throw xml_parse_error(msg);
  }

  xmlFree(txt);

  if (type == word) {

    txt = xmlGetProp(node, "case");

    if (txt) {

      if (xmlStrcmp(txt, "upper") == 0) {

        case_ = upper;

      } else if (xmlStrcmp(txt, "capit") == 0) {

        case_ = capit;

      } else {

        case_ = other_case;
      }
      xmlFree(txt);

    } else { case_ = min; }
  }

  txt = (char *) xmlNodeGetContent(node);

  if (txt) {
    text = txt;
    xmlFree(txt);
  } else {
    // xmlNodeGetContent may return NULL when no content is available;
    // assigning a null pointer to a std::string is undefined behaviour.
    text.clear();
  }
}

