#ifndef _TOKEN_H_
#define _TOKEN_H_

#include <libxml/xmlmemory.h>
#include <libxml/parser.h>

#include <string>
#include <list>

#include "Unicode.h"
#include "UnicodeData.h"

#include "Case.h"

// Name tables defined in another translation unit.
// NOTE(review): presumably indexed by tokentype, by Unicode general-category
// code and by entity subtype respectively -- confirm against the definitions.
// The element type is non-const char*; do not write through these pointers if
// the tables are initialised from string literals.
extern char *type_name[];
extern char *gc_name[];
extern char *e_name[];

// Coarse category tag carried by every token (see token::type and
// token::GetType()). The enumerator names mirror the token_* subclasses
// declared further down in this header, and the tok_is*() macros at the end
// of the file compare a token against these values.
// Values are implicit and start at 0 (_undef); keep the order stable, since
// other code may index tables with them (TODO confirm).
enum tokentype {
  _undef,
  _separator,
  _numeric,
  _punctuation,
  _symbol,
  _entity,
  _word,
  _tag,
  _unknown,
};

// ---- class token -----------------------------------------------------------

// Base class of the token hierarchy.
//
// A token carries a category tag (`type`), several string forms (`src`,
// `norm`, `idx`), bookkeeping ids and references to libxml2 nodes. The
// subclasses declared below in this header specialise it per category and
// override the virtual XML hooks (CreateXmlNode / SetAttribute / GetPos /
// SetPos).
//
// All data members are public and accessed directly by client code; the
// accessors below are conveniences. Read-only accessors and comparison
// operators are const-qualified so they can be used through const
// references/pointers.
class token {
  // Token-to-token equality; defined out of line.
  friend int operator==(const token&, const token&);
 public:
  token();
  // NOTE(review): the two strings are presumably the source and normalised
  // forms -- confirm against the definition.
  token(const std::string &, const std::string &);
  virtual ~token() {}

  // Resets the class-wide id counter to `i`.
  inline static void InitId(int i) { currentid=i; }
  // Assigns the next id from the class-wide counter, then advances the counter.
  inline void SetId() { id=currentid; currentid++; }
  // Assigns an explicit id; the class-wide counter is left untouched.
  inline void SetId(int i) { id=i; }
  inline int GetId() const { return id; }
  inline tokentype GetType() const { return type; }
  // Mutable access to the normalised form.
  inline std::string &GetNorm()   { return norm; }
  // Length of the normalised form (narrowed from size_t to unsigned int).
  inline unsigned int length() const { return norm.length(); }
  inline _case GetCase() const { return wordcase; }
  // C string view of the normalised form; valid until `norm` is modified.
  inline const char *c_str() const { return norm.c_str(); }
  // Comparison against a category tag: true when `type` matches.
  inline int operator==(tokentype t) const { return type==t; }
  inline int operator!=(tokentype t) const { return type!=t; }
  // Comparison against a C string: compares the normalised form.
  inline int operator==(const char *s) const { return norm==s; }
  inline int operator!=(const char *s) const { return norm!=s; }
  // Note: the list is taken by value (copied on every call).
  void SetTagSep(std::list<xmlNodePtr>);
  std::list<xmlNodePtr> &GetTagSep();
  xmlNodePtr CreateGenericXmlNode();
  // Virtual XML hooks, overridden by the subclasses in this header.
  virtual xmlNodePtr CreateXmlNode();
  virtual std::string GetPos();
  virtual void SetPos(std::string);
  virtual void SetAttribute(xmlNodePtr);
  void RefreshXmlContent();

  // Class-wide counter feeding SetId(), and this token's id.
  static int currentid;
  int id;
  
  // Second counter/id pair. NOTE(review): presumably used for translated
  // output -- nothing in this header reads or writes them.
  static int translation_currentid;
  int translation_id;

  // One-bit flags. NOTE(review): presumably set by normalisation and index
  // lookup passes defined elsewhere.
  bool hasbeennormalized:1;
  bool hasbeenfoundinindex:1;

  std::string src;   // NOTE(review): presumably the original surface form
  std::string norm;  // normalised form; exposed by GetNorm()/c_str()/length()
  std::string idx;   // NOTE(review): presumably the index lookup key
  tokentype type;    // category tag; compared by operator==(tokentype)
  std::string lang;
  _case wordcase;    // returned by GetCase()
  short weight; // number of original tokens this token stands for
  short wordweight; // number of original word tokens this token stands for
  xmlNodePtr refXml;
  std::list<xmlNodePtr> tagsep;  // NOTE(review): presumably backs SetTagSep()/GetTagSep()
  xmlNodePtr ts;
  bool translate;
  bool no_entity_misc; /* Do not include this token in a larger entity in misc entity rules */
  bool no_entity;      /* Do not include at all this token in a larger entity */
};

// ---- class token_separator -------------------------------------------------

// Token subclass for separators.
// SetAttribute() and CreateXmlNode() have the same signatures as the virtual
// functions in `token` and therefore override them. dump() is not declared in
// the base class and is non-virtual, so it is only reachable through a
// token_separator (or a cast to one).
class token_separator: public token {

 public:
  token_separator(const std::string&, const std::string&);
  // NOTE(review): the int is presumably the gc_subt value to store in `subt`
  // -- confirm against the definition.
  token_separator(const std::string&, const std::string&, int);

  std::string dump();
  void SetAttribute(xmlNodePtr);
  xmlNodePtr CreateXmlNode();

 public:
  // Separator subtypes. Each enumerator takes the value of the global-scope
  // constant of the same name (declared outside this header, presumably in
  // UnicodeData.h).
  enum gc_subt {
    gc_undef=::gc_undef,
    gc_Zs=::gc_Zs, /* Separator, Space   -22 */
    gc_Zl=::gc_Zl, /* Separator, Line    -23 */
    gc_Zp=::gc_Zp, /* Separator, Paragraph -24 */
  };
  gc_subt subt;
};

// ---- class token_numeric ---------------------------------------------------

// Token subclass for numeric tokens. Adds no data members of its own.
// SetAttribute() and CreateXmlNode() override the virtual functions of
// `token`; dump() is a non-virtual addition.
class token_numeric: public token {

 public:
  token_numeric(const std::string&, const std::string&);

  std::string dump();
  void SetAttribute(xmlNodePtr);
  xmlNodePtr CreateXmlNode();

 public:
};

// ---- class token_punctuation -----------------------------------------------

// Token subclass for punctuation.
// SetAttribute() and CreateXmlNode() override the virtual functions of
// `token`; dump() is a non-virtual addition. The tok_isconnector() macro at
// the end of this header casts a token pointer to this class and reads `subt`.
class token_punctuation: public token {

 public:
  token_punctuation(const std::string&, const std::string&);
  // NOTE(review): the int is presumably the gc_subt value to store in `subt`
  // -- confirm against the definition.
  token_punctuation(const std::string&, const std::string&, int);

  std::string dump();
  void SetAttribute(xmlNodePtr);
  xmlNodePtr CreateXmlNode();

 public:
  // Punctuation subtypes. Each enumerator takes the value of the global-scope
  // constant of the same name (declared outside this header, presumably in
  // UnicodeData.h).
  enum gc_subt {
    gc_undef=::gc_undef,
    gc_Pc=::gc_Pc, /* Punctuation, Connector -11 */
    gc_Pd=::gc_Pd, /* Punctuation, Dash    -12*/
    gc_Ps=::gc_Ps, /* Punctuation, Open    -13*/
    gc_Pe=::gc_Pe, /* Punctuation, Close   -14*/
    gc_Pi=::gc_Pi, /* Punctuation, Initial quote -15 */
    gc_Pf=::gc_Pf, /* Punctuation, Final quote -16 */
    gc_Po=::gc_Po, /* Punctuation, Other -17 */
  };
  gc_subt subt;
};

// ---- class token_symbol ----------------------------------------------------

// Token subclass for symbols.
// SetAttribute() and CreateXmlNode() override the virtual functions of
// `token`; dump() is a non-virtual addition.
class token_symbol: public token {

 public:
  token_symbol(const std::string&, const std::string&);
  // NOTE(review): the int is presumably the gc_subt value to store in `subt`
  // -- confirm against the definition.
  token_symbol(const std::string&, const std::string&, int);

  std::string dump();
  void SetAttribute(xmlNodePtr);
  xmlNodePtr CreateXmlNode();

 public:
  // Symbol subtypes. Each enumerator takes the value of the global-scope
  // constant of the same name (declared outside this header, presumably in
  // UnicodeData.h).
  enum gc_subt {
    gc_undef=::gc_undef,
    gc_Sm=::gc_Sm, /* Symbol, Math       -18 */
    gc_Sc=::gc_Sc, /* Symbol, Currency   -19 */
    gc_Sk=::gc_Sk, /* Symbol, Modifier   -20 */
    gc_So=::gc_So, /* Symbol, Other      -21 */
  };
  gc_subt subt;
};

// ---- class token_entity ----------------------------------------------------

// Token subclass for entities (e-mail addresses, URIs, dates, ...; see
// entity_subt below).
// SetAttribute() and CreateXmlNode() override the virtual functions of
// `token`; dump() is a non-virtual addition.
class token_entity: public token {
 public:
  token_entity(const std::string&, const std::string&);
  // NOTE(review): the int is presumably the entity_subt value to store in
  // `subt` -- confirm against the definition.
  token_entity(const std::string&, const std::string&, int);
  
  // Copy constructor, copy assignment and destructor are user-declared and
  // defined out of line. Note that operator= returns a const reference, so the
  // result of an assignment cannot itself be modified or assigned to.
  // NOTE(review): no owning member is visible in this header; check the
  // definitions for what these special members actually manage.
  token_entity(const token_entity &);
  const token_entity &operator=(const token_entity &);
  ~token_entity();

  std::string dump();
  void SetAttribute(xmlNodePtr);
  xmlNodePtr CreateXmlNode();

  // Entity subtypes; values are implicit and start at 0 (e_undef).
  enum entity_subt {
    e_undef,
    e_email,
    e_acronym,
    e_uri,
    e_date,
    e_numeric,
    e_propernoun,
    e_address,
    e_ip,
  };
  
  // Defined out of line. NOTE(review): presumably the printable name of each
  // entity_subt value, indexed by the enumerator -- confirm.
  static const char *entity_subt_name[];

 public:
  entity_subt subt;
};

// ---- class token_word ------------------------------------------------------

// Token subclass for words.
// GetPos(), SetPos(), SetAttribute() and CreateXmlNode() have the same
// signatures as the virtual functions in `token` and therefore override them;
// dump() is a non-virtual addition.
class token_word: public token {

 public:
  // The trailing int defaults to f_na (declared outside this header).
  // NOTE(review): it is presumably the value stored in `alphabet`; the extra
  // string of the second overload is not identifiable from this header.
  token_word(const std::string&, const std::string&, int=f_na);
  token_word(const std::string&, const std::string&, const std::string&, int=f_na);

  std::string dump();
  std::string GetPos();
  void SetPos(std::string);
  void SetAttribute(xmlNodePtr);
  xmlNodePtr CreateXmlNode();

 public:
  f_val alphabet;
  // NOTE(review): presumably the value read/written by GetPos()/SetPos().
  std::string pos;
};

// ---- class token_tag -------------------------------------------------------

// Token subclass for tags. Only default-constructible, unlike the other
// subclasses in this header, which take string arguments.
// SetAttribute() and CreateXmlNode() override the virtual functions of
// `token`; dump() is a non-virtual addition.
class token_tag: public token {

 public:
  token_tag();

  std::string dump();
  void SetAttribute(xmlNodePtr);
  xmlNodePtr CreateXmlNode();

 public:
  // NOTE(review): meaning not visible in this header (presumably whether the
  // tag is adjacent to whitespace) -- confirm against the implementation.
  bool hasspace;
};

// ---- class token_unknown ---------------------------------------------------

// Token subclass for tokens of unknown category. Adds no data members of its
// own. SetAttribute() and CreateXmlNode() override the virtual functions of
// `token`; dump() is a non-virtual addition.
class token_unknown: public token {

 public:
  token_unknown(const std::string&, const std::string&);

  std::string dump();
  void SetAttribute(xmlNodePtr);
  xmlNodePtr CreateXmlNode();

 public:
};

// ----------------------------------------------------------------------------

// Free factory functions returning a pointer to the base class; all are
// defined in another translation unit.
// NOTE(review): the results are presumably heap-allocated objects of the
// matching token_* subclass that the caller must delete -- confirm ownership
// against the definitions before relying on it.

// Builds a token from a libxml2 node.
token *CreateToken(xmlNodePtr);
// Build a token from two (or three) strings and an explicit category tag.
token *newtoken(const std::string&, const std::string&, tokentype);
token *newtoken(const std::string&, const std::string&, const std::string&, tokentype);
// Builds a token from a string and a libxml2 node.
token *newtoken(const std::string&, xmlNodePtr);
// Produces a new token from an existing one.
token *copyToken(const token &t);

// Null-safe predicates on token pointers: each macro yields false when `t` is
// a null pointer, otherwise tests the pointed-to token.
//
// Every use of a macro parameter is parenthesised so that expression
// arguments (e.g. `p+1`, `cond ? a : b`) expand correctly.
// Caveats: `t` (and `s`) are evaluated more than once, so do not pass
// arguments with side effects (`it++`, function calls that mutate state).
// tok_endwith() and tok_in() rely on strlen/strcmp/strstr, so the including
// file must make <cstring> (or <string.h>) visible.

// Category tests (compare the token against a tokentype value).
#define tok_isword(t) ((t) && (*(t))==_word)
#define tok_ispunct(t) ((t) && (*(t))==_punctuation)
#define tok_isentity(t) ((t) && (*(t))==_entity)
// Punctuation token whose subtype is gc_Pc; the cast is only reached once the
// category test has succeeded.
#define tok_isconnector(t) ((t) && (*(t))==_punctuation && ((token_punctuation*)(t))->subt==token_punctuation::gc_Pc)
#define tok_isnum(t) ((t) && (*(t))==_numeric)
// True when the normalised form ends with the C string `s`.
#define tok_endwith(t,s) ((t) && ((t)->GetNorm().length()>=strlen(s)) && strcmp((t)->GetNorm().c_str()+(t)->GetNorm().length()-strlen(s),(s))==0)
// Generic equality: `s` may be anything token::operator== accepts.
#define tok_eq(t,s) ((t) && (*(t))==(s))
// True when the normalised form occurs as a substring of the C string `S`.
#define tok_in(t,S) ((t) && strstr((S),(*(t)).c_str()))

// Word tokens with a specific case class.
#define tok_islower(t) ((t) && (*(t))==_word && (t)->GetCase()==_clower)
#define tok_iscapitalized(t) ((t) && (*(t))==_word && (t)->GetCase()==_ccapit)

#endif //_TOKEN_H_
