#ifndef _LINGUISTICDEFINITION_LINGDEF_H_
#define _LINGUISTICDEFINITION_LINGDEF_H_

/** \mainpage
 * Module that supplies a data structure for storing and querying linguistic
 * features.
 *
 * The two main classes are LinguisticDefinition::LingDef and
 * LinguisticDefinition::LingFeatures. Their relationship is similar to that of
 * a class and an object in object oriented programming, one being the
 * definition for the possible states, and the other being a certain state.
 *
 * The class LinguisticDefinition::XmlLingDefFormatter may be used to transfer
 * the definition data from an XML document to a LinguisticDefinition::LingDef
 * object.
 *
 */

#include <string>
#include <vector>
#include <map>
#include <set>
#include <sstream>
#include <iostream>

//TODO: allow short names in addition to full names?
//TODO: Replace 'reference' with 'int value'/'string value'?
//TODO: Have subclasses for the types of features?
//TODO: make ordering of features deterministic (alphabetic?)
//TODO: error messages, validation (exceptions?)
//TODO: equals operators?

namespace LinguisticDefinition {

  /**
   * Represents a definition of linguistic features.
   *
   * These features can be of type
   * <dl>
   *  <dt>Boolean</dt><dd>A simple true or false</dd>
   *  <dt>Enumeration</dt><dd>A grouping of boolean features, for which only
   *                          one may be true</dd>
   *  <dt>Reference</dt><dd>A feature that has an integer value</dd>
   *  <dt>Virtual tree</dt><dd>A tree structure grouping of boolean
   *                           features</dd>
   * </dl>
   *
   * With the definitions contained in a LingDef object, instances of
   * LingFeatures objects are created.
   *
   * Using the information contained in the definition, features can be
   * referred to by simply using strings for their names.
   *
   * The features are grouped by parts-of-speech (pos). Each pos in the
   * definition class is represented by the class LingDef::Pos. In this class,
   * the available features are represented by the class LingDef::Feature.
   *
   * The scope of a feature name is the pos. The name of a boolean
   * feature may for example not be reused in an enumeration in the same pos,
   * or an inheriting pos. However, the feature 'active' may for example very
   * well be an enumeration for verbs, and a boolean for nouns.
   *
   * Example of how to set up the definition:
   *
   * <pre><code>
   *   LingDef::Pos &nounDef = lingDef.createPos("NOUN");
   *   {
   *     // Create enumeration 'number', with values 'singular' and 'plural'
   *     LingDef::Feature &numberDef =
   *       nounDef.createFeature("number",
   *                             LingDef::Feature::MISC,
   *                             LingDef::Feature::ENUM);
   *
   *     LingDef::Feature &singularDef =
   *       numberDef.createEnumValueFeature("singular");
   *
   *     LingDef::Feature &pluralDef =
   *       numberDef.createEnumValueFeature("plural");
   *   }
   * </code></pre>
   *
   * Sub classes of LingDefFormatter may do this for you, using an external
   * source of some sort.
   *
   * Example of how to retrieve information in the definition:
   *
   * <pre><code>
   *   const LingDef::Pos *nounDef = lingDef.getPos("NOUN");
   *   const LingDef::Feature *pluralDef = nounDef->getFeature("plural");
   * </code></pre>
   *
   * This is however something that is mostly interesting for the class
   * LingFeatures.
   *
   * The poses allow inheritance. For example, if we want a common super pos
   * for both nouns and pronouns:
   *
   * <pre><code>
   *   // Create a virtual base pos
   *   LingDef::Pos &superNounDef = lingDef.createVirtualPos();
   *
   *   // Create 'NOUN' as a sub pos
   *   LingDef::Pos &nounDef = superNounDef.createSubPos("NOUN");
   *
   *   // Create 'PRON' as a sub pos
   *   LingDef::Pos &pronounDef = superNounDef.createSubPos("PRON");
   *
   *   // Let both pronouns and nouns have the 'hello' boolean
   *   LingDef::Feature &helloDef =
   *     superNounDef.createFeature("hello",
   *                                LingDef::Feature::MISC,
   *                                LingDef::Feature::BOOLEAN);
   * </code></pre>
   *
   */
  class LingDef {
  public:

    /**
     * Creates a definition for a given language.
     *
     * @param isoLanguageCode The ISO 639-1 code for the language for which
     *                        this definition is intended. It may be used
     *                        during data collection to determine whether a
     *                        certain piece of information should be
     *                        taken into account given this language.
     */
    LingDef(const std::string &isoLanguageCode);

    /**
     * Destructor.
     *
     * NOTE(review): presumably releases the poses and trees created through
     * this definition; confirm against the implementation.
     */
    ~LingDef();

    /**
     * Copy constructor. Note that features belonging to one LingDef object
     * may not be used with LingFeatures objects that are created with another
     * LingDef object.
     */
    LingDef(const LingDef &);

    /**
     * Assignment operator.
     *
     * NOTE(review): presumably the same restriction as for the copy
     * constructor applies to features of the assigned-from definition;
     * confirm against the implementation.
     */
    LingDef &operator=(const LingDef &);

    /**
     * Returns the ISO 639-1 code for the language for which this definition is
     * intended.
     */
    const std::string &getIsoLanguageCode() const;

    // Forward declaration; Feature refers to Pos before Pos is defined.
    class Pos;

    /**
     * Defines a hierarchical property structure.
     *
     * The most obvious use is in semantic features:
     *
     * <ul>
     *  <li>A human is concrete</li>
     *  <li>A dream is abstract</li>
     *  <li>A salesperson is a human</li>
     *  <li>A car is concrete</li>
     *  <li>A hearse is a car</li>
     * </ul>
     *
     * If <i>salesperson</i> is set, then querying <i>concrete</i> will return
     * true.
     *
     * The tree is a shared resource for the whole definition. To use a tree,
     * a feature of type virtual tree needs to be declared, with an identifier
     * referring to the tree in question. This way the trees can be easily
     * reused in different poses. The names used in the tree will then be
     * defined for the whole pos, and may consequently not be used in other
     * features.
     */
    class Tree {
      friend class LingDef;
    public:

      /**
       * Creates a tree.
       *
       * @param name The name by which this tree is identified.
       */
      Tree(const std::string &name);

      /**
       * Copy constructor
       */
      Tree(const Tree &);

      /**
       * Destructor.
       */
     ~Tree();

      /**
       * Returns the name by which this tree is identified
       */
      const std::string &getName() const;

      /**
       * A named node in a tree. The constructor is private, so nodes can only
       * be created by Tree (a friend) and by the members of Node itself, such
       * as createChildNode().
       */
      class Node {
	friend class Tree;
      private:

	/**
	 * Creates a node belonging to a tree.
	 *
	 * @param name The name of the node.
	 */
	Node(Tree &, const std::string &name);

      public:

	/**
	 * Destructor.
	 */
	~Node();
	
	/**
	 * Creates a node with the given name as a child of this node, and
	 * returns a reference to it.
	 *
	 * NOTE(review): how the child is registered in the parent tree is not
	 * visible in this header; confirm against the implementation.
	 */
	Node &createChildNode(const std::string &name);

	/**
	 * Returns a reference to the tree in which this node is defined.
	 */
	Tree &getParentTree() const;
	
	/**
	 * Returns the name of this node.
	 */
	const std::string &getName() const;

	/**
	 * Returns whether this node is an ancestor (parent or parent of
	 * parent) to a certain node. If this node is <i>concrete</i>, and the
	 * passed node is <i>human</i>, then true will be returned.
	 */
	bool isAncestorOf(const Node &) const;

      private:
	Tree *d_parentTree;
	std::string d_name;
      };

      /**
       * A list of pointers to nodes, and a read-only iterator over such a
       * list.
       */
      typedef std::vector<const Node *> NodeList;
      typedef NodeList::const_iterator NodeIterator;

      /**
       * Returns a start iterator for the nodes of this tree.
       */
      NodeIterator nodesBegin() const;

      /**
       * Returns an end iterator for the nodes of this tree.
       */
      NodeIterator nodesEnd() const;

      /**
       * Returns the root node, which is nameless and invisible.
       */
      Node &getRootNode();

      /**
       * Returns the node with a certain name in this tree.
       *
       * NOTE(review): presumably NULL is returned if no node has that name,
       * as for the other lookups in this header; confirm.
       */
      const Node *getNode(const std::string &) const;

    private:
      std::string d_name;
      NodeList d_nodes;
      std::map<std::string, Node *> d_nodeNameMap;
      Node d_rootNode;

      // NOTE(review): presumably the bookkeeping behind Node::isAncestorOf();
      // confirm against the implementation.
      std::multimap<const Node *, const Node *> d_ancestorsMap;
      std::set<std::pair<const Node *,
			 const Node *> > d_grandChildAncestorPairSet;
    };

    /**
     * Creates a tree with a given name and adds it to the definition.
     */
    Tree &createTree(const std::string &name);

    /**
     * Returns the tree with a certain name in the definition. If no tree
     * exists with that name, NULL is returned.
     */
    const Tree *getTree(const std::string &name) const;

    /**
     * Represents a feature in the definition.
     * A feature belongs to a pos.
     *
     * The constructors are private; features are created with
     * Pos::createFeature() and Feature::createEnumValueFeature().
     */
    class Feature {
      friend class Pos;
      friend class LingDef;
    public:

      /**
       * Destructor.
       */
      ~Feature();

      /**
       * The linguistic domain under which a feature is classified.
       *
       * NOTE(review): presumably syntax, morphology, semantics and
       * miscellaneous; confirm.
       */
      typedef enum { SYNTAX, MORPHO, SEMANTIC, MISC } Domain;

      /**
       * The kind of feature, matching the feature types described for
       * LingDef: boolean, enumeration, reference and virtual tree.
       */
      typedef enum { BOOLEAN, ENUM, REFERENCE, VTREE } Type;

      /**
       * Returns the name of this feature, such as 'plural' or 'active'
       */
      const std::string &getName() const;

      /**
       * Returns the domain of this feature.
       */
      Domain getDomain() const;

      /**
       * Returns the type of this feature.
       */
      Type getType() const;

      /**
       * Returns a pointer to the definition that this feature belongs to.
       */
      const LingDef *getLingDef() const;

      /**
       * Returns a pointer to the pos definition that this feature belongs to.
       */
      const LingDef::Pos *getPosDef() const;

      /**
       * If this feature is an enumeration value ('plural'), then this will
       * return a pointer to the enumeration feature ('number'). Otherwise it
       * will return NULL.
       *
       * Enumeration values are of type boolean.
       */
      const Feature *getParentEnum() const;

      /**
       * The set of enumeration value features of an enumeration feature.
       */
      typedef std::set<Feature *> EnumChildrenSet;

      /**
       * Read-only iterator over the enumeration value features.
       */
      typedef EnumChildrenSet::const_iterator EnumChildrenIterator;

      /**
       * If this is an enumeration feature, this will give a start iterator to
       * the available enumeration value features.
       */
      EnumChildrenIterator enumChildrenBegin() const {
	return d_enumChildren.begin();
      }

      /**
       * If this is an enumeration feature, this will give an end iterator to
       * the available enumeration value features.
       */
      EnumChildrenIterator enumChildrenEnd() const {
	return d_enumChildren.end();
      }

      /**
       * If this is a virtual tree feature, then this will enable us to set
       * which tree to use. The nodes will be mapped to features.
       */
      void setTree(const Tree &);

      /**
       * If this is a virtual tree feature, then this will return a pointer
       * to the tree in question. If not, it will return NULL.
       */
      const Tree *getTree() const;

      /**
       * If this is a virtual tree node feature, then this will return a
       * pointer to the tree node in question. If not, it will return NULL.
       */
      const Tree::Node *getTreeNode() const;

      /**
       * Returns true if this feature is in the passed set. Used to allow
       * the "sloppiness" of the virtual tree.
       */
      bool isIn(const std::set<const LingDef::Feature *> &) const;

      /**
       * For enumeration and reference features, several values may optionally
       * be allowed.
       *
       * For a reference, this could for example be an attribute 'modifiedBy'
       * where several words modify this word.
       *
       * For an enumeration, it is less clear. It allows for some simple
       * ambiguity, for example if the case is always ambiguous for a certain
       * type of noun. For more complex ambiguities, the class LingFeaturesSet
       * is recommended.
       */
      void setAllowSeveralValues(bool);

      /**
       * Returns whether several values are allowed for this feature. See
       * setAllowSeveralValues().
       */
      bool allowSeveralValues() const;

      /**
       * For enumerations, it is possible to forbid the undefined state, where
       * no enumeration value is selected. In that case, the enumeration value
       * that is set to default will be active, or if there is no default,
       * the first value.
       */
      void setAllowNoValue(bool);

      /**
       * Returns whether the undefined state is allowed for this feature. See
       * setAllowNoValue().
       */
      bool allowNoValue() const;

      /**
       * Sets whether this feature should be set by default.
       */
      void setDefault(bool);

      /**
       * Returns whether this feature should be set by default.
       */
      bool isDefault() const;

      /**
       * A short name is an alternative name that may be used in some contexts.
       */
      void setShortName(const std::string &);

      /**
       * Returns the short name of this feature. See setShortName().
       */
      const std::string &getShortName() const;

      /**
       * For enumeration features, this adds an enumeration value. The returned
       * reference is the enumeration value feature created.
       */
      Feature &createEnumValueFeature(const std::string &name);

      /**
       * Registers a conflict between this feature and the passed feature.
       *
       * NOTE(review): presumably forwards to LingDef::addConflict(); what a
       * conflict implies is not visible in this header. Confirm.
       */
      void addConflict(const Feature &);

    private:

      /**
       * Creates a feature of a given domain and type for a pos.
       */
      Feature(Pos &,
	      const std::string &name,
	      Domain domain,
	      Type type);

      /**
       * For enum values
       */
      Feature(Feature &parentEnum,
	      const std::string &name);

      /**
       * NOTE(review): presumably a helper for setDefault(); the meaning of
       * the two flags is not visible in this header. Confirm.
       */
      void setDefaultSub(bool, bool);

    private:
      LingDef *d_lingDef;
      Pos *d_posDef;
      std::string d_name;
      Domain d_domain;
      Type d_type;
      std::string d_shortName;

      // Enumeration relations, see getParentEnum() and enumChildrenBegin()
      Feature *d_parentEnum;
      EnumChildrenSet d_enumChildren;

      bool d_allowSeveralValues;
      bool d_allowNoValue;

      bool d_isDefault;

      // Virtual tree relations, see getTree() and getTreeNode()
      const Tree *d_tree;
      const Tree::Node *d_treeNode;
    };

    /**
     * Represents a part-of-speech (pos) in the definition.
     *
     * The constructors taking a name are private; poses are created with
     * LingDef::createPos(), LingDef::createVirtualPos(), createSubPos() and
     * createVirtualSubPos().
     */
    class Pos {
      friend class Feature;
      friend class LingDef;
    public:

      /**
       * Destructor.
       */
      ~Pos();

      /**
       * Copy constructor.
       */
      Pos(const Pos &);

      /**
       * Assignment operator.
       */
      Pos &operator=(const Pos &);

      /**
       * Returns the name of this pos, such as 'NOUN' or 'VERB'
       */
      const std::string &getName() const;

      /**
       * Returns a pointer to the definition that this pos belongs to.
       */
      const LingDef *getLingDef() const;

      /**
       * If this pos inherits another pos, this will return a pointer to that
       * pos. If not, it will return NULL.
       */
      const Pos *getSuperPos() const;

      /**
       * A short name is an alternative name that may be used in some contexts.
       */
      void setShortName(const std::string &);

      /**
       * Returns the short name of this pos. See setShortName().
       */
      const std::string &getShortName() const;

      /**
       * The kind of pos.
       *
       * NOTE(review): VIRTUAL presumably corresponds to poses created with
       * createVirtualPos() or createVirtualSubPos(); the exact meaning of
       * ELEMENT and SYNTAGM is not visible in this header. Confirm.
       */
      typedef enum { VIRTUAL, ELEMENT, SYNTAGM } Type;

      /**
       * Sets the type of this pos.
       */
      void setType(Type);

      /**
       * Returns the type of this pos.
       */
      Type getType() const;

      /**
       * Sets a note for this pos. No corresponding getter is declared in
       * this class.
       */
      void setNote(const std::string &);

      /**
       * Returns a feature definition for this pos with a certain name. If
       * no feature of that name is available, NULL is returned.
       */
      const Feature *getFeature(const std::string &) const;

      /**
       * Creates a feature for this pos.
       */
      Feature &createFeature(const std::string &name,
			     Feature::Domain domain,
			     Feature::Type type);

      /**
       * A collection of pointers to features. Note that this is a std::set
       * of pointers, so the iteration order follows the pointer values.
       */
      typedef std::set<const Feature *> FeatureList;

      /**
       * Read-only iterator over a FeatureList.
       */
      typedef FeatureList::const_iterator FeatureIterator;

      /**
       * Returns a start iterator for all the features for this pos.
       */
      FeatureIterator featuresBegin() const;

      /**
       * Returns an end iterator for all the features for this pos.
       */
      FeatureIterator featuresEnd() const;

      /**
       * Returns a start iterator for the features to set by default for this
       * pos.
       */
      FeatureIterator defaultFeaturesBegin() const;

      /**
       * Returns an end iterator for the features to set by default for this
       * pos.
       */
      FeatureIterator defaultFeaturesEnd() const;

      /**
       * Creates a pos with the given name that inherits this pos.
       */
      Pos &createSubPos(const std::string &name);

      /**
       * Creates a virtual pos that inherits this pos.
       */
      Pos &createVirtualSubPos();

    private:

      /**
       * Creates a pos with the given name for a definition.
       */
      Pos(LingDef &lingDef, const std::string &name);

      /**
       * Creates a pos with the given name that inherits another pos.
       */
      Pos(Pos &superPos, const std::string &name);

      /**
       * Adds a feature to this pos.
       *
       * NOTE(review): which of the containers below are updated, and who
       * owns the pointer, is not visible in this header. Confirm.
       */
      void addFeature(Feature *feature);

    private:
      LingDef *d_lingDef;
      const Pos *d_superPos;
      std::string d_name;
      std::string d_shortName;
      Type d_type;
      std::string d_note;

      std::vector<const Pos *> d_subPoses;

      std::set<const Feature *> d_featureDefs;
      std::map<std::string, const Feature *> d_featureNameMap;
      std::map<std::string, const Feature *> d_featureShortNameMap;
      std::set<const Feature *> d_defaultFeatures;
    };

    /**
     * Creates a pos for this definition.
     */
    Pos &createPos(const std::string &name);

    /**
     * Creates a virtual pos for this definition. A virtual pos is a pos
     * that is not directly referenceable by its name. It does not even have
     * a name.
     */
    Pos &createVirtualPos();

    /**
     * Returns a pos definition for this definition with a certain name. If
     * no pos of that name is available, NULL is returned.
     */
    const Pos *getPos(const std::string &id) const;

    /**
     * Maps a feature to a pointer to the list of features that it is in
     * conflict with.
     */
    typedef std::map<const Feature *, Pos::FeatureList *> ConflictMap;

    /**
     * Registers a conflict between two features.
     *
     * NOTE(review): whether the conflict is recorded in both directions, and
     * what it implies when features are set, is not visible in this header.
     * Confirm.
     */
    void addConflict(const Feature &featureA, const Feature &featureB);

    /**
     * Returns the list of features registered as conflicting with the passed
     * feature. May be NULL (presumably when no conflict has been registered
     * for it; confirm).
     */
    const Pos::FeatureList *getConflicts(const Feature &) const;

  private:

    /**
     * Adds a pos to this definition.
     *
     * NOTE(review): ownership of the pointer is not visible in this header.
     * Confirm.
     */
    void addPos(Pos *pos);

    /**
     * Adds a tree to this definition.
     *
     * NOTE(review): ownership of the pointer is not visible in this header.
     * Confirm.
     */
    void addTree(Tree *);

  private:
    std::string d_isoLanguageCode;

    std::vector<Pos *> d_posDefs;
    std::map<std::string, Pos *> d_posNameMap;
    std::map<std::string, Pos *> d_posShortNameMap;

    std::vector<Tree *> d_treeDefs;
    std::map<std::string, Tree *> d_treeNameMap;

    ConflictMap d_conflicts;
  };

}

#endif //_LINGUISTICDEFINITION_LINGDEF_H_
