
#include <map>
#include <list>
#include <string>
#include <iostream>
using namespace std;

#include "DocFilters.h"
#include "document.h"

/****************************************************/
/* Generation of output and substitution of subflow */
/****************************************************/

// Content accumulated so far for each subflow, keyed by subflow id.
// The "native" map receives both text and tags (see WriteRes / WriteResTag);
// the "raw" map receives text only.
static map<string,string> NSubFlowId; // native subflow
static map<string,string> RSubFlowId; // raw subflow

// Append plain text to the output.
//   res               - main output buffer
//   text              - text to append
//   current_subflowid - id of the subflow being filled; empty means "main flow"
// When a subflow is active the text is stored in both the raw and the native
// subflow buffers and `res` is left untouched.
static void WriteRes(string &res,const string &text,const string &current_subflowid)
{
  if (current_subflowid.empty()) {
    res+=text;
    return;
  }
  RSubFlowId[current_subflowid]+=text;
  NSubFlowId[current_subflowid]+=text;
}

// Append a tag to the output.
//   res               - main output buffer
//   tag               - tag text (taken by value: placeholders are expanded in place)
//   current_subflowid - id of the subflow being filled; empty means "main flow"
// Inside a subflow the tag is stored verbatim in the native subflow buffer only.
// In the main flow every "$sfi_..." placeholder found in the tag is replaced by
// the content of the corresponding subflow before the tag is appended to `res`.
static void WriteResTag(string &res,string tag,const string &current_subflowid)
{
  if (!current_subflowid.empty()) {
    NSubFlowId[current_subflowid]+=tag;
    return;
  }

  // Positions must be string::size_type: storing the result of find() in an
  // unsigned int truncates string::npos where size_t is wider than int, so
  // the comparison below would never become false.
  string::size_type p;
  // The search restarts from the beginning on purpose: the substituted
  // content may itself contain placeholders that need expanding.
  // NOTE(review): a subflow whose content refers to itself would make this
  // loop run forever - presumably the producer never emits that; confirm.
  while ((p=tag.find("$sfi_"))!=string::npos) {
    // Character right after "$sfi_" selects the buffer: 'r' means raw.
    const bool raw=(tag.length()>p+5 && tag[p+5]=='r');
    // The character at p+6 is skipped unchecked; digits are scanned from p+7.
    string::size_type q=p+7;
    while (q<tag.length() && tag[q]>='0' && tag[q]<='9') q++;
    // The lookup key is the placeholder without its leading '$'.
    const string key=tag.substr(p+1,q-p-1);
    tag.replace(p,q-p,raw?RSubFlowId[key]:NSubFlowId[key]);
  }
  res+=tag;
}

/****************************************************/
// Build the <meta http-equiv="Content-Type"> element announcing `charset`.
// The element is returned surrounded by newlines; `charset` is inserted
// verbatim (no escaping is applied).
string ScriptStyleMetaCharsetTag(const string &charset) {
  string meta("\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=");
  meta+=charset;
  meta+="\">\n";
  return meta;
}


// Return a copy of `ostring` in which the literal characters '&', '<' and '>'
// are replaced by the entities "&amp;", "&lt;" and "&gt;".
// All other characters are copied unchanged.
static string htmlize(string ostring) {
  string escaped;
  escaped.reserve(ostring.size());
  for (string::const_iterator it=ostring.begin(); it!=ostring.end(); ++it) {
    switch (*it) {
    case '&':
      escaped+="&amp;";
      break;
    case '<':
      escaped+="&lt;";
      break;
    case '>':
      escaped+="&gt;";
      break;
    default:
      escaped+=*it;
      break;
    }
  }
  return escaped;
}

// Numeric id taken from the "id" attribute of the last tu / tu_group element
// visited by parseParTu (the attribute is expected to look like "s<digits>").
static int tuid;

/*
 * Recursively serialise the subtree rooted at `par` into the output.
 *
 *   doc               - owning document (needed to expand <tag> content)
 *   par               - node whose children are processed
 *   res               - main output buffer
 *   pending_ws        - trailing whitespace of the last text written; it is
 *                       held back and only emitted in front of the next
 *                       non-blank text (or in front of a tag when `notypeset`)
 *   current_subflowid - active subflow id, "" for the main flow; a "par"
 *                       element carrying a "subflowid" attribute overrides it
 *                       for its own subtree
 *   notypeset         - inherited flag, switched on by a "par" element that
 *                       has a "notypeset" attribute
 *   nb_par, par_num   - forwarded unchanged to recursive calls; not read here
 *
 * Only these children produce output:
 *   - elements tu / tu_group / par / token : processed recursively
 *   - element  tag                         : its text content is written as a
 *                                            tag, unless it carries a
 *                                            "restricted" attribute whose
 *                                            value is not "target"
 *   - text nodes                           : written HTML-escaped; inside a
 *                                            token that has a "source"
 *                                            attribute, that attribute value
 *                                            is written instead of the text
 *
 * Returns 1 on success; 0 is only returned when a recursive call returned 0.
 */
static int parseParTu (xmlDocPtr doc,
		       xmlNodePtr par,
		       string &res,
		       string &pending_ws,
		       string current_subflowid,
		       bool notypeset,
		       long nb_par, 
		       long par_num) {
  if (!xmlStrcmp(par->name,(const xmlChar*)"par")) {
    // xmlGetProp returns NULL when the attribute is absent, so a single
    // NULL check covers both "missing" and "could not be read".
    xmlChar *sfi=xmlGetProp(par,(const xmlChar*)"subflowid");
    if (sfi) {
      current_subflowid=(char*)sfi;
      xmlFree(sfi);
    }
    if (xmlHasProp(par,(const xmlChar *)"notypeset")) notypeset=true;
  }

  for (xmlNodePtr cur=par->children; cur!=NULL; cur=cur->next) {
    const bool is_element=(cur->type==XML_ELEMENT_NODE);

    if (is_element) {
      const bool is_tu=!xmlStrcmp(cur->name,(const xmlChar *)"tu");
      const bool is_tu_group=!xmlStrcmp(cur->name,(const xmlChar *)"tu_group");
      const bool is_par=!xmlStrcmp(cur->name,(const xmlChar *)"par");
      const bool is_token=!xmlStrcmp(cur->name,(const xmlChar *)"token");

      // The element-type test applies to all four names. (It used to guard
      // only "tu" because && binds tighter than ||.)
      if (is_tu || is_tu_group || is_par || is_token) {
        /* pseudo recursivity - to handle par, par/tu, and par/tu/token */
        // Track the current unit id: every tu_group, and every tu that is
        // not itself nested inside a tu_group.
        const bool parent_is_group=!xmlStrcmp(par->name,(const xmlChar *)"tu_group");
        if (is_tu_group || (is_tu && !parent_is_group)) {
          xmlChar *sid=xmlGetProp(cur,(const xmlChar *)"id");
          if (sid) {
            if (*sid) tuid=atol((char*)sid+1); /* sid="s\d+" */
            xmlFree(sid);
          }
        }

        if (!parseParTu(doc,cur,res,pending_ws,current_subflowid,notypeset,nb_par,par_num))
          return 0;
        continue;
      }

      if (!xmlStrcmp(cur->name,(const xmlChar *)"tag")) {
        xmlChar *restricted=xmlGetProp(cur,(const xmlChar*)"restricted");
        if (!restricted || !xmlStrcmp(restricted,(const xmlChar*)"target")) {
          xmlChar *s=xmlNodeListGetString(doc,cur->xmlChildrenNode,1);
          if (s) {
            const string tag=(char *)s;
            xmlFree(s);
            // Held-back whitespace is flushed in front of a tag only in
            // notypeset mode; otherwise it stays pending and ends up after
            // the tag, in front of the next non-blank text.
            if (notypeset) {
              WriteRes(res,pending_ws,current_subflowid);
              pending_ws="";
            }
            WriteResTag(res,tag,current_subflowid);
          }
        }
        if (restricted)
          xmlFree(restricted);
      }
      continue;
    }

    if (cur->type==XML_TEXT_NODE) {
      xmlChar *s;
      if (!xmlStrcmp(par->name,(const xmlChar *)"token") &&
          xmlHasProp(par,(const xmlChar *)"source"))
        s=xmlGetProp(par,(const xmlChar *)"source");
      else
        s=xmlNodeGetContent(cur);

      // Both libxml2 calls may return NULL; building a std::string from a
      // null pointer is undefined, so such a node is treated as empty.
      if (s) {
        const string ostring=htmlize((const char *)s);
        xmlFree(s);

        const string::size_type last_not_ws=ostring.find_last_not_of(" \r\t\n");
        if (last_not_ws!=string::npos) {
          // Real content: emit the whitespace held back so far, then the
          // text up to its last non-blank character, and hold back the rest.
          WriteRes(res,pending_ws,current_subflowid);
          WriteRes(res,ostring.substr(0,last_not_ws+1),current_subflowid);
          pending_ws=ostring.substr(last_not_ws+1);
        } else {
          // Whitespace-only text just extends what is being held back.
          pending_ws+=ostring;
        }
      }
    }
  }

  return 1;
}

/*class HTMLCharsetConvertor : public CharsetConvertor {
 protected:
  int IncorrectCharacter(const string &charset,const char *character,string &conversion);

 public:
  HTMLCharsetConvertor(const string &s,const string &t) : CharsetConvertor(s,t) {}
};

int HTMLCharsetConvertor::IncorrectCharacter(const string &charset,const char *character,string &conversion)
{
  if(charset!="UTF-8")
    return 1;

  unsigned unicode=utf82unicode(character);

  if(!unicode)
    return 1;

  char code[10];

  sprintf(code,"&#x%x;",unicode);

  conversion=code;

  return utf8charlen(character);
}*/

/*
 * Serialise a whole document into `res`.
 *
 * Both subflow buffers are reset first. The root element must be named
 * "document"; otherwise "ERROR_PARSING_XML" is printed on stderr and 0 is
 * returned. On success the result of parseParTu on the root element is
 * returned (1), and `res` holds the generated output. Whitespace still held
 * back in the pending buffer when the walk ends is not written.
 */
static int
parseDoc(xmlDocPtr doc,string &res) {
  xmlNodePtr root = xmlDocGetRootElement(doc);

  RSubFlowId.clear();
  NSubFlowId.clear();

  if (root == NULL || xmlStrcmp(root->name, (const xmlChar *) "document")) {
    fprintf(stderr,"ERROR_PARSING_XML\n");
    return 0;
  }

  // Count the "par" elements directly under the root element. The root is
  // used rather than doc->children because the first child of the document
  // node may be a comment, processing instruction or DTD.
  long nb_par=0;
  for (xmlNodePtr child=root->children; child; child=child->next)
    if (child->type==XML_ELEMENT_NODE && !xmlStrcmp(child->name,(const xmlChar*)"par"))
      nb_par++;

  string pending_ws;
  if (!parseParTu(doc,root,res,pending_ws,"",false,nb_par,1)) return 0;

  return 1;
}

// Turn the XML tree held by `D` back into an HTML string.
// Returns "" when parseDoc fails. Otherwise returns the generated HTML with a
// Content-Type <meta> element (charset UTF-8) inserted at the first location
// that applies:
//   1. right after the '>' closing the first "<head" tag,
//   2. right before the first "<body",
//   3. right after the '>' closing the first "<html" tag,
//   4. at the very beginning of the output.
string PostFilterHTML(DocumentPtr D)
{
  // Will need later to get tuid from id property in (trans?tu)
  tuid=1;

  // NOTE(review): pCurrentDocH keeps the address of this local object after
  // the function returns - confirm nothing dereferences it afterwards.
  documentHTML Doc(NULL);
  pCurrentDocH=&Doc;

  string res;
  if (!parseDoc (D->xmltree,res)) return "";

  /* Insert script, style, meta charset tag */
  const char *target_charset="UTF-8";
  string::size_type insert_at=0;

  // Candidate 1: end of the opening <head ...> tag.
  string::size_type pos=res.find("<head");
  if (pos!=string::npos)
    pos=res.find(">",pos);

  if (pos!=string::npos) {
    insert_at=pos+1;
  } else if ((pos=res.find("<body"))!=string::npos) {
    // Candidate 2: immediately in front of <body.
    insert_at=pos;
  } else {
    // Candidate 3: end of the opening <html ...> tag, else position 0.
    pos=res.find("<html");
    if (pos!=string::npos)
      pos=res.find(">",pos);
    if (pos!=string::npos)
      insert_at=pos+1;
  }

  res.insert(insert_at,ScriptStyleMetaCharsetTag(target_charset));

  return res;
}
