#include "dico.h"
#include "text.h"
#include "tab_u.h"
#include "auto2txt.h"
#include "fst2.h"
#include "utilities.h"
#include "applications.h"
#include <stdlib.h>

// Forward declaration of the configuration dump helper.
void dico_print_config(Dico_config config);

// Release counter maintained by dico_pfree().
int dico_n_free = 0;
// Allocation counter maintained by dico_malloc().
int dico_n_malloc = 0;

// Allocates n bytes with malloc() and records the allocation in dico_n_malloc.
// n: requested size in bytes; a negative value is rejected.
// Returns the new block, or NULL when n is negative or malloc() fails.
// Only successful allocations are counted, so that dico_n_malloc stays
// comparable with dico_n_free (dico_pfree() does not count NULL pointers).
void* dico_malloc(int n){
  void *block;

  // a negative int would be converted to a huge size_t by malloc()
  if(n < 0) return NULL;
  block = malloc((size_t)n);
  if(block != NULL) dico_n_malloc++;
  return block;
}

// Releases a block with free() and records the release in dico_n_free.
// ptr: block to release; NULL is ignored and is not counted.
// The pointer is received by value: the caller's own variable is left
// untouched and must not be dereferenced after this call.
void dico_pfree(void *ptr){
  if(ptr == NULL) return;
  dico_n_free++;
  free(ptr);
}

// Writes "dico_malloc=<count>" followed by a newline on standard output,
// where <count> is the current value of dico_n_malloc, and returns that value.
int dico_print_n_malloc(){
  const int count = dico_n_malloc;

  printf("dico_malloc=%d\n",count);
  return count;
}

// Writes "dico_free=<count>" followed by a newline on standard output,
// where <count> is the current value of dico_n_free, and returns that value.
int dico_print_n_free(){
  const int count = dico_n_free;

  printf("dico_free=%d\n",count);
  return count;
}

Dico_config dico_create_config(){
  Dico_config config = (Dico_config)dico_malloc(sizeof(struct dico_config));
  config->categories = lex_init();  
  config->cat_features = NULL;
  config->default_features = NULL;
  config->hash_cat = hash_init(2028);
  return config;
}

// Registers one configuration line in *config.
// The line is split on ';'. Field 0 is a category name: it is added to the
// token set and to the category lexicon, and the pair (token index, lexicon
// index) is recorded in hash_cat. Every following field is split on '=':
//   part 0 is a feature name, added to the token set;
//   part 1 is a string whose elements are each added to the token set and
//          inserted in cat_features for (category, feature);
//   part 2, when tab_u_get_value() returns it non-NULL, is added to the
//          token set and inserted in default_features for (category, feature).
// config: configuration to update.
// line:   configuration line to parse.
// tok:    token set receiving the names found on the line.
void dico_init_config_line(Dico_config *config,Ustring line,Tokens *tok){
  Tab_u fields,parts;
  Ustring name,field,value_chars,one_value,fallback;
  int tok_cat,lex_cat,feat_id,value_id,fallback_id;
  int n_fields,n_values,f,v;

  // category name: first ';'-separated field
  fields = ustring_split((Uchar)';',line);
  name = tab_u_get_value(fields,0);
  tok_add_element(tok,name,TOK_WITHOUT_FREQ,&tok_cat);
  lex_cat = lex_add_element(&((*config)->categories),name);
  hash_add_element(&((*config)->hash_cat),tok_cat,lex_cat,0);

  // remaining fields: feature=values[=default]
  n_fields = tab_u_get_n_elements(fields);
  f = 1;
  while(f < n_fields){
    field = tab_u_get_value(fields,f);
    parts = ustring_split((Uchar)'=',field);
    name = tab_u_get_value(parts,0);
    tok_add_element(tok,name,TOK_WITHOUT_FREQ,&feat_id);

    value_chars = tab_u_get_value(parts,1);
    n_values = ustring_length(value_chars);
    v = 0;
    while(v < n_values){
      // each element of the value string becomes a one-element Ustring
      one_value = NULL;
      ustring_append(&one_value,ustring_get_uchar(value_chars,v));
      tok_add_element(tok,one_value,TOK_WITHOUT_FREQ,&value_id);
      tab_l_insert_value(&((*config)->cat_features),lex_cat,feat_id,value_id);
      ustring_free(one_value);
      v++;
    }

    fallback = tab_u_get_value(parts,2);
    if(fallback != NULL){
      tok_add_element(tok,fallback,TOK_WITHOUT_FREQ,&fallback_id);
      tab_l_insert_value(&((*config)->default_features),lex_cat,feat_id,fallback_id);
    }
    tab_u_free(&parts);
    f++;
  }
  tab_u_free(&fields);
}



// Loads a dictionary configuration from the file config_name.
// The first line of the file is read and discarded; every following line is
// handed to dico_init_config_line().
// config_name: path of the configuration file, opened in "r" mode.
// tokens:      token set receiving the names found in the file.
// Returns the new configuration. When the file cannot be opened the error is
// reported through util_error() and, should that call return, NULL is returned.
Dico_config dico_init_config(char *config_name,Tokens* tokens){
  Dico_config dconf = NULL;
  Ustring u;
  FILE *f = fopen(config_name,"r");

  if(f == NULL){
    util_error("dico_init_config","cannot open config file");
    // never read from (or close) a NULL stream, whatever util_error() does
    return NULL;
  }
  // skip the first line of the file
  u = ustring_get_line(f,ASCII_FORMAT);
  ustring_free(u);
  dconf = dico_create_config();
  while((u = ustring_get_line(f,ASCII_FORMAT))!= NULL){
    dico_init_config_line(&dconf,u,tokens);
    ustring_free(u);
  }
  fclose(f);
  return dconf;
}

// Creates an empty dictionary whose configuration is read from config_name.
// The structure comes from dico_malloc(); the configuration is built by
// dico_init_config(), the compound lexicon by lex_init(), and both feature
// tables start out as NULL.
// config_name: path of the configuration file.
// tokens:      token set handed to dico_init_config().
// Returns the new dictionary.
Dico dico_init(char *config_name,Tokens* tokens){
  Dico fresh;

  fresh = (Dico)dico_malloc(sizeof(struct dico));
  fresh->config = dico_init_config(config_name,tokens);
  fresh->compound_lexicon = lex_init();
  fresh->simple_features = NULL;
  fresh->compound_features = NULL;
  return fresh;
}

// Processes one dictionary line.
// The line is split with ustring_split_in_order() using the separator string
// ",." (presumably "word,root.info" -- confirm against the splitter). When
// the split yields a table:
//   - "word;root;" is appended to *t, the word standing in for the root when
//     the root field has length 0;
//   - the info field is stored in *infos at index tab_u_get_n_elements(*infos),
//     i.e. after the elements already present.
// dic:   not used by this function.
// line:  dictionary line to parse.
// t:     text accumulating the word/root pairs.
// infos: table accumulating one info string per parsed line.
void dico_load_line(Dico *dic,Ustring line,Text *t,Tab_u *infos){
  Ustring separators = ustring_from_string(",.");
  Tab_u fields = ustring_split_in_order(separators,line);

  if(fields != NULL){
    Ustring form = tab_u_get_value(fields,0);
    // private copy of the root: it may be replaced below and is freed here
    Ustring lemma = ustring_allocation(tab_u_get_value(fields,1));
    Ustring info = tab_u_get_value(fields,2);

    if(ustring_length(lemma) == 0){
      ustring_free(lemma);
      lemma = ustring_allocation(form);
    }
    txt_append_ustring(t,form);
    txt_append_element(t,(Uchar)';');
    txt_append_ustring(t,lemma);
    txt_append_element(t,(Uchar)';');
    tab_u_assign_value(infos,tab_u_get_n_elements(*infos),info);
    ustring_free(lemma);
  }
  tab_u_free(&fields);
  ustring_free(separators);
}

// Applies the grammar stored in graph_name to the text t in
// APP_TOKENIZATION mode and splits the resulting text on the token ";".
// t:             text to process.
// graph_name:    path of the grammar loaded with fst2_load_graph().
// output_config: forwarded unchanged to a2t_init_context().
// tokens:        token set given to the context; it is reassigned from the
//                context's token set (tok_assign) before the context is freed
//                with A2T_NOT_FREE_TOKENS.
// Returns the table produced by txt_split().
Tab_u dico_tokenization(Text t,char *graph_name,char *output_config,Tokens *tokens){
  struct a2t_context ctx;
  struct a2t_result outcome;
  Grammar grammar;
  Ustring separator;
  Tab_u pieces = NULL;
  int separator_index;
  // NOTE(review): handed to a2t_init_context() without being initialised;
  // confirm that the callee does not read it.
  int fd[10];

  grammar = fst2_load_graph(graph_name,0,NULL);
  // required for the morphological application (original TODO: do without it)
  gram_change_morpho_terminals(&grammar);
  a2t_init_context(&ctx,tokens,grammar,1,t,0,APP_TOKENIZATION,fd,0,output_config,NULL,NULL);
  a2t_init_result(&outcome);
  a2t_apply_grammar(&ctx,&outcome);

  // look up the index of the ";" token used as entry separator
  separator = ustring_from_string(";");
  separator_index = tok_exists(ctx.tokens,separator);
  ustring_free(separator);

  pieces = txt_split(separator_index,outcome.new_text);
  tok_assign(tokens,ctx.tokens);
  a2t_free_context(&ctx,A2T_NOT_FREE_TOKENS);
  a2t_free_result(&outcome,A2T_FREE_RESULT_TEXT);
  return pieces;
}

// Looks up cat in the hash_cat table of config and returns the value that
// hash_get_value() reports for it (dico_init_config_line() stores the
// category's lexicon index under its token index).
int dico_get_config_cat_index(Dico_config config,int cat){
  int stored = hash_get_value(config->hash_cat,cat);

  return stored;
}


// Extracts the category (e.g. cat=N) and the sub-categories (e.g. subcat=Hum)
// from info, which is split on '+'.
// The first part is the category: it is added to the token set, its token
// index is written to *cat_index and the pair (cat_key, index) is inserted in
// the result. Each following part is added to the token set and inserted as
// (subcat_key, index).
// Returns the list built with ll_insert_sorted(..., LL_KEYS).
Llist dico_get_categories(Ustring info,int *cat_index,Tokens *tokens,int cat_key,int subcat_key){
  Llist pairs = NULL;
  Tab_u parts = ustring_split((Uchar)'+',info);
  Ustring label;
  int n_parts,sub_index,k;

  label = tab_u_get_value(parts,0);
  tok_add_element(tokens,label,TOK_WITHOUT_FREQ,cat_index);
  ll_insert_sorted(&pairs,cat_key,*cat_index,LL_KEYS);

  n_parts = tab_u_get_n_elements(parts);
  k = 1;
  while(k < n_parts){
    label = tab_u_get_value(parts,k);
    tok_add_element(tokens,label,TOK_WITHOUT_FREQ,&sub_index);
    ll_insert_sorted(&pairs,subcat_key,sub_index,LL_KEYS);
    k++;
  }
  tab_u_free(&parts);
  return pairs;
}


// Translates the inflection codes of one inflection string into feature
// pairs appended to *temp_list.
// Each element of inflection is looked up as a one-element string in the
// token set; when it is a known token whose index is found among the values
// of inflection_features, the matching (key, value) cell is inserted in
// *temp_list.
// When no element matched and *already_default is still 0, *already_default
// is set to 1 and default_features is merged into *temp_list instead.
// Returns 1 if something has been added to *temp_list, 0 otherwise.
int dico_get_inflection_codes(Llist inflection_features,Llist *temp_list,
			      Ustring inflection,int *already_default,
			      Tokens *tokens,Llist default_features){
  const int n_codes = ustring_length(inflection);
  int matched = 0;
  int code_index,pos;
  Ustring single;
  struct ll_cell *cell;

  for(pos = 0 ; pos < n_codes ; pos++){
    // wrap the current element in a temporary one-element string
    single = NULL;
    ustring_append(&single,ustring_get_uchar(inflection,pos));
    code_index = tok_exists(*tokens,single);
    if(code_index > -1){
      cell = ll_find_element(inflection_features,code_index,LL_VALUES);
      if(cell != NULL){
        ll_insert_sorted(temp_list,cell->key,cell->value,LL_KEYS);
        matched = 1;
      }
    }
    ustring_free(single);
  }

  if(matched) return 1;
  // nothing matched: fall back on the defaults, but only once per caller
  if(*already_default) return 0;
  *already_default = 1;
  ll_add_list_sorted(temp_list,default_features);
  return 1;
}


// Builds the feature lists of the dictionary entry stored at
// infos[word_index].
// The info string is split on ':'. Part 0 holds the category and
// sub-categories (see dico_get_categories()); together with the pair
// (root_key, root_value) they form the base list. Each following part is an
// inflection string handled by dico_get_inflection_codes(): a copy of the
// base list is extended with it and kept when the helper returns non-zero.
// When the category has no entry in config->cat_features, or when the info
// string has no inflection part, the result holds a single copy of the base
// list.
// Returns the list of lists built with ll_l_insert().
Llist_l dico_get_features(Tab_u infos,Dico_config config,int word_index,Tokens *tokens,
			  int root_key,int root_value,int cat_key,int subcat_key){
  Tab_u fields;
  Ustring entry_info,codes;
  Llist base = NULL,candidate = NULL,cat_feats = NULL,defaults = NULL;
  Llist_l result = NULL;
  int cat_tok,cat_pos,n_fields,k;
  int default_used = 0;

  // base list = categories of the entry + its root
  ll_insert_sorted(&candidate,root_key,root_value,LL_KEYS);
  entry_info = tab_u_get_value(infos,word_index);
  fields = ustring_split((Uchar)':',entry_info);
  entry_info = tab_u_get_value(fields,0);
  base = dico_get_categories(entry_info,&cat_tok,tokens,cat_key,subcat_key);
  ll_add_list_sorted(&base,candidate);
  ll_free(&candidate);

  cat_pos = dico_get_config_cat_index(config,cat_tok);
  cat_feats = tab_l_get_list(config->cat_features,cat_pos);

  if(cat_feats == NULL){
    // category without configured features: base list only
    ll_l_insert(&result,ll_copy(base));
  }
  else{
    n_fields = tab_u_get_n_elements(fields);
    for(k = 1 ; k < n_fields ; k++){
      candidate = ll_copy(base);
      codes = tab_u_get_value(fields,k);
      defaults = tab_l_get_list(config->default_features,cat_pos);
      if(dico_get_inflection_codes(cat_feats,&candidate,codes,&default_used,
				   tokens,defaults)){
        ll_l_insert(&result,candidate);
      }
      else{
        ll_free(&candidate);
      }
    }
    if(n_fields <= 1){
      // no inflection part at all: base list only
      ll_l_insert(&result,ll_copy(base));
    }
  }
  ll_free(&base);
  tab_u_free(&fields);
  return result;
}

// Stores the features of every dictionary entry in *dic.
// entries holds the entries as consecutive (word, root) pairs; infos holds
// one info string per pair, at index pair_number.
// The tokens "cat", "subcat" and "lemma" are added to the token set and their
// indices are used as keys of the feature lists.
// For each pair:
//   - a word of length 1 is a simple word: word and root are identified by
//     their single element (presumably a token index after tokenization --
//     confirm) and the features go to simple_features;
//   - a longer word is a compound: word and root are added to
//     compound_lexicon and the features go to compound_features;
//   - an empty word is reported through util_error() and the pair is skipped.
void dico_get_entries(Dico *dic,Tab_u entries,Tab_u infos,Tokens *tokens){
  int i,length,word_index,root_index,root_key,cat_key,subcat_key;
  Llist_l ll = NULL;
  Ustring word,root;
  int is_compound = 0;

  length = tab_u_get_n_elements(entries);
  tok_add_element_from_string(tokens,"cat",TOK_WITHOUT_FREQ,&cat_key);
  tok_add_element_from_string(tokens,"subcat",TOK_WITHOUT_FREQ,&subcat_key);
  tok_add_element_from_string(tokens,"lemma",TOK_WITHOUT_FREQ,&root_key);

  for(i = 0 ; i < length ; i+=2){
    word = tab_u_get_value(entries,i);
    root = tab_u_get_value(entries,i + 1);
    switch(ustring_length(word)){
    case 0:
      util_error("dico_get_entries","word should not be null");
      // no index can be derived from an empty word: skip the pair instead of
      // going on with uninitialised indices, should util_error() return
      continue;
    case 1://simple word
      word_index = ustring_get_uchar(word,0);
      root_index = ustring_get_uchar(root,0);
      is_compound = 0;
      break;
    default: //compound
      word_index = lex_add_element(&((*dic)->compound_lexicon),word);
      root_index = lex_add_element(&((*dic)->compound_lexicon),root);
      is_compound = 1;
      break;
    }
    // i/2 is the pair number, i.e. the index of the entry in infos
    ll = dico_get_features(infos,(*dic)->config,i/2,tokens,root_key,root_index,cat_key,subcat_key);

    if(is_compound)tab_ll_insert_list(&((*dic)->compound_features),word_index,&ll);
    else tab_ll_insert_list(&((*dic)->simple_features),word_index,&ll);
  }
}


// Loads the dictionary file dico_name into *dic.
// dic:              dictionary to fill; when *dic is NULL it is first created
//                   with dico_init("dico_config", tokens).
// dico_name:        path of the dictionary file, opened in "r" mode.
// has_ascii_format: forwarded to the ustring readers; when it is 0 one
//                   leading character is read and dropped before the lines.
// tokens:           token set shared by all the processing steps.
// Every line is parsed by dico_load_line(), the accumulated text is tokenized
// by dico_tokenization() and the entries are stored by dico_get_entries().
// When the file cannot be opened the error is reported through util_error()
// and, should that call return, nothing is loaded.
void dico_load(Dico *dic,char *dico_name,int has_ascii_format,Tokens* tokens){
  FILE *f;
  Ustring u;
  Tab_u info_tab = NULL,entries = NULL;
  Text t = NULL;

  if(*dic == NULL){
    *dic = dico_init("dico_config",tokens);
  }

  f = fopen(dico_name,"r");
  if(f == NULL){
    util_error("dico_load","cannot open dictionary file");
    // never read from (or close) a NULL stream, whatever util_error() does
    return;
  }
  if(!has_ascii_format) ustring_read_uchar(f,has_ascii_format);//read unicode special character
  while((u = ustring_get_line(f,has_ascii_format)) != NULL){
    dico_load_line(dic,u,&t,&info_tab);
    ustring_free(u);
  }
  // NOTE(review): the grammar path and the output configuration name are
  // hard-coded; they should probably become parameters or settings.
  entries = dico_tokenization(t,"/home/mconstant/tokenization.fst2","output_config",tokens);
  dico_get_entries(dic,entries,info_tab,tokens);

  tab_u_free(&info_tab);
  tab_u_free(&entries);
  fclose(f);
}

// Dumps a configuration: the category lexicon with lex_print(), then the
// cat_features and default_features tables with tab_l_print().
// config: configuration to print; nothing is printed when it is NULL.
void dico_print_config(Dico_config config){
  if(config == NULL) return;
  lex_print(config->categories);
  tab_l_print(config->cat_features);
  tab_l_print(config->default_features);
}



// Releases a configuration and everything it holds: the category lexicon,
// both feature tables, the category hash, then the structure itself.
// config: address of the configuration; a NULL address or a NULL
//         configuration is ignored. *config is reset to NULL on return so
//         that the caller is not left with a dangling pointer.
void dico_free_config(Dico_config *config){
  if(config == NULL || *config == NULL) return;
  lex_free(&((*config)->categories));
  tab_l_free(&((*config)->cat_features));
  tab_l_free(&((*config)->default_features));
  hash_free((*config)->hash_cat);
  dico_pfree(*config);
  *config = NULL;
}


// Releases a dictionary and everything it holds: its configuration, the
// compound lexicon, both feature tables, then the structure itself.
// dic: address of the dictionary; a NULL address or a NULL dictionary is
//      ignored. *dic is reset to NULL on return, so a later dico_load(),
//      which tests *dic == NULL, creates a fresh dictionary instead of
//      reusing freed memory.
void dico_free(Dico *dic){
  if(dic == NULL || *dic == NULL) return;
  dico_free_config(&((*dic)->config));
  lex_free(&((*dic)->compound_lexicon));
  tab_ll_free(&((*dic)->compound_features));
  tab_ll_free(&((*dic)->simple_features));
  dico_pfree(*dic);
  *dic = NULL;
}
