#ifndef _UNICODE_H_
#define _UNICODE_H_

#include "UnicodeData.h"

#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <string>

/* Name of the utf8 encoding as a string literal.
 * NOTE(review): presumably handed to a charset-conversion API; the users are
 * not visible in this header. */
#define UTF8 "utf-8"
/* Non-zero when s points at the first byte of a utf8 character or at the
 * terminating NUL: an ASCII byte (0x00-0x7f) or a multi-byte lead byte
 * (0xc0-0xfd, which still admits the legacy 5- and 6-byte forms).
 * Continuation bytes 0x80-0xbf -- including 0x80 itself -- are rejected.
 * The argument is evaluated more than once, so it must be free of side
 * effects. */
#define utf8_initial(s) (!*(s) || ((unsigned char)(s)[0]>=0xc0 && (unsigned char)(s)[0]<=0xfd) || ((unsigned char)(s)[0]<0x80))
/* Iteration helpers for walking a utf8 string one character at a time.
 *
 * forutf8init() declares the two loop variables used by forutf8: the cursor
 * `forutf8s` and the byte length of the current character `forutf8ls`.  It
 * must appear once in the enclosing scope before forutf8 is used.
 *
 * forutf8(s,utfs) expands to a for-header.  Starting at utfs, each iteration
 * asks utf8charlen() for the byte length of the character at the cursor,
 * stops when that length is 0, otherwise copies that many bytes into s with
 * strncpy, writes a terminating 0 after them, and finally advances the cursor
 * by the same length.  s must therefore be a writable buffer with room for
 * the longest character plus one byte.  strncpy comes from <string.h>, which
 * the including file has to make available. */
#define forutf8init()   const char *forutf8s;int forutf8ls;
#define forutf8(s,utfs) for(forutf8s=utfs;(forutf8ls=utf8charlen(forutf8s)) && (strncpy(s,forutf8s,forutf8ls),s[forutf8ls]=0,1);forutf8s+=forutf8ls)

/* Check validity of a utf8 buffer.
 * NOTE(review): the meaning of the int result (boolean flag vs. position of
 * the first bad byte) is not visible in this header -- confirm against the
 * implementation. */
int check_utf8(const char *t);

/* Conversion to utf8: from a Unicode code point (unicode2utf8) or from a
 * single byte (iso12utf8; the name suggests ISO-8859-1 input).
 * Both return a const char* instead of filling a caller-supplied buffer.
 * NOTE(review): the result presumably lives in storage owned by the callee,
 * so copy it before the next call -- confirm. */
const char* unicode2utf8(unsigned int uc);
const char* iso12utf8(char ic);

/* Conversion from utf8 to unicode: takes a pointer to utf8 text and returns
 * an unsigned int code point. */
unsigned int utf82unicode(const char *s);

/* Find length of next utf8 character.
 * The forutf8 macro in this header treats a result of 0 as "end of string"
 * and otherwise uses the result as a byte count. */
size_t utf8charlen(const char *s);

/* Find length of utf8 word.
 * NOTE(review): presumably counted in utf8 characters rather than bytes --
 * confirm against the implementation. */
size_t utf8len(const char *S);

/* Advance a set number of utf8 chars from the start of a string.
 * pszUtf8 is the string, nChars the number of characters to step over; the
 * result is a pointer of the same const char* type. */
const char *utf8advance(const char *pszUtf8, size_t nChars);

/* Back up a set number of utf8 chars from the end of a string.
 * pszUtf8 is the string, nChars the number of characters to step back. */
const char *utf8backup(const char *pszUtf8, size_t nChars);

/* Copy a single utf8 char from pszUtf8 into pszCopy, with a terminating zero.
 * Returns utf8charlen of the char copied.  pszCopy has to be large enough for
 * the character plus the terminator. */
size_t utf8copychar(const char *pszUtf8, char *pszCopy);

/* Find the unidata description (unirdata, declared outside this header --
 * presumably in UnicodeData.h) for a code point (findunidescrpt) or for a
 * character given as utf8 text (findutfdescrpt).
 * NOTE(review): behaviour for unknown characters (null result?) is not
 * visible here -- confirm before dereferencing. */
unirdata *findunidescrpt(unsigned int uc);
unirdata *findutfdescrpt(const char *utf8);

/* Get, set and remove combining marks.
 * utf8char is the character operated on and utf8acc the accent involved, both
 * passed as utf8 text.  All three return const char*.
 * NOTE(review): ownership/lifetime of the returned text is not visible here. */
const char *GetAccents(const char *utf8char);
const char* SetAccent(const char *utf8char, const char *utf8acc);
const char* RemoveAccent(const char *utf8char, const char *utf8acc);
  
/* Get first UTF8 decomposition character of the character passed in. */
const char *GetFirstUTF8DecompChar(const char *);

/* Is a character in the decomposition of another?
 * The first argument is the character examined, decomp the character looked
 * for; the int result is presumably used as a boolean. */
int HasUTF8DecompChar(const char *, const char *decomp);

/* NOTE(review): by its name, tests whether s has a letter belonging to the
 * alphabet identified by the f_val argument (f_val is declared outside this
 * header) -- confirm exact semantics against the implementation. */
int HasLetterInAlphabet(const char *s,f_val);

/* NOTE(review): by its name, returns the alphabet of s as an f_val; how mixed
 * scripts are handled is not visible here. */
f_val GetAlphabet(const char *s);

/* Script predicates, one per script named in the function (hiragana,
 * katakana, "hanji", hangul).  They take a non-const char*; nothing in this
 * header shows whether the argument is modified.
 * NOTE(review): whether only the first character or the whole string is
 * tested is not visible here -- confirm. */
int is_hiragana(char *s);
int is_katakana(char *s);
int is_hanji(char *s);
int is_hangul(char *s);

/* Korean ("ko_") helpers.
 * NOTE(review): the semantics below are inferred from names only; confirm
 * against the implementation before relying on them.
 *   ko_cons / ko_cons_l   - presumably consonant tests on utfa.
 *   ko_changeKorEnd       - presumably rewrites a word ending, reading
 *                           source and writing target.
 *   ko_conjugateString    - presumably combines utfa and utfb into utfc.
 *   ko_wd_equal_beg / end - presumably compare wpstr with str at the
 *                           beginning / end of the word. */
int ko_cons(char *utfa);
int ko_cons_l(char *utfa);
int ko_changeKorEnd(char *source,char *target);
int ko_conjugateString(char *utfa,char *utfb,char *utfc);
int ko_wd_equal_beg(const char *wpstr, const char *str);
int ko_wd_equal_end(const char *wpstr, const char *str);

/* Returns a C string for the unidata record u.
 * NOTE(review): presumably the name of the group/category the record belongs
 * to -- confirm. */
const char *uGroup(unirdata *u);

/* std::string counterpart of the utf8 checks: takes a string by const
 * reference and returns a new string.
 * NOTE(review): presumably a validated/repaired copy of s -- confirm what
 * happens to invalid sequences. */
std::string CheckUTF8(const std::string &s);

/* We used to use wchar_t, but eventually dropped it because it was not
 * portable enough.  swchar is a plain (signed) short instead.
 */
typedef short swchar;

/* String of swchar units.  It relies on the std::char_traits<short>
 * specialisation defined further down in this header. */
typedef std::basic_string<swchar> swstring;

/* Build a swstring from a C string or from a std::string.
 * NOTE(review): the input is presumably utf8 -- confirm. */
swstring ToSWString(const char *);
swstring ToSWString(const std::string &);

/* Convert a swstring back to a C string.
 * NOTE(review): the result is a const char* whose storage is not owned by the
 * caller as far as this header shows; copy it if it must outlive the next
 * call -- confirm. */
const char * FromSWString(const swstring &);

/* std::string convenience overload: hands the string's C representation to
 * utf8len(const char *) and returns that result unchanged. */
inline size_t utf8len(const std::string &s)
{
  const char *raw = s.c_str();
  return utf8len(raw);
}

/* std::string variant of the ISO-1 to utf8 conversion (compare iso12utf8,
 * which handles a single byte). */
std::string iso1toutf8(const std::string &s);

/* swchar conversions whose names and signatures mirror the C library's
 * mbtowc / wctomb.
 * NOTE(review): presumably they behave like those functions with utf8 as the
 * multibyte encoding (smbtowc reads at most n bytes of s into *pwc, swctomb
 * writes wc into s, sonembtowc returns the first character of s) -- confirm
 * return-value conventions against the implementation. */
int smbtowc(swchar *pwc, const char *s, size_t n);
swchar sonembtowc(const char *s);
int swctomb(char *s, swchar wc);

/* NOTE(review): by its name, tells whether the utf8 character passed in is a
 * letter; the int result is presumably a boolean. */
int isLetter(const char *);

/* NOTE(review): "Loosy" presumably means a lossy/lenient utf8 conversion;
 * what is dropped or replaced is not visible here. */
std::string LoosyUTF8convert(const std::string &s);

/* Encode / decode a string for use as a file name; by their names the two
 * functions are inverses of each other -- NOTE(review): confirm. */
std::string ProtectFileName(const std::string &s);
std::string UnProtectFileName(const std::string &s);

namespace std {
  /*
   * Character traits for `short` code units, so that
   * std::basic_string<short> (swstring) can be instantiated.
   *
   * All range operations (compare, find, move, copy, assign) work on exactly
   * the number of units they are given; a zero unit has no special meaning
   * except in length().  Ordering uses the signed value of the unit.
   * int_type is wint_t and the end-of-file value is WEOF.
   */
  template<>
  struct char_traits<short>
  {
    typedef short           char_type;
    typedef wint_t            int_type;

    /* Assign a single unit. */
    static void
    assign(char_type& dst, const char_type& src)
    { dst = src; }

    /* Equality of two units. */
    static bool
    eq(const char_type& lhs, const char_type& rhs)
    { return lhs == rhs; }

    /* Strict (signed) ordering of two units. */
    static bool
    lt(const char_type& lhs, const char_type& rhs)
    { return lhs < rhs; }

    /* Lexicographic comparison of the first n units of s1 and s2.
     * Returns -1, 0 or 1.  Zero units are compared like any other unit, so
     * strings holding embedded zeros are still compared to the end. */
    static int
    compare(const char_type* s1, const char_type* s2, size_t n)
    {
      for (size_t i = 0; i < n; i++) {
        if (lt(s1[i], s2[i])) {
          return -1;
        }
        if (lt(s2[i], s1[i])) {
          return 1;
        }
      }
      return 0;
    }

    /* Number of units before the first zero unit. */
    static size_t
    length(const char_type* s)
    {
      size_t len = 0;
      while (s[len] != 0) {
        len++;
      }
      return len;
    }

    /* Pointer to the first unit equal to a within the first n units of s,
     * or a null pointer when there is none.  The scan does not stop at zero
     * units. */
    static const char_type*
    find(const char_type* s, size_t n, const char_type& a)
    {
      for (size_t i = 0; i < n; i++) {
        if (eq(s[i], a)) {
          return s + i;
        }
      }
      return 0;
    }

    /* Copy n units from src to dst; the ranges may overlap.  Returns dst.
     * The count is a size_t so that large counts are not truncated. */
    static char_type*
    move(char_type* dst, const char_type* src, size_t n)
    {
      if (n == 0) {
        return dst;   /* never hand possibly-null pointers to memmove */
      }
      return static_cast<char_type*>(memmove(dst, src, n * sizeof(char_type)));
    }

    /* Copy n units from src to dst; the ranges must not overlap.
     * Returns dst. */
    static char_type*
    copy(char_type* dst, const char_type* src, size_t n)
    {
      if (n == 0) {
        return dst;   /* never hand possibly-null pointers to memcpy */
      }
      return static_cast<char_type*>(memcpy(dst, src, n * sizeof(char_type)));
    }

    /* Fill the first n units of s with a.  Returns s. */
    static char_type*
    assign(char_type* s, size_t n, char_type a)
    {
      for (size_t k = 0; k < n; k++) {
        s[k] = a;
      }
      return s;
    }

    /* Convert an int_type back to a unit. */
    static char_type
    to_char_type(const int_type& c) { return char_type(c); }

    /* Convert a unit to int_type.  The unit is widened through unsigned
     * short so that negative units are not sign-extended; otherwise the unit
     * 0xFFFF would become indistinguishable from eof() on platforms where
     * WEOF is an all-ones value wider than 16 bits. */
    static int_type
    to_int_type(const char_type& c)
    { return int_type(static_cast<unsigned short>(c)); }

    /* Equality of two int_type values. */
    static bool
    eq_int_type(const int_type& lhs, const int_type& rhs)
    { return lhs == rhs; }

    /* The end-of-file marker. */
    static int_type
    eof() { return static_cast<int_type>(WEOF); }

    /* c itself unless it is eof(), in which case 0 is returned. */
    static int_type
    not_eof(const int_type& c)
    { return eq_int_type(c, eof()) ? int_type(0) : c; }
  };
}

#endif // _UNICODE_H_

