00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef Tanl_text_Utf8Utils_h
00025 #define Tanl_text_Utf8Utils_h
00026
00027
00028 #include <sys/types.h>
00029 #include <string.h>
00030
00031 #if defined(_WIN32)
00032 # include <tchar.h>
00033 #endif
00034
00035
00036 #include "text.h"
00037
00038 namespace Tanl {
00039 namespace Text {
00040
00044 namespace Unicode
00045 {
// Alias for unsigned char. The ToUtf8() overloads declared in this header
// take their buffer arguments as byte*.
00046 typedef unsigned char byte;
00047
// Pointer-stepping and range helpers over char data. Their definitions are not
// part of this header, so the notes below are read off the signatures only.
//
// ptr is taken by reference, so the caller's pointer can be updated; end is by value.
// NOTE(review): presumably moves ptr forward by one UTF-8 character, bounded by end - confirm.
00048 void incUtfPtr(const char*& ptr, const char* end);
// Same shape as incUtfPtr(), but the bound passed is begin rather than end.
// NOTE(review): presumably moves ptr back by one UTF-8 character, bounded by begin - confirm.
00049 void decUtfPtr(const char*& ptr, const char* begin);
// Mind the argument order: end comes first, begin second.
// NOTE(review): presumably the number of UTF-8 characters in [begin, end) - confirm.
00050 size_t utfDiff(const char* end, const char* begin);
// NOTE(review): presumably true when every byte in [begin, end) is 7-bit ASCII - confirm.
00051 bool isAscii(const char* begin, const char* end);
00052
// Returns a UCS2 value; begin is taken by reference, so the callee is able to
// update the caller's pointer, while end is passed by value.
// NOTE(review): presumably decodes one UTF-8 character and leaves begin just past it - confirm.
00053 UCS2 fetchChar(const char*& begin, const char* end);
00054 inline UCS2 toChar(const char* begin, const char* end)
00055 {
00056 const char* it = begin;
00057 return fetchChar(it, end);
00058 }
// NOTE(review): presumably the byte count of the UTF-8 sequence that starts at it - confirm.
00059 int byteLength(const char* it);
// Overload for a single UCS2 value.
// NOTE(review): presumably the number of UTF-8 bytes needed to encode ch - confirm.
00063 int utfLength(UCS2 ch);
00064
// Overload for a UCS2 range given as a begin/end pointer pair.
// NOTE(review): presumably the total UTF-8 byte count for [begin, end) - confirm.
00068 int utfLength(const UCS2* begin, const UCS2* end);
// Length query on a UCS2 string; uc2Length() and stringLength(const UCS2*)
// in this header both forward to it.
// NOTE(review): presumably counts UCS2 units up to a zero terminator - confirm.
00072 int ucLength(const UCS2* ch);
00073 inline int uc2Length(const UCS2* ch) { return ucLength(ch); }
// Length query on a UCS4 string; stringLength(const UCS4*) in this header forwards to it.
// NOTE(review): presumably counts UCS4 units up to a zero terminator - confirm.
00077 int uc4Length(const UCS4* ch);
// Single-character overload: it is taken by reference (so the callee can move
// the caller's pointer), end is passed by value, ucc is the UCS2 value.
// NOTE(review): presumably writes the UTF-8 encoding of ucc at it, without
// going past end; the meaning of the int result is not visible here - confirm.
00087 int ToUtf8(byte*& it, byte* end, UCS2 ucc);
// Buffer overload: it is passed by value here, alongside two sizes (max, len)
// and a UCS2 source pointer (ucbegin).
// NOTE(review): presumably max is the capacity at it and len the number of
// UCS2 units to read from ucbegin; the meaning of the int result is not
// visible here - confirm.
00095 int ToUtf8(byte* it, const size_t max, const UCS2* ucbegin, const size_t len);
00096
// Conversions into a caller-supplied UCS2 buffer. All four return UCS2*.
// NOTE(review): presumably the returned pointer is buffer, and buffer must be
// large enough for the converted text - confirm against the definitions.
//
// char source: length has no default in this overload and must be supplied.
00106 UCS2* ToUc(UCS2* buffer, const char* source, int length);
// UCS4 source: length defaults to -1.
// NOTE(review): presumably -1 means "source is zero-terminated" - confirm.
00117 UCS2* ToUc(UCS2* buffer, const UCS4* source, int length = -1);
// wchar_t source: length defaults to -1 (same hedge as above).
00128 UCS2* ToUc(UCS2* buffer, const wchar_t* source, int length = -1);
// char source again, but with length defaulting to -1.
// NOTE(review): the name suggests source is UTF-8 encoded; how this differs
// from ToUc(UCS2*, const char*, int) is not visible here - confirm.
00132 UCS2* utf8ToUc(UCS2* buffer, const char* source, int length = -1);
00133
/// Length of a NUL-terminated char string, as reported by strlen().
inline size_t stringLength(const char* str)
{
    const size_t count = strlen(str);
    return count;
}
00135 inline size_t stringLength(const UCS2* str) { return ucLength(str); }
00136 inline size_t stringLength(const UCS4* str) { return uc4Length(str); }
00137
/**
 * Copies characters from one character type to another, element by element.
 *
 * Each source element is converted with a plain assignment
 * (ToCharType = FromCharType); no encoding transformation is applied and
 * no terminator is appended after the copied elements.
 *
 * @param buffer  destination; must have room for at least `length` elements.
 * @param source  first element to copy.
 * @param length  number of elements to copy. The default, -1, means
 *                "use stringLength(source)". Any other negative value
 *                results in nothing being copied.
 * @return `buffer`.
 */
template <typename ToCharType, typename FromCharType>
inline ToCharType* convertTo(ToCharType* buffer, const FromCharType* source, int length = -1)
{
    if (length == -1)
        length = static_cast<int>(stringLength(source)); // size_t -> int made explicit

    // The `register` specifier was dropped from the iterators: it is deprecated
    // since C++11 and ill-formed from C++17 on, and had no effect on the result.
    const FromCharType* const end = source + length;
    ToCharType* tit = buffer;
    for (const FromCharType* fit = source; fit < end; ++tit, ++fit)
        *tit = *fit;
    return buffer;
}
00150
00151 }
00152 }
00153 }
00154
00155
00156 #endif //Tanl_text_Utf8Utils_h