00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef Tanl_Text_Char_h
00025 #define Tanl_Text_Char_h
00026
00027 #include <cstring>
00028 #include <cwchar>
00029 #include <string>
00030
00031 #include "text.h"
00032 #include "Unicode.h"
00033
00034 namespace Tanl {
00035 namespace Text {
00036
00040 class Char
00041 {
00042 public:
00043
00044 typedef Char CharType;
00045 typedef UCS2 CodeUnit;
00046
00047 static const int MaxRadix = 36;
00048 static const int MinRadix = 2;
00049
00050 static const UCS2 MaxValue = 0xFFFF;
00051 static const UCS2 MinValue = 0x0000;
00052
00053 Char() { }
00054
00055 Char(UCS2 ucs) : ucs(ucs) { }
00056
00057 operator UCS2() const { return ucs; }
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00083 int CompareTo(Char const& other) const { return ucs - (UCS2)other; }
00084
00085 int CompareCase(Char const& other) const {
00086 return ToLower().ucs - other.ToLower().ucs; }
00087
00088 Unicode::Category GetCategory() const {
00089 return (Unicode::Category)Unicode::UnicodeTable[ucs].category;
00090 }
00091
00092 bool IsControl() const {
00093 return Unicode::UnicodeTable[ucs].flags & Unicode::Control; }
00094
00095 bool IsDigit() const {
00096 return Unicode::UnicodeTable[ucs].flags & Unicode::Number; }
00097
00098 bool IsLetter() const {
00099 return Unicode::UnicodeTable[ucs].flags & Unicode::Letter; }
00100
00101 bool IsLetterOrDigit() const { return IsDigit() || IsLetter(); }
00102
00103 bool IsLower() const {
00104 return Unicode::UnicodeTable[ucs].flags & Unicode::Lowercase; }
00105
00106 bool IsMark() const {
00107 return Unicode::UnicodeTable[ucs].flags & Unicode::Control;
00108 }
00109
00111 bool IsPunctuation() const {
00112 return Unicode::UnicodeTable[ucs].flags & Unicode::Punctuation;
00113 }
00114
00116 bool IsSpace() const {
00117 return Unicode::UnicodeTable[ucs].flags & Unicode::Whitespace ||
00118 ucs == 0x00A0;
00119 }
00120
00121 bool IsSymbol() const {
00122 return Unicode::UnicodeTable[ucs].flags & Unicode::Symbol;
00123 }
00124
00125 bool IsTitle() const {
00126 return Unicode::UnicodeTable[ucs].flags & Unicode::Titlecase;
00127 }
00128
00129 bool IsUpper() const {
00130 return Unicode::UnicodeTable[ucs].flags & Unicode::Uppercase;
00131 }
00132
00134 bool IsWhitespace() const {
00135 return Unicode::UnicodeTable[ucs].flags & Unicode::Whitespace;
00136 }
00137
00138 int ToDigit(int radix);
00139
00140 Char ToLower() const;
00141
00142 Char ToUpper() const;
00143
00147 Unicode::BlockCode BlockCode() { return Unicode::GetBlockCode(ucs); }
00148
00149 protected:
00150 UCS2 ucs;
00151 };
00152
00156 class Utf8Char : public Char
00157 {
00158 public:
00159 typedef Char CharType;
00160 typedef char CodeUnit;
00161 };
00162
/**
 * Character descriptor for plain C characters: both the logical
 * character type and the storage unit are char.
 */
class CChar
{
public:
  typedef char CodeUnit;   ///< Storage unit.
  typedef char CharType;   ///< Logical character type.
};
00172
00173 }
00174 }
00175
00176 namespace std {
00177
00178 #if defined(__GNUC__) && __GNUC__ < 3
00179
00180 struct string_char_traits<Tanl::Text::Char> {
00181
00182 typedef Tanl::Text::Char char_type;
00183
00184 static void assign(char_type& c1, const char_type& c2) { c1 = c2; }
00185 static bool eq(const char_type& c1, const char_type& c2) {
00186 return (c1 == c2);
00187 }
00188 static bool ne(const char_type& c1, const char_type& c2) {
00189 return (c1 != c2);
00190 }
00191 static bool lt(const char_type& c1, const char_type& c2) {
00192 return (c1 < c2);
00193 }
00194 static char_type eos() { return 0; }
00195 static bool is_del(char_type a) { return char_type(a).IsSpace(); }
00196 static int compare(const char_type* s1, const char_type* s2, size_t n) {
00197 return ::wmemcmp((wchar_t const*)s1, (wchar_t const*)s2, n);
00198 }
00199 static size_t length(const char_type* s) {
00200 size_t i = 0;
00201 while (!eq(s[i], char_type(0))) ++i;
00202 return i;
00203 }
00204 static char_type* copy(char_type* s1, const char_type* s2, size_t n) {
00205 return (char_type*)::wmemcpy((wchar_t*)s1, (wchar_t const*)s2, n);
00206 }
00207 static char_type* move(char_type* s1, const char_type* s2, size_t n) {
00208 return (char_type*)::wmemmove((wchar_t*)s1, (wchar_t const*)s2, n);
00209 }
00210 static char_type* set(char_type* s, const char_type& c, size_t n) {
00211 return (char_type*)::wmemset((wchar_t*)(Tanl::Text::UCS2*)s, (Tanl::Text::UCS2)c, n);
00212 }
00213 };
00214
00215 #else
00216
00217 template<>
00218 struct char_traits<Tanl::Text::Char> {
00219
00220 typedef Tanl::Text::Char char_type;
00221 typedef int int_type;
00222 typedef streampos pos_type;
00223 typedef streamoff off_type;
00224 typedef mbstate_t state_type;
00225
00226 static void
00227 assign(char_type& c1, const char_type& c2) { c1 = c2; }
00228
00229
00230 static bool
00231 eq(const char_type& c1, const char_type& c2) {
00232 return (c1 == c2);
00233 }
00234
00235 static bool
00236 lt(const char_type& c1, const char_type& c2) { return(c1 < c2); }
00237
00238
00239 static int
00240 compare(const char_type* s1, const char_type* s2, size_t n) {
00241 for (size_t i = 0; i < n; ++i) {
00242 if (eq(s1[i], s2[i])) continue;
00243 if (lt(s1[i], s2[i])) return -1;
00244 else return 1;
00245 }
00246 return 0;
00247 }
00248
00249 static size_t
00250 length(const char_type* s) {
00251 size_t i = 0;
00252 while (!eq(s[i], char_type(0))) ++i;
00253 return i;
00254 }
00255
00256 static const char_type*
00257 find(const char_type* s, size_t n, const char_type& c) {
00258 size_t i = 0;
00259 while (i < n && !eq(s[i], c)) ++i;
00260 return s + i;
00261 }
00262
00263 static char_type*
00264 move(char_type* s1, const char_type* s2, size_t n) {
00265 return static_cast<char_type*>
00266 (::memmove(s1, s2, n * sizeof(char_type)));
00267 }
00268
00269 static char_type*
00270 copy(char_type* s1, const char_type* s2, size_t n) {
00271 return static_cast<char_type*>
00272 (::memcpy(s1, s2, n * sizeof(char_type)));
00273 }
00274
00275 static char_type*
00276 assign(char_type* s, size_t n, char_type a) {
00277 for (size_t i = 0; i < n; ++i)
00278 assign(s[i], a);
00279 return s;
00280 }
00281
00282 static char_type
00283 to_char_type(const int_type& i) { return char_type(i); }
00284
00285 static int_type
00286 to_int_type(const char_type& c) { return (Tanl::Text::UCS2)c; }
00287
00288 static bool
00289 eq_int_type(const int_type& i1, const int_type& i2) { return (i1 == i2); }
00290
00291 static int_type
00292 eof() { return static_cast<int_type>(EOF); }
00293
00294 static int_type
00295 not_eof(const int_type& i) { return (eof() == i) ? !eof() : i; }
00296 };
00297
00298 #endif // __GNUC__
00299
00300 }
00301
00302 #endif // Tanl_Text_Char_h