00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef DeSR_MorphExtractor_H
00025 #define DeSR_MorphExtractor_H
00026
00027 #include "text/RegExp.h"
00028
00029 namespace Tanl {
00030
/**
 * Base type for the language-specific morphology extractors declared in
 * this header.
 *
 * Each subclass overrides operator() and receives a character range
 * [start, end) together with a Features record to fill in.
 */
struct MorphExtractor
{
  MorphExtractor() { }

  /**
   * Virtual destructor: this struct has a virtual member and is used as a
   * public base, so destroying a subclass through a MorphExtractor pointer
   * must dispatch to the subclass destructor.
   */
  virtual ~MorphExtractor() { }

  /**
   * Fixed-size record holding the extracted features as C strings.
   *
   * The constructor makes every field an empty string by writing a
   * terminator into its first byte; the remaining bytes are left
   * uninitialized.
   */
  struct Features {
    Features() {
      Case[0] = '\0';
      extra[0] = '\0';
      gender[0] = '\0';
      mode[0] = '\0';
      negative[0] = '\0';
      number[0] = '\0';
      person[0] = '\0';
      tense[0] = '\0';
      trans[0] = '\0';
      full[0] = '\0';
    }

    // Capitalized because `case` is a C++ keyword.
    char Case[20];
    char extra[20];
    char gender[20];
    char mode[20];
    char negative[20];
    char number[20];
    char person[20];
    char tense[20];
    char trans[20];
    // Larger buffer than the per-feature fields.
    // NOTE(review): presumably holds the complete feature string - confirm
    // against the implementation file.
    char full[256];
  };

  /**
   * Extraction entry point, overridden by every subclass in this header.
   * The definition lives outside this header.
   *
   * @param start  first character of the input range.
   * @param end    end of the input range (presumably one past the last
   *               character - confirm against the implementation).
   * @param mf     feature record the extractor writes into.
   */
  virtual void operator() (char const* start, char const* end,
                           Features& mf) const;
};
00067
00068 struct ArabicMorphExtractor : public MorphExtractor
00069 {
00070 static Tanl::Text::RegExp::Pattern reCase;
00071 static Tanl::Text::RegExp::Pattern reGender;
00072 static Tanl::Text::RegExp::Pattern reNumber;
00073 static Tanl::Text::RegExp::Pattern rePerson;
00074
00075 void operator() (char const* start, char const* end, Features& mf) const;
00076 };
00077
00078 struct BasqueMorphExtractor : public MorphExtractor
00079 {
00080 static Tanl::Text::RegExp::Pattern reNumber;
00081
00082 BasqueMorphExtractor() {}
00083
00084 void operator() (char const* start, char const* end, Features& mf) const;
00085 };
00086
00087 struct BulgarianMorphExtractor : public MorphExtractor
00088 {
00089 static Tanl::Text::RegExp::Pattern reGender;
00090 static Tanl::Text::RegExp::Pattern reNumber;
00091 static Tanl::Text::RegExp::Pattern rePerson;
00092
00093 void operator() (char const* start, char const* end, Features& mf) const;
00094 };
00095
00096 struct CzechMorphExtractor : public MorphExtractor
00097 {
00098 static Tanl::Text::RegExp::Pattern reCase;
00099 static Tanl::Text::RegExp::Pattern reGender;
00100 static Tanl::Text::RegExp::Pattern reNumber;
00101 static Tanl::Text::RegExp::Pattern rePerson;
00102 static Tanl::Text::RegExp::Pattern reNegative;
00103 static Tanl::Text::RegExp::Pattern reGra;
00104
00105 void operator() (char const* start, char const* end, Features& mf) const;
00106 };
00107
00108 struct DanishMorphExtractor : public MorphExtractor
00109 {
00110 static Tanl::Text::RegExp::Pattern reGender;
00111 static Tanl::Text::RegExp::Pattern reNumber;
00112 static Tanl::Text::RegExp::Pattern rePerson;
00113 static Tanl::Text::RegExp::Pattern reCase;
00114
00115 void operator() (char const* start, char const* end, Features& mf) const;
00116 };
00117
00118 struct DutchMorphExtractor : public MorphExtractor
00119 {
00120 static Tanl::Text::RegExp::Pattern reGender;
00121 static Tanl::Text::RegExp::Pattern reNumber;
00122 static Tanl::Text::RegExp::Pattern rePerson;
00123 static Tanl::Text::RegExp::Pattern reCase;
00124
00125 void operator() (char const* start, char const* end, Features& mf) const;
00126 };
00127
00128 struct GreekMorphExtractor : public MorphExtractor
00129 {
00130 static Tanl::Text::RegExp::Pattern reGender;
00131 static Tanl::Text::RegExp::Pattern reNumber;
00132 static Tanl::Text::RegExp::Pattern rePerson;
00133 static Tanl::Text::RegExp::Pattern reCase;
00134
00135 void operator() (char const* start, char const* end, Features& mf) const;
00136 };
00137
00138 struct HungarianMorphExtractor : public MorphExtractor
00139 {
00140 static Tanl::Text::RegExp::Pattern reNumber;
00141 static Tanl::Text::RegExp::Pattern rePerson;
00142 static Tanl::Text::RegExp::Pattern reCase;
00143
00144 void operator() (char const* start, char const* end, Features& mf) const;
00145 };
00146
00147 struct ItalianTutMorphExtractor : public MorphExtractor
00148 {
00149 static Tanl::Text::RegExp::Pattern reCase;
00150 static Tanl::Text::RegExp::Pattern reGender;
00151 static Tanl::Text::RegExp::Pattern reMode;
00152 static Tanl::Text::RegExp::Pattern reNumber;
00153 static Tanl::Text::RegExp::Pattern rePerson;
00154 static Tanl::Text::RegExp::Pattern reSem;
00155 static Tanl::Text::RegExp::Pattern reTense;
00156 static Tanl::Text::RegExp::Pattern reTrans;
00157 static Tanl::Text::RegExp::Pattern reVTrans;
00158
00159 void operator() (char const* start, char const* end, Features& mf) const;
00160 };
00161
00162 struct PortugueseMorphExtractor : public MorphExtractor
00163 {
00164 static Tanl::Text::RegExp::Pattern reGender;
00165 static Tanl::Text::RegExp::Pattern reNumber;
00166 static Tanl::Text::RegExp::Pattern rePerson;
00167
00168 PortugueseMorphExtractor() {}
00169
00170 void operator() (char const* start, char const* end, Features& mf) const;
00171 };
00172
00173 struct SloveneMorphExtractor : public MorphExtractor
00174 {
00175 static Tanl::Text::RegExp::Pattern reCase;
00176 static Tanl::Text::RegExp::Pattern reGender;
00177 static Tanl::Text::RegExp::Pattern reNegative;
00178 static Tanl::Text::RegExp::Pattern reNumber;
00179 static Tanl::Text::RegExp::Pattern rePerson;
00180
00181 void operator() (char const* start, char const* end, Features& mf) const;
00182 };
00183
00184 struct SpanishMorphExtractor : public MorphExtractor
00185 {
00186 static Tanl::Text::RegExp::Pattern reGender;
00187 static Tanl::Text::RegExp::Pattern reNumber;
00188 static Tanl::Text::RegExp::Pattern rePerson;
00189
00190 void operator() (char const* start, char const* end, Features& mf) const;
00191 };
00192
00193 struct TurkishMorphExtractor : public MorphExtractor
00194 {
00195 static Tanl::Text::RegExp::Pattern reAFeats;
00196 static Tanl::Text::RegExp::Pattern rePFeats;
00197
00198 void operator() (char const* start, char const* end, Features& mf) const;
00199 };
00200
00201 }
00202
00203 #endif // DeSR_MorphExtractor_H