// DeSR Dependency Parser
00001 /* 00002 ** Tanl C++ Library 00003 ** ixe/text/HtmlTokenizer.h 00004 ** ---------------------------------------------------------------------- 00005 ** Copyright (c) 2002 Giuseppe Attardi (attardi@di.unipi.it). 00006 ** ---------------------------------------------------------------------- 00007 ** 00008 ** This file is part of DeSR. 00009 ** 00010 ** DeSR is free software; you can redistribute it and/or modify it 00011 ** under the terms of the GNU General Public License, version 3, 00012 ** as published by the Free Software Foundation. 00013 ** 00014 ** DeSR is distributed in the hope that it will be useful, 00015 ** but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 ** GNU General Public License for more details. 00018 ** 00019 ** You should have received a copy of the GNU General Public License 00020 ** along with this program. If not, see <http://www.gnu.org/licenses/>. 00021 ** ---------------------------------------------------------------------- 00022 */ 00023 00024 #ifndef Tanl_Text_HtmlTokenizer_H 00025 #define Tanl_Text_HtmlTokenizer_H 00026 00027 #include "text/StringTokenizer.h" 00028 00029 using namespace std; 00030 00031 namespace Tanl { 00032 namespace Text { 00033 00038 class HtmlTokenizer : public StringTokenizer { 00039 00040 public: 00041 00042 // include \n 00046 static char const delimitersNL[]; // = " \t\n\r"; 00047 00048 HtmlTokenizer() : 00049 StringTokenizer() { } 00050 00058 HtmlTokenizer(istream& is, char const* delim = delimitersNL) : 00059 StringTokenizer(is, delim) { } 00060 00069 HtmlTokenizer(char const* s, char const* end = 0, 00070 char const* delim = delimitersNL) : 00071 StringTokenizer(s, end, delim) { } 00072 00076 char const* next(); 00080 char const* hasNext(); 00081 00082 }; 00083 00084 } // namespace Text 00085 } // namespace Tanl 00086 00087 #endif // Tanl_Text_HtmlTokenizer_H