DeSR Dependency Parser |
00001 /* 00002 ** IXE C++ Library 00003 ** ixe/text/StreamTokenizer.cpp 00004 ** ---------------------------------------------------------------------- 00005 ** Copyright (c) 2002 Giuseppe Attardi (attardi@di.unipi.it). 00006 ** ---------------------------------------------------------------------- 00007 ** 00008 ** This file is part of DeSR. 00009 ** 00010 ** DeSR is free software; you can redistribute it and/or modify it 00011 ** under the terms of the GNU General Public License, version 3, 00012 ** as published by the Free Software Foundation. 00013 ** 00014 ** DeSR is distributed in the hope that it will be useful, 00015 ** but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 ** GNU General Public License for more details. 00018 ** 00019 ** You should have received a copy of the GNU General Public License 00020 ** along with this program. If not, see <http://www.gnu.org/licenses/>. 00021 ** ---------------------------------------------------------------------- 00022 */ 00023 00024 #include "text/strings.h" 00025 #include "text/StreamTokenizer.h" 00026 #include <iostream> 00027 00028 using namespace std; 00029 00030 namespace Tanl { 00031 namespace Text { 00032 00033 char const* StreamTokenizer::hasNext() 00034 { 00035 if (tok) 00036 return tok; 00037 count += toklen; // count characters up to end of previous token 00038 char const* start = rest; 00039 if (rest && rest < end) { // continue on same line 00040 if ((tok = next_token(rest, end, delim))) { 00041 toklen = rest - tok; 00042 count += tok - start; // count characters before next token 00043 return tok; 00044 } 00045 } 00046 count += end - start; // got to the end 00047 return 0; 00048 } 00049 00050 bool StreamTokenizer::getline() 00051 { 00052 if (count) count++; // (count previous \n) 00053 if (is->getline(line, MAX_LINE_LEN)) { 00054 lineNumber++; 00055 rest = line; 00056 end = line + ::strlen(line); 00057 return true; 00058 } 00059 return false; 00060 } 00061 00062 } // namespace Text 00063 } // namespace Tanl 00064