00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef DeSR_WordCounts_H
00025 #define DeSR_WordCounts_H
00026
00027
00028 #include "include/unordered_map.h"
00029
00030
00031 #include <string>
00032
00033 namespace Parser {
00034
/**
 * Occurrence counter for words: maps each word to the number of times it
 * has been passed to add().
 */
struct WordCounts : public unordered_map<std::string, int> {
  /**
   * Record one more occurrence of @c w.
   *
   * @param w  the word to count.
   * @return the number of occurrences of @c w recorded so far, including
   *         this one (1 the first time the word is seen).
   */
  int add(std::string const& w) {
    // operator[] value-initializes a missing entry to 0, so one lookup
    // handles both the first and any repeated occurrence.
    return ++(*this)[w];
  }

  /**
   * Number of occurrences recorded for @c w.
   *
   * Unlike operator[], this never inserts an entry for an unknown word.
   * Note that this overload shadows any count() inherited from the base map.
   *
   * @param w  the word to look up.
   * @return the recorded count, or 0 if @c w was never added.
   */
  int count(std::string const& w) const {
    const_iterator pos = find(w);
    return pos == end() ? 0 : pos->second;
  }
};
00051
00052 class WordFreq : public unordered_map<const char*, float, hash<const char*>, eqstr>
00053 {
00054 public:
00055
00056 typedef unordered_map<const char*, float, hash<const char*>, eqstr> HashMap;
00057
00061 WordFreq(char const* file) { load(file); }
00062 WordFreq(std::string& file) { load(file.c_str()); }
00063
00064 ~WordFreq() {
00065 for (iterator it = this->begin(); it != this->end(); ++it)
00066 free((void*)it->first);
00067 }
00068
00072 bool contains(char const* ngram) {
00073 return this->find(ngram) != this->end();
00074 }
00075
00079 bool contains(std::string const& ngram) {
00080 return this->find(ngram.c_str()) != this->end();
00081 }
00082
00086 std::pair<iterator,bool> insert(const value_type& __obj) {
00087 iterator _Where = this->find(__obj.first);
00088 key_type _Keyval = (_Where == this->end()) ?
00089 ::strdup(__obj.first) :
00090 _Where->first;
00091 return HashMap::insert(value_type(_Keyval, __obj.second));
00092 }
00093
00094 mapped_type& operator[](const key_type& _Keyval) {
00095
00096 iterator _Where = this->find(_Keyval);
00097 if (_Where == this->end())
00098 _Where = this->insert(value_type(_Keyval, mapped_type())).first;
00099 return _Where->second;
00100 }
00101
00105 void load(char const* file) {
00106 std::ifstream ifs(file);
00107 load(ifs);
00108 }
00109
00114 void load(std::ifstream& ifs) {
00115 # define MAX_LINE_LEN 4096
00116 char line[MAX_LINE_LEN];
00117
00118 while (ifs.getline(line, MAX_LINE_LEN)) {
00119 char* next = line;
00120 char const* ngram = strtok_r(0, "\t", &next);
00121 float count = atof(strtok_r(0, " ", &next));
00122 insert(std::make_pair(ngram, count));
00123 }
00124 # undef MAX_LINE_LEN
00125 }
00126 };
00127
00128 }
00129
00130 #endif // DeSR_WordCounts_H