1 : /** \file
2 : * Parser for APT records
3 : */
4 :
5 : /*
6 : * Copyright (C) 2007 Enrico Zini <enrico@enricozini.org>
7 : *
8 : * This library is free software; you can redistribute it and/or
9 : * modify it under the terms of the GNU Lesser General Public
10 : * License as published by the Free Software Foundation; either
11 : * version 2.1 of the License, or (at your option) any later version.
12 : *
13 : * This library is distributed in the hope that it will be useful,
14 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 : * Lesser General Public License for more details.
17 : *
18 : * You should have received a copy of the GNU Lesser General Public
19 : * License along with this library; if not, write to the Free Software
20 : * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 : */
22 :
23 : #include <ept/apt/recordparser.h>
24 :
25 : #include <algorithm>
26 : #include <cctype>
27 :
28 : //#include <iostream>
29 :
30 : using namespace std;
31 :
32 : namespace ept {
33 : namespace apt {
34 :
35 : struct rpcompare
36 : {
37 : const RecordParser& rp;
38 2618 : rpcompare(const RecordParser& rp) : rp(rp) {}
39 177724 : bool operator()(size_t a, size_t b)
40 : {
41 177724 : return rp.name(a) < rp.name(b);
42 : }
43 : };
44 :
45 2618 : void RecordParser::scan(const std::string& str)
46 : {
47 2618 : buffer = str;
48 2618 : ends.clear();
49 2618 : sorted.clear();
50 :
51 : //cerr << "PARSE " << endl << buffer << "*****" << endl;
52 :
53 : // Scan the buffer, taking note of all ending offsets of the various fields
54 2618 : size_t pos = 0;
55 2618 : size_t idx = 0;
56 59427 : while (pos < buffer.size() - 1)
57 : {
58 : //cerr << "PREPOS " << pos << " left: " << buffer.substr(pos, 10) << endl;
59 56809 : pos = buffer.find("\n", pos);
60 : //cerr << "POSTPOS " << pos << " left: " << (pos == string::npos ? "NONE" : buffer.substr(pos, 10)) << endl;
61 :
62 : // The buffer does not end with a newline
63 56809 : if (pos == string::npos)
64 : {
65 : //cerr << "ENDNOTEOL" << endl;
66 1 : pos = buffer.size();
67 1 : ends.push_back(pos);
68 1 : sorted.push_back(idx++);
69 1 : break;
70 : }
71 :
72 56808 : ++pos;
73 : //cerr << "POSTPOSINC " << pos << " left: " << buffer.substr(pos, 10) << endl;
74 :
75 : // The buffer ends with a newline
76 56808 : if (pos == buffer.size())
77 : {
78 : //cerr << "ENDEOL" << endl;
79 11 : ends.push_back(pos);
80 11 : sorted.push_back(idx++);
81 11 : break;
82 : }
83 :
84 : // Terminate parsing on double newlines
85 56797 : if (buffer[pos] == '\n')
86 : {
87 : //cerr << "ENDDOUBLENL" << endl;
88 2606 : ends.push_back(pos);
89 2606 : sorted.push_back(idx++);
90 2606 : break;
91 : }
92 :
93 : // Mark the end of the field if it's not a continuation line
94 54191 : if (!isspace(buffer[pos]))
95 : {
96 : //cerr << "INNERFIELD" << endl;
97 34233 : ends.push_back(pos);
98 34233 : sorted.push_back(idx++);
99 : } //else
100 : //cerr << "CONTLINE" << endl;
101 : }
102 :
103 : // Sort the sorted array
104 2618 : sort(sorted.begin(), sorted.end(), rpcompare(*this));
105 :
106 : //for (size_t i = 0; i < ends.size(); ++i)
107 : // cerr << ends[i] << "\t" << name(i) << "\t" << sorted[i] << "\t" << name(sorted[i]) << endl;
108 2618 : }
109 :
110 465183 : std::string RecordParser::field(size_t idx) const
111 : {
112 465183 : if (idx >= ends.size())
113 2664 : return string();
114 462519 : if (idx == 0)
115 52771 : return buffer.substr(0, ends[0]);
116 : else
117 409748 : return buffer.substr(ends[idx-1], ends[idx]-ends[idx-1]);
118 : }
119 :
120 446879 : std::string RecordParser::name(size_t idx) const
121 : {
122 446879 : string res = field(idx);
123 446879 : size_t pos = res.find(":");
124 446879 : if (pos == string::npos)
125 1 : return res;
126 446878 : return res.substr(0, pos);
127 : }
128 :
129 18279 : std::string RecordParser::lookup(size_t idx) const
130 : {
131 18279 : string res = field(idx);
132 18279 : size_t pos = res.find(":");
133 18279 : if (pos == string::npos)
134 2662 : return res;
135 : // Skip initial whitespace after the :
136 15617 : for (++pos; pos < res.size() && isspace(res[pos]); ++pos)
137 : ;
138 15617 : res = res.substr(pos);
139 : // Trim spaces at the end
140 46850 : while (!res.empty() && isspace(res[res.size() - 1]))
141 15616 : res.resize(res.size() - 1);
142 15617 : return res;
143 : }
144 :
145 18283 : size_t RecordParser::index(const std::string& str) const
146 : {
147 : int begin, end;
148 :
149 : /* Binary search */
150 18283 : begin = -1, end = size();
151 109703 : while (end - begin > 1)
152 : {
153 73137 : int cur = (end + begin) / 2;
154 : //cerr << "Test " << cur << " " << str << " < " << name(cur) << endl;
155 73137 : if (name(sorted[cur]) > str)
156 32507 : end = cur;
157 : else
158 40630 : begin = cur;
159 : }
160 :
161 18283 : if (begin == -1 || name(sorted[begin]) != str)
162 2661 : return size();
163 : else
164 15622 : return sorted[begin];
165 : }
166 :
167 : }
168 : }
169 :
170 : // vim:set ts=4 sw=4:
|