1 : #ifndef TAGCOLL_COLL_INT_DISK_INDEX_H
2 : #define TAGCOLL_COLL_INT_DISK_INDEX_H
3 :
4 : /** \file
5 : * Fast on-disk index for tag data
6 : */
7 :
8 : /*
9 : * Copyright (C) 2006 Enrico Zini <enrico@debian.org>
10 : *
11 : * This program is free software; you can redistribute it and/or modify
12 : * it under the terms of the GNU General Public License as published by
13 : * the Free Software Foundation; either version 2 of the License, or
14 : * (at your option) any later version.
15 : *
16 : * This program is distributed in the hope that it will be useful,
17 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 : * GNU General Public License for more details.
20 : *
21 : * You should have received a copy of the GNU General Public License
22 : * along with this program; if not, write to the Free Software
23 : * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 : */
25 :
26 : #include <tagcoll/coll/base.h>
27 : #include <tagcoll/diskindex/int.h>
28 :
29 : namespace tagcoll {
30 : template<typename ITEM, typename TAG>
31 : class PatchList;
32 :
33 : namespace coll {
34 :
35 : class IntDiskIndex;
36 :
37 : template<>
38 : struct coll_traits< IntDiskIndex >
39 : {
40 : typedef int item_type;
41 : typedef int tag_type;
42 : typedef std::set<int> tagset_type;
43 : typedef std::set<int> itemset_type;
44 : };
45 :
46 : /**
47 : * Full TaggedCollection implementation on top of a persistent on-disk TDB
48 : * database.
49 : *
50 : * It allows to efficiently query a collection without having to store it all
51 : * into memory.
52 : *
53 : * If used for heavy modifications, the performance is slower compared to other
54 : * in-memory collections. If database writes are mainly used for populating
55 : * the index, then TDBIndexer should be used to create the index and
56 : * TDBDiskIndex to access it afterwards.
57 : */
58 : class IntDiskIndex : public coll::ReadonlyCollection< IntDiskIndex >
59 :
60 : {
61 : protected:
62 : diskindex::Int pkgidx;
63 : diskindex::Int tagidx;
64 :
65 : public:
66 : class const_iterator
67 : {
68 : const IntDiskIndex& index;
69 : int idx;
70 : mutable std::pair< int, std::set<int> >* cached;
71 :
72 : public:
73 : // Builds an iterator
74 8 : const_iterator(const IntDiskIndex& index, int idx)
75 8 : : index(index), idx(idx), cached(0) {}
76 : // Builds the end iterator
77 274893 : const_iterator(const IntDiskIndex& index)
78 274893 : : index(index), idx(index.pkgidx.size()), cached(0) {}
79 528661 : ~const_iterator() { if (cached) delete cached; }
80 :
81 105725 : std::pair< int, std::set<int> > operator*() const
82 : {
83 105725 : return std::make_pair(idx, index.getTagsOfItem(idx));
84 : }
85 42290 : std::pair< int, std::set<int> >* operator->() const
86 : {
87 42290 : if (!cached)
88 21145 : cached = new std::pair< int, std::set<int> >(operator*());
89 42290 : return cached;
90 : }
91 :
92 105725 : const_iterator operator++()
93 : {
94 105725 : ++idx;
95 105725 : if (cached) { delete cached; cached = 0; }
96 105725 : return *this;
97 : }
98 169161 : bool operator==(const const_iterator& iter) const
99 : {
100 169161 : return idx == iter.idx;
101 : }
102 105732 : bool operator!=(const const_iterator& iter) const
103 : {
104 105732 : return idx != iter.idx;
105 : }
106 : };
107 8 : const_iterator begin() const { return const_iterator(*this, 0); }
108 274893 : const_iterator end() const { return const_iterator(*this); }
109 :
110 : /**
111 : * Create a new IntDiskIndex
112 : *
113 : * @param filename
114 : * The file name of the package index
115 : * @param tagidx
116 : * The file name of the tag index
117 : * @param fromitem, fromtag, toitem, totag
118 : * The Converter-s used to convert int and int to and from strings.
119 : * If 0 is passed, this TDBDiskIndex will only be able to work with
120 : * string items and string tags.
121 : * @param write
122 : * Set to false if the index should be opened in read-only mode. If
123 : * opened in read-only mode, all non-const methods of this class will
124 : * throw an exception if invoked.
125 : * It defaults to true.
126 : */
127 10 : IntDiskIndex() {}
128 : IntDiskIndex(
129 : const diskindex::MasterMMap& master,
130 : int pkgindex, int tagindex)
131 : : pkgidx(master, pkgindex), tagidx(master, tagindex) {}
132 :
133 9 : void init(const diskindex::MasterMMap& master, int pkgindex, int tagindex)
134 : {
135 9 : pkgidx.init(master, pkgindex);
136 9 : tagidx.init(master, tagindex);
137 9 : }
138 :
139 : std::set<int> getItemsHavingTag(const int& tag) const;
140 : std::set<int> getItemsHavingTags(const std::set<int>& tags) const;
141 : std::set<int> getTagsOfItem(const int& item) const;
142 : std::set<int> getTagsOfItems(const std::set<int>& items) const;
143 :
144 : bool hasTag(const int& tag) const
145 : {
146 : return tagidx.size(tag) > 0;
147 : }
148 :
149 : std::set<int> getTaggedItems() const;
150 :
151 : std::set<int> getAllTags() const;
152 : std::vector<int> getAllTagsAsVector() const;
153 :
154 : unsigned int itemCount() const { return pkgidx.size(); }
155 : unsigned int tagCount() const { return tagidx.size(); }
156 :
157 0 : unsigned int getCardinality(const int& tag) const
158 : {
159 0 : return tagidx.size(tag);
160 : }
161 :
162 : std::set<int> getCompanionTags(const std::set<int>& tags) const;
163 :
164 : //void output(Consumer<int, int>& consumer) const;
165 : };
166 :
167 : class IntDiskIndexer
168 2 : {
169 : protected:
170 : diskindex::IntIndexer pkgidx;
171 : diskindex::IntIndexer tagidx;
172 :
173 : public:
174 1 : const diskindex::MMapIndexer& pkgIndexer() const { return pkgidx; }
175 1 : const diskindex::MMapIndexer& tagIndexer() const { return tagidx; }
176 :
177 : template<typename ITEMS, typename TAGS>
178 21133 : void insert(const ITEMS& items, const TAGS& tags)
179 : {
180 21133 : if (tags.empty())
181 0 : return;
182 42266 : for (typename ITEMS::const_iterator it = items.begin();
183 : it != items.end(); ++it)
184 126731 : for (typename TAGS::const_iterator ta = tags.begin();
185 : ta != tags.end(); ++ta)
186 : {
187 105598 : pkgidx.map(*it, *ta);
188 105598 : tagidx.map(*ta, *it);
189 : }
190 : }
191 : };
192 :
193 :
194 : }
195 : }
196 :
197 : // vim:set ts=4 sw=4:
198 : #endif
|