forked from hrydgard/ppsspp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Hashmaps.h
330 lines (303 loc) · 8.35 KB
/
Hashmaps.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
#pragma once
#include <cstring>
#include <vector>
#include "ext/xxhash.h"
#include "Common/CommonFuncs.h"
#include "Common/Log.h"
// Whatever random value.
const uint32_t hashmapSeed = 0x23B58532;
// TODO: Try hardware CRC. Unfortunately not available on older Intels or ARM32.
// Seems to be ubiquitous on ARM64 though.
template<class K>
inline uint32_t HashKey(const K &k) {
return XXH32(&k, sizeof(k), hashmapSeed);
}
template<class K>
inline bool KeyEquals(const K &a, const K &b) {
return !memcmp(&a, &b, sizeof(K));
}
enum class BucketState : uint8_t {
FREE,
TAKEN,
REMOVED, // for linear probing to work (and removal during deletion) we need tombstones
};
// Uses linear probing for cache-friendliness. Not segregating values from keys because
// we always use very small values, so it's probably better to have them in the same
// cache-line as the corresponding key.
// Enforces that value are pointers to make sure that combined storage makes sense.
template <class Key, class Value, Value NullValue>
class DenseHashMap {
public:
DenseHashMap(int initialCapacity) : capacity_(initialCapacity) {
map.resize(initialCapacity);
state.resize(initialCapacity);
}
// Returns nullptr if no entry was found.
Value Get(const Key &key) {
uint32_t mask = capacity_ - 1;
uint32_t pos = HashKey(key) & mask;
// No? Let's go into search mode. Linear probing.
uint32_t p = pos;
while (true) {
if (state[p] == BucketState::TAKEN && KeyEquals(key, map[p].key))
return map[p].value;
else if (state[p] == BucketState::FREE)
return NullValue;
p = (p + 1) & mask; // If the state is REMOVED, we just keep on walking.
if (p == pos) {
_assert_msg_(SYSTEM, false, "DenseHashMap: Hit full on Get()");
}
}
return NullValue;
}
// Returns false if we already had the key! Which is a bit different.
bool Insert(const Key &key, Value value) {
// Check load factor, resize if necessary. We never shrink.
if (count_ > capacity_ / 2) {
Grow(2);
}
uint32_t mask = capacity_ - 1;
uint32_t pos = HashKey(key) & mask;
uint32_t p = pos;
while (true) {
if (state[p] == BucketState::TAKEN) {
if (KeyEquals(key, map[p].key)) {
// Bad! We already got this one. Let's avoid this case.
_assert_msg_(SYSTEM, false, "DenseHashMap: Duplicate key inserted");
return false;
}
// continue looking....
} else {
// Got a place, either removed or FREE.
break;
}
p = (p + 1) & mask;
if (p == pos) {
// FULL! Error. Should not happen thanks to Grow().
_assert_msg_(SYSTEM, false, "DenseHashMap: Hit full on Insert()");
}
}
if (state[p] == BucketState::REMOVED) {
removedCount_--;
}
state[p] = BucketState::TAKEN;
map[p].key = key;
map[p].value = value;
count_++;
return true;
}
bool Remove(const Key &key) {
uint32_t mask = capacity_ - 1;
uint32_t pos = HashKey(key) & mask;
uint32_t p = pos;
while (state[p] != BucketState::FREE) {
if (state[p] == BucketState::TAKEN && KeyEquals(key, map[p].key)) {
// Got it! Mark it as removed.
state[p] = BucketState::REMOVED;
removedCount_++;
count_--;
return true;
}
p = (p + 1) & mask;
if (p == pos) {
// FULL! Error. Should not happen.
_assert_msg_(SYSTEM, false, "DenseHashMap: Hit full on Remove()");
}
}
return false;
}
size_t size() const {
return count_;
}
template<class T>
inline void Iterate(T func) const {
for (size_t i = 0; i < map.size(); i++) {
if (state[i] == BucketState::TAKEN) {
func(map[i].key, map[i].value);
}
}
}
void Clear() {
memset(state.data(), (int)BucketState::FREE, state.size());
count_ = 0;
removedCount_ = 0;
}
void Rebuild() {
Grow(1);
}
void Maintain() {
// Heuristic
if (removedCount_ >= capacity_ / 4) {
Rebuild();
}
}
private:
void Grow(int factor) {
// We simply move out the existing data, then we re-insert the old.
// This is extremely non-atomic and will need synchronization.
std::vector<Pair> old = std::move(map);
std::vector<BucketState> oldState = std::move(state);
// Can't assume move will clear, it just may clear.
map.clear();
state.clear();
int oldCount = count_;
capacity_ *= factor;
map.resize(capacity_);
state.resize(capacity_);
count_ = 0; // Insert will update it.
removedCount_ = 0;
for (size_t i = 0; i < old.size(); i++) {
if (oldState[i] == BucketState::TAKEN) {
Insert(old[i].key, old[i].value);
}
}
_assert_msg_(SYSTEM, oldCount == count_, "DenseHashMap: count should not change in Grow()");
}
struct Pair {
Key key;
Value value;
};
std::vector<Pair> map;
std::vector<BucketState> state;
int capacity_;
int count_ = 0;
int removedCount_ = 0;
};
// Like the above, uses linear probing for cache-friendliness.
// Does not perform hashing at all so expects well-distributed keys.
template <class Value, Value NullValue>
class PrehashMap {
public:
PrehashMap(int initialCapacity) : capacity_(initialCapacity) {
map.resize(initialCapacity);
state.resize(initialCapacity);
}
// Returns nullptr if no entry was found.
Value Get(uint32_t hash) {
uint32_t mask = capacity_ - 1;
uint32_t pos = hash & mask;
// No? Let's go into search mode. Linear probing.
uint32_t p = pos;
while (true) {
if (state[p] == BucketState::TAKEN && hash == map[p].hash)
return map[p].value;
else if (state[p] == BucketState::FREE)
return NullValue;
p = (p + 1) & mask; // If the state is REMOVED, we just keep on walking.
if (p == pos) {
_assert_msg_(SYSTEM, false, "PrehashMap: Hit full on Get()");
}
}
return NullValue;
}
// Returns false if we already had the key! Which is a bit different.
bool Insert(uint32_t hash, Value value) {
// Check load factor, resize if necessary. We never shrink.
if (count_ > capacity_ / 2) {
Grow(2);
}
uint32_t mask = capacity_ - 1;
uint32_t pos = hash & mask;
uint32_t p = pos;
while (state[p] != BucketState::FREE) {
if (state[p] == BucketState::TAKEN) {
if (hash == map[p].hash)
return false; // Bad!
} else {
// Got a place, either removed or FREE.
break;
}
p = (p + 1) & mask;
if (p == pos) {
// FULL! Error. Should not happen thanks to Grow().
_assert_msg_(SYSTEM, false, "PrehashMap: Hit full on Insert()");
}
}
if (state[p] == BucketState::REMOVED) {
removedCount_--;
}
state[p] = BucketState::TAKEN;
map[p].hash = hash;
map[p].value = value;
count_++;
return true;
}
bool Remove(uint32_t hash) {
uint32_t mask = capacity_ - 1;
uint32_t pos = hash & mask;
uint32_t p = pos;
while (state[p] != BucketState::FREE) {
if (state[p] == BucketState::TAKEN && hash == map[p].hash) {
// Got it!
state[p] = BucketState::REMOVED;
removedCount_++;
count_--;
return true;
}
p = (p + 1) & mask;
if (p == pos) {
_assert_msg_(SYSTEM, false, "PrehashMap: Hit full on Remove()");
}
}
return false;
}
size_t size() {
return count_;
}
template<class T>
void Iterate(T func) const {
for (size_t i = 0; i < map.size(); i++) {
if (state[i] == BucketState::TAKEN) {
func(map[i].hash, map[i].value);
}
}
}
void Clear() {
memset(state.data(), (int)BucketState::FREE, state.size());
count_ = 0;
removedCount_ = 0;
}
// Gets rid of REMOVED tombstones, making lookups somewhat more efficient.
void Rebuild() {
Grow(1);
}
void Maintain() {
// Heuristic
if (removedCount_ >= capacity_ / 4) {
Rebuild();
}
}
private:
void Grow(int factor) {
// We simply move out the existing data, then we re-insert the old.
// This is extremely non-atomic and will need synchronization.
std::vector<Pair> old = std::move(map);
std::vector<BucketState> oldState = std::move(state);
// Can't assume move will clear, it just may clear.
map.clear();
state.clear();
int oldCount = count_;
int oldCapacity = capacity_;
capacity_ *= factor;
map.resize(capacity_);
state.resize(capacity_);
count_ = 0; // Insert will update it.
removedCount_ = 0;
for (size_t i = 0; i < old.size(); i++) {
if (oldState[i] == BucketState::TAKEN) {
Insert(old[i].hash, old[i].value);
}
}
INFO_LOG(G3D, "Grew hashmap capacity from %d to %d", oldCapacity, capacity_);
_assert_msg_(SYSTEM, oldCount == count_, "PrehashMap: count should not change in Grow()");
}
struct Pair {
uint32_t hash;
Value value;
};
std::vector<Pair> map;
std::vector<BucketState> state;
int capacity_;
int count_ = 0;
int removedCount_ = 0;
};