// map.zig (from a fork of fubark/cyber)
const std = @import("std");
const stdx = @import("stdx");
const t = stdx.testing;
const builtin = @import("builtin");
const debug = builtin.mode == .Debug;
const cy = @import("cyber.zig");
const log = stdx.log.scoped(.map);
/// The implementation of ValueMap is based on Zig's std.HashMapUnmanaged.
/// Since keys and values are just cy.Values (8 bytes each), the memory layout can be made more compact.
/// This also allows inserting optimizations where needed.
pub const ValueMap = struct {
    metadata: ?[*] align(8) Metadata = null,
    entries: ?[*]ValueMapEntry = null,
    size: u32 = 0,
    cap: u32 = 0,
    available: u32 = 0,

    const MaxLoadPercentage = 80;

    pub fn iterator(self: *const ValueMap) Iterator {
        return Iterator{ .map = self };
    }

    pub fn put(self: *ValueMap, alloc: std.mem.Allocator, vm: *const cy.VM, key: cy.Value, value: cy.Value) linksection(cy.Section) std.mem.Allocator.Error!void {
        const res = try self.getOrPut(alloc, vm, key);
        res.valuePtr.* = value;
    }

    pub fn get(self: ValueMap, vm: *const cy.VM, key: cy.Value) linksection(cy.Section) ?cy.Value {
        if (self.getIndex(vm, key)) |idx| {
            return self.entries.?[idx].value;
        }
        return null;
    }

    pub fn getByString(self: ValueMap, vm: *const cy.VM, key: []const u8) linksection(cy.Section) ?cy.Value {
        @setRuntimeSafety(debug);
        if (self.getIndexByString(vm, key)) |idx| {
            return self.entries.?[idx].value;
        }
        return null;
    }
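
    // Note (informal): getByString() lets callers look up an entry with a raw byte slice,
    // without constructing a string cy.Value first; it reuses the probing logic in
    // getIndexByString() below.
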
    // This counts the number of occupied slots (not counting tombstones), which is
    // what has to stay under MaxLoadPercentage of the capacity.
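    // For example (informal): with cap = 8 and MaxLoadPercentage = 80, maxLoad is 6,
    // so at most 6 slots can be occupied before the next grow.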
    fn load(self: *const ValueMap) u32 {
        const maxLoad = (self.cap * MaxLoadPercentage) / 100;
        std.debug.assert(maxLoad >= self.available);
        return @truncate(u32, maxLoad - self.available);
    }
    pub fn getOrPut(self: *ValueMap, alloc: std.mem.Allocator, vm: *const cy.VM, key: cy.Value) linksection(cy.Section) std.mem.Allocator.Error!GetOrPutResult {
        const res = try self.getOrPutAdapted(alloc, vm, key);
        if (!res.foundExisting) {
            res.keyPtr.* = key;
        }
        return res;
    }

    pub fn getOrPutAdapted(self: *ValueMap, alloc: std.mem.Allocator, vm: *const cy.VM, key: cy.Value) linksection(cy.Section) std.mem.Allocator.Error!GetOrPutResult {
        if (self.available == 0) {
            try self.grow(alloc, vm, capacityForSize(self.load() + 1));
        }
        return self.getOrPutAssumeCapacityAdapted(vm, key);
    }

    pub fn getOrPutAssumeCapacityAdapted(self: *ValueMap, vm: *const cy.VM, key: cy.Value) linksection(cy.Section) GetOrPutResult {
        const hash = computeHash(vm, key);
        const mask = self.cap - 1;
        const fingerprint = Metadata.takeFingerprint(hash);
        var limit = self.cap;
        var idx = @truncate(usize, hash & mask);
        var first_tombstone_idx: usize = self.cap; // invalid index
        var md = self.metadata.?[idx];
        while (!md.isFree() and limit != 0) {
            if (md.isUsed() and md.fingerprint == fingerprint) {
                const testKey = self.entries.?[idx].key;
                if (keysEqual(vm, key, testKey)) {
                    return GetOrPutResult{
                        .keyPtr = &self.entries.?[idx].key,
                        .valuePtr = &self.entries.?[idx].value,
                        .foundExisting = true,
                    };
                }
            } else if (first_tombstone_idx == self.cap and md.isTombstone()) {
                first_tombstone_idx = idx;
            }
            limit -= 1;
            idx = (idx + 1) & mask;
            md = self.metadata.?[idx];
        }
        if (first_tombstone_idx < self.cap) {
            // Cheap try to lower probing lengths after deletions. Recycle a tombstone.
            idx = first_tombstone_idx;
            md = self.metadata.?[idx];
        }
        // We're using a slot previously free or a tombstone.
        self.available -= 1;
        self.metadata.?[idx].fill(fingerprint);
        self.size += 1;
        return GetOrPutResult{
            .keyPtr = &self.entries.?[idx].key,
            .valuePtr = &self.entries.?[idx].value,
            .foundExisting = false,
        };
    }
    pub fn putAssumeCapacityNoClobber(self: *ValueMap, vm: *const cy.VM, key: cy.Value, value: cy.Value) linksection(cy.Section) void {
        std.debug.assert(!self.contains(vm, key));
        const hash = computeHash(vm, key);
        const mask = self.cap - 1;
        var idx = @truncate(usize, hash & mask);
        var md = self.metadata.?[idx];
        while (md.isUsed()) {
            idx = (idx + 1) & mask;
            md = self.metadata.?[idx];
        }
        std.debug.assert(self.available > 0);
        self.available -= 1;
        const fingerprint = Metadata.takeFingerprint(hash);
        self.metadata.?[idx].fill(fingerprint);
        self.entries.?[idx] = .{
            .key = key,
            .value = value,
        };
        self.size += 1;
    }

    pub fn contains(self: ValueMap, vm: *const cy.VM, key: cy.Value) linksection(cy.Section) bool {
        return self.getIndex(vm, key) != null;
    }

    fn computeStringHash(str: []const u8) linksection(cy.Section) u64 {
        @setRuntimeSafety(debug);
        return std.hash.Wyhash.hash(0, str);
    }

    fn computeHash(vm: *const cy.VM, key: cy.Value) linksection(cy.Section) u64 {
        if (key.isNumber()) {
            return std.hash.Wyhash.hash(0, std.mem.asBytes(&key.val));
        }
        if (vm.tryValueAsComparableString(key)) |str| {
            return std.hash.Wyhash.hash(0, str);
        } else {
            return std.hash.Wyhash.hash(0, std.mem.asBytes(&key.val));
        }
    }

    fn stringKeyEqual(vm: *const cy.VM, a: []const u8, b: cy.Value) linksection(cy.Section) bool {
        const bstr = vm.tryValueAsComparableString(b) orelse return false;
        return std.mem.eql(u8, a, bstr);
    }

    fn keysEqual(vm: *const cy.VM, a: cy.Value, b: cy.Value) linksection(cy.Section) bool {
        if (a.val == b.val) {
            return true;
        }
        if (vm.tryValueAsComparableString(a)) |astr| {
            const bstr = vm.tryValueAsComparableString(b) orelse return false;
            return std.mem.eql(u8, astr, bstr);
        } else return false;
    }
    fn capacityForSize(size: u32) linksection(cy.Section) u32 {
        var newCap = @truncate(u32, (@as(u64, size) * 100) / MaxLoadPercentage + 1);
        newCap = std.math.ceilPowerOfTwo(u32, newCap) catch unreachable;
        return newCap;
    }
    inline fn getIndexByString(self: ValueMap, vm: *const cy.VM, key: []const u8) linksection(cy.Section) ?usize {
        @setRuntimeSafety(debug);
        if (self.size == 0) {
            return null;
        }
        const hash = computeStringHash(key);
        const mask = self.cap - 1;
        const fingerprint = Metadata.takeFingerprint(hash);
        // Don't loop indefinitely when there are no empty slots.
        var limit = self.cap;
        var idx = @truncate(usize, hash & mask);
        var md = self.metadata.?[idx];
        while (!md.isFree() and limit != 0) {
            if (md.isUsed() and md.fingerprint == fingerprint) {
                const testKey = self.entries.?[idx].key;
                if (stringKeyEqual(vm, key, testKey)) {
                    return idx;
                }
            }
            limit -= 1;
            idx = (idx + 1) & mask;
            md = self.metadata.?[idx];
        }
        return null;
    }

    /// Find the index containing the data for the given key.
    /// Whether this function returns null is almost always
    /// branched on after this function returns, and this function
    /// returns null/not null from separate code paths. We
    /// want the optimizer to remove that branch and instead directly
    /// fuse the basic blocks after the branch to the basic blocks
    /// from this function. To encourage that, this function is
    /// marked as inline.
    inline fn getIndex(self: ValueMap, vm: *const cy.VM, key: cy.Value) linksection(cy.Section) ?usize {
        if (self.size == 0) {
            return null;
        }
        const hash = computeHash(vm, key);
        const mask = self.cap - 1;
        const fingerprint = Metadata.takeFingerprint(hash);
        // Don't loop indefinitely when there are no empty slots.
        var limit = self.cap;
        var idx = @truncate(usize, hash & mask);
        var md = self.metadata.?[idx];
        while (!md.isFree() and limit != 0) {
            if (md.isUsed() and md.fingerprint == fingerprint) {
                const testKey = self.entries.?[idx].key;
                if (keysEqual(vm, key, testKey)) {
                    return idx;
                }
            }
            limit -= 1;
            idx = (idx + 1) & mask;
            md = self.metadata.?[idx];
        }
        return null;
    }
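
    // Buffer layout used by grow() and deinit() (informal sketch): a single 8-byte aligned
    // allocation holds one Metadata byte per slot at [0, cap), followed by padding up to
    // alignForward(cap, 8), followed by cap ValueMapEntry values (16 bytes each).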
    fn grow(self: *ValueMap, alloc: std.mem.Allocator, vm: *const cy.VM, newCap: u32) std.mem.Allocator.Error!void {
        const finalCap = std.math.max(newCap, 8);
        std.debug.assert(finalCap > self.cap);
        std.debug.assert(std.math.isPowerOfTwo(newCap));
        const metaSize = finalCap;
        const entriesStart = std.mem.alignForward(metaSize, 8);
        const totalSize = entriesStart + finalCap * @sizeOf(ValueMapEntry);
        const newBuf = try alloc.alignedAlloc(u8, 8, totalSize);
        var newMap = ValueMap{
            .metadata = @ptrCast([*] align(8) Metadata, newBuf.ptr),
            .entries = @intToPtr([*]ValueMapEntry, @ptrToInt(newBuf.ptr) + entriesStart),
            .size = 0,
            .cap = finalCap,
            .available = @truncate(u32, (finalCap * MaxLoadPercentage) / 100),
        };
        @memset(@ptrCast([*] align(8) u8, newMap.metadata), 0, finalCap);
        if (self.size != 0) {
            // Rehash into new map.
            var i: u32 = 0;
            while (i < self.cap) : (i += 1) {
                if (self.metadata.?[i].isUsed()) {
                    const entry = self.entries.?[i];
                    newMap.putAssumeCapacityNoClobber(vm, entry.key, entry.value);
                    if (newMap.size == self.size) {
                        break;
                    }
                }
            }
        }
        self.deinit(alloc);
        self.* = newMap;
    }

    pub fn deinit(self: *ValueMap, alloc: std.mem.Allocator) void {
        if (self.metadata == null) {
            return;
        }
        const metaSize = self.cap;
        const entriesStart = std.mem.alignForward(metaSize, 8);
        const totalSize = entriesStart + self.cap * @sizeOf(ValueMapEntry);
        alloc.free(self.metadata.?[0..totalSize]);
        self.metadata = null;
        self.entries = null;
        self.size = 0;
        self.cap = 0;
        self.available = 0;
    }
    pub fn remove(self: *ValueMap, vm: *cy.VM, key: cy.Value) linksection(cy.Section) bool {
        if (self.getIndex(vm, key)) |idx| {
            self.removeByIndex(idx);
            // Release key since it can be an object.
            cy.arc.release(vm, self.entries.?[idx].key);
            return true;
        }
        return false;
    }

    fn removeByIndex(self: *ValueMap, idx: usize) linksection(cy.Section) void {
        self.metadata.?[idx].remove();
        self.size -= 1;
        self.available += 1;
    }

    pub fn next(self: *ValueMap, idx: *u32) linksection(cy.Section) ?ValueMapEntry {
        std.debug.assert(idx.* <= self.cap);
        if (self.size == 0) {
            return null;
        }
        while (idx.* < self.cap) : (idx.* += 1) {
            const md = self.metadata.?[idx.*];
            if (md.isUsed()) {
                defer idx.* += 1;
                return self.entries.?[idx.*];
            }
        }
        return null;
    }
};
pub const ValueMapEntry = struct {
    key: cy.Value,
    value: cy.Value,
};
test "Internals." {
try t.eq(@sizeOf(ValueMapEntry), 16);
try t.eq(@alignOf(*ValueMapEntry), 8);
try t.eq(@sizeOf(Metadata), 1);
try t.eq(@sizeOf(ValueMap), 32);
}
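
// A small sanity test (sketch) for capacityForSize(); uses the same t.eq helper as the
// test above. The expected values follow from (size * 100 / MaxLoadPercentage + 1)
// rounded up to the next power of two.
test "capacityForSize (sketch)." {
    try t.eq(ValueMap.capacityForSize(6), 8);
    try t.eq(ValueMap.capacityForSize(7), 16);
    try t.eq(ValueMap.capacityForSize(100), 128);
}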
/// Metadata for a slot. It can be in three states: empty, used, or
/// tombstone. Tombstones indicate that an entry was previously used;
/// they are a simple way to handle removal.
/// To this state, we add 7 bits from the slot's key hash. These are
/// used as a fast way to disambiguate between entries without
/// having to use the equality function. If two fingerprints are
/// different, we know that we don't have to compare the keys at all.
/// The 7 bits are the highest ones from a 64-bit hash. This way, not
/// only do we use the `log2(capacity)` lowest bits from the hash to
/// determine a slot index, but we also use 7 more bits to quickly resolve
/// collisions when multiple elements with different hashes end up wanting
/// the same slot.
/// Not using the equality function means we don't have to read into
/// the entries array, likely avoiding a cache miss and a potentially
/// costly function call.
const Metadata = packed struct {
    fingerprint: FingerPrint = Free,
    used: u1 = 0,

    const FingerPrint = u7;
    const Free: FingerPrint = 0;
    const Tombstone: FingerPrint = 1;
    const SlotFree = @bitCast(u8, Metadata{ .fingerprint = Free });
    const SlotTombstone = @bitCast(u8, Metadata{ .fingerprint = Tombstone });

    pub inline fn isUsed(self: Metadata) bool {
        return self.used == 1;
    }

    pub inline fn takeFingerprint(hash: u64) FingerPrint {
        const hashBits = comptime @typeInfo(u64).Int.bits;
        const fpBits = comptime @typeInfo(FingerPrint).Int.bits;
        return @truncate(FingerPrint, hash >> (hashBits - fpBits));
    }

    pub inline fn isFree(self: Metadata) bool {
        return @bitCast(u8, self) == SlotFree;
    }

    pub inline fn fill(self: *Metadata, fp: FingerPrint) void {
        self.used = 1;
        self.fingerprint = fp;
    }

    pub inline fn remove(self: *Metadata) void {
        self.used = 0;
        self.fingerprint = Tombstone;
    }

    pub inline fn isTombstone(self: Metadata) bool {
        return @bitCast(u8, self) == SlotTombstone;
    }
};
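
// A small sanity test (sketch) for the Metadata states documented above; uses the same
// t.eq helper as test "Internals.". 0x40 is just an arbitrary fingerprint value.
test "Metadata states (sketch)." {
    // takeFingerprint keeps the top 7 bits of the 64-bit hash.
    try t.eq(Metadata.takeFingerprint(0x8000000000000000), 0x40);
    var md = Metadata{};
    try t.eq(md.isFree(), true);
    md.fill(0x40);
    try t.eq(md.isUsed(), true);
    md.remove();
    try t.eq(md.isTombstone(), true);
}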
pub const GetOrPutResult = struct {
    keyPtr: *cy.Value,
    valuePtr: *cy.Value,
    foundExisting: bool,
};
// TODO: See how much faster ValueMap is compared to std.HashMapUnmanaged.
// const MapKeyType = enum {
//     constStr,
//     heapStr,
//     number,
// };

// const MapKey = struct {
//     keyT: MapKeyType,
//     inner: packed union {
//         constStr: packed struct {
//             start: u32,
//             end: u32,
//         },
//         heapStr: Value,
//         number: u64,
//     },
// };

// pub const MapContext = struct {
//     vm: *const VM,
//     pub fn hash(self: MapContext, key: MapKey) u64 {
//         @setRuntimeSafety(debug);
//         switch (key.keyT) {
//             .constStr => return std.hash.Wyhash.hash(0, self.vm.strBuf[key.inner.constStr.start..key.inner.constStr.end]),
//             .heapStr => stdx.panic("unsupported heapStr"),
//             .number => {
//                 return std.hash.Wyhash.hash(0, std.mem.asBytes(&key.inner.number));
//             },
//         }
//     }
//     pub fn eql(self: MapContext, a: MapKey, b: MapKey) bool {
//         @setRuntimeSafety(debug);
//         switch (a.keyT) {
//             .constStr => {
//                 if (b.keyT == .constStr) {
//                     const aStr = self.vm.strBuf[a.inner.constStr.start..a.inner.constStr.end];
//                     const bStr = self.vm.strBuf[b.inner.constStr.start..b.inner.constStr.end];
//                     return std.mem.eql(u8, aStr, bStr);
//                 } else if (b.keyT == .heapStr) {
//                     stdx.panic("unsupported heapStr");
//                 } else {
//                     return false;
//                 }
//             },
//             .heapStr => {
//                 stdx.panic("unsupported heapStr");
//             },
//             .number => {
//                 if (b.keyT != .number) {
//                     return false;
//                 } else {
//                     return a.inner.number == b.inner.number;
//                 }
//             },
//         }
//     }
// };
pub const Iterator = struct {
    map: *const ValueMap,
    idx: u32 = 0,

    pub fn next(self: *Iterator) ?ValueMapEntry {
        std.debug.assert(self.idx <= self.map.cap);
        if (self.map.size == 0) {
            return null;
        }
        while (self.idx < self.map.cap) : (self.idx += 1) {
            const md = self.map.metadata.?[self.idx];
            if (md.isUsed()) {
                defer self.idx += 1;
                return self.map.entries.?[self.idx];
            }
        }
        return null;
    }
};
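
// Usage sketch (informal): assumes an initialized `vm: *cy.VM`, an allocator `alloc`, and
// already constructed cy.Value keys/values. Shown as comments only, since setting up a VM
// is outside the scope of this file.
//
//   var map = ValueMap{};
//   defer map.deinit(alloc);
//   try map.put(alloc, vm, key, value);
//   if (map.get(vm, key)) |val| {
//       // use val
//   }
//   var iter = map.iterator();
//   while (iter.next()) |entry| {
//       // use entry.key / entry.value
//   }
//   _ = map.remove(vm, key);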