-
-
Notifications
You must be signed in to change notification settings - Fork 97
/
Copy pathPackage.zig
200 lines (174 loc) · 7.19 KB
/
Package.zig
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
const std = @import("std");
const assert = std.debug.assert;
pub const Module = @import("Package/Module.zig");
pub const Fetch = @import("Package/Fetch.zig");
pub const build_zig_basename = "build.zig";
pub const Manifest = @import("Package/Manifest.zig");
/// Length in bytes of the binary legacy multihash: one byte for the hash
/// function code, one byte for the digest length, then the digest itself.
pub const multihash_len = 1 + 1 + Hash.Algo.digest_length;
/// Length of the legacy multihash once hex encoded (two characters per byte).
pub const multihash_hex_digest_len = 2 * multihash_len;
/// Fixed-size buffer holding a hex-encoded legacy multihash, as produced by
/// `multiHashHexDigest`.
pub const MultiHashHexDigest = [multihash_hex_digest_len]u8;
/// A 64-bit value pairing a randomly chosen `id` with a CRC-32 `checksum` of
/// a name. `id` occupies the low 32 bits and `checksum` the high 32 bits of
/// the integer representation.
pub const Fingerprint = packed struct(u64) {
    /// Random identifier. `0x00000000` and `0xffffffff` are never generated
    /// and are rejected by `validate`.
    id: u32,
    /// CRC-32 of the name this fingerprint was generated for.
    checksum: u32,

    /// Creates a fingerprint for `name` with a freshly drawn random `id` in
    /// the range `1 ... 0xfffffffe`.
    pub fn generate(name: []const u8) Fingerprint {
        const fresh_id = std.crypto.random.intRangeLessThan(u32, 1, 0xffffffff);
        const name_crc = std.hash.Crc32.hash(name);
        return .{ .id = fresh_id, .checksum = name_crc };
    }

    /// Returns true when `n` has a permitted `id` and its `checksum` matches
    /// the CRC-32 of `name`.
    pub fn validate(n: Fingerprint, name: []const u8) bool {
        // Both extreme id values are reserved and never valid.
        if (n.id == 0x00000000 or n.id == 0xffffffff) return false;
        return n.checksum == std.hash.Crc32.hash(name);
    }

    /// Reinterprets the fingerprint as its backing `u64`.
    pub fn int(n: Fingerprint) u64 {
        const raw: u64 = @bitCast(n);
        return raw;
    }
};
/// A user-readable, file system safe hash that identifies an exact package
/// snapshot, including file contents.
///
/// The hash is not only to prevent collisions but must resist attacks where
/// the adversary fully controls the contents being hashed. Thus, it contains
/// a full SHA-256 digest.
///
/// This data structure can be used to store the legacy hash format too. Legacy
/// hash format is scheduled to be removed after 0.14.0 is tagged.
///
/// There's also a third way this structure is used. When using path rather than
/// hash, a unique hash is still needed, so one is computed based on the path.
pub const Hash = struct {
    /// The hash text, at most `max_len` bytes. Unused bytes at the end are
    /// filled with zeroes.
    bytes: [max_len]u8,

    pub const Algo = std.crypto.hash.sha2.Sha256;
    pub const Digest = [Algo.digest_length]u8;

    /// Maximum length of the hash text: up to 32 bytes of name, a dash, up to
    /// 32 bytes of version, a dash, and the 44 base64 characters that encode
    /// the 33-byte (264-bit) "hashplus" payload written by `init`.
    ///
    /// Example: "nnnn-vvvv-hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh"
    pub const max_len = 32 + 1 + 32 + 1 + (32 + 32 + 200) / 6;

    /// Copies `s` into a new `Hash`, zero-filling the remainder.
    /// Asserts that `s.len <= max_len`.
    pub fn fromSlice(s: []const u8) Hash {
        assert(s.len <= max_len);
        var result: Hash = undefined;
        @memcpy(result.bytes[0..s.len], s);
        @memset(result.bytes[s.len..], 0);
        return result;
    }

    /// Returns the stored text with the trailing zero padding removed.
    /// A hash whose bytes are all zero yields an empty slice.
    pub fn toSlice(ph: *const Hash) []const u8 {
        var end: usize = ph.bytes.len;
        // Bounded scan: stop at index 0 instead of decrementing past it, which
        // would overflow `usize` when every byte is zero.
        while (end > 0 and ph.bytes[end - 1] == 0) end -= 1;
        return ph.bytes[0..end];
    }

    test toSlice {
        const empty: Hash = .fromSlice("");
        try std.testing.expectEqualStrings("", empty.toSlice());
        const short: Hash = .fromSlice("abc");
        try std.testing.expectEqualStrings("abc", short.toSlice());
    }

    /// Compares the full fixed-size buffers, padding included.
    pub fn eql(a: *const Hash, b: *const Hash) bool {
        return std.mem.eql(u8, &a.bytes, &b.bytes);
    }

    /// Distinguishes whether the legacy multihash format is being stored here.
    ///
    /// The value is considered legacy when its first two characters are the
    /// hex code of `multihash_function`, its length equals
    /// `multihash_hex_digest_len`, and it contains no '-' separator.
    pub fn isOld(h: *const Hash) bool {
        if (h.bytes.len < 2) return false;
        const their_multihash_func = std.fmt.parseInt(u8, h.bytes[0..2], 16) catch return false;
        if (@as(MultihashFunction, @enumFromInt(their_multihash_func)) != multihash_function) return false;
        if (h.toSlice().len != multihash_hex_digest_len) return false;
        return std.mem.indexOfScalar(u8, &h.bytes, '-') == null;
    }

    test isOld {
        const h: Hash = .fromSlice("1220138f4aba0c01e66b68ed9e1e1e74614c06e4743d88bc58af4f1c3dd0aae5fea7");
        try std.testing.expect(h.isOld());
    }

    /// Produces "$name-$semver-$hashplus".
    /// * name is the name field from build.zig.zon, asserted to be at most 32
    ///   bytes and assumed to be a valid zig identifier
    /// * semver is the version field from build.zig.zon, asserted to be at
    ///   most 32 bytes
    /// * hashplus is the following 33-byte array, base64 encoded using -_ to make
    ///   it filesystem safe:
    ///   - (4 bytes) LE u32 Package ID
    ///   - (4 bytes) LE u32 total decompressed size in bytes, overflow saturated
    ///   - (25 bytes) truncated SHA-256 digest of hashed files of the package
    pub fn init(digest: Digest, name: []const u8, ver: []const u8, id: u32, size: u32) Hash {
        assert(name.len <= 32);
        assert(ver.len <= 32);
        var result: Hash = undefined;
        // Write directly into `result.bytes`; `max_len` guarantees capacity
        // for the longest permitted name, version and payload.
        var buf: std.ArrayListUnmanaged(u8) = .initBuffer(&result.bytes);
        buf.appendSliceAssumeCapacity(name);
        buf.appendAssumeCapacity('-');
        buf.appendSliceAssumeCapacity(ver);
        buf.appendAssumeCapacity('-');
        var hashplus: [33]u8 = undefined;
        std.mem.writeInt(u32, hashplus[0..4], id, .little);
        std.mem.writeInt(u32, hashplus[4..8], size, .little);
        hashplus[8..].* = digest[0..25].*;
        // 33 bytes encode to exactly 44 unpadded base64 characters.
        _ = std.base64.url_safe_no_pad.Encoder.encode(buf.addManyAsArrayAssumeCapacity(44), &hashplus);
        // Zero the tail so `toSlice` and `eql` see deterministic padding.
        @memset(buf.unusedCapacitySlice(), 0);
        return result;
    }

    /// Produces a unique hash based on the path provided. The result should
    /// not be user-visible.
    ///
    /// When `is_global` is set the result starts with '/'. A path that fits in
    /// the remaining space is stored verbatim; otherwise the lowercase hex
    /// SHA-256 digest of `sub_path` is stored in its place.
    pub fn initPath(sub_path: []const u8, is_global: bool) Hash {
        var result: Hash = .{ .bytes = @splat(0) };
        var i: usize = 0;
        if (is_global) {
            result.bytes[0] = '/';
            i += 1;
        }
        if (i + sub_path.len <= result.bytes.len) {
            @memcpy(result.bytes[i..][0..sub_path.len], sub_path);
            return result;
        }
        var bin_digest: [Algo.digest_length]u8 = undefined;
        Algo.hash(sub_path, &bin_digest, .{});
        // The hex digest is 64 characters, which always fits after the
        // optional one-byte prefix, so the print cannot run out of space.
        _ = std.fmt.bufPrint(result.bytes[i..], "{}", .{std.fmt.fmtSliceHexLower(&bin_digest)}) catch unreachable;
        return result;
    }
};
/// Hash function codes used by the legacy multihash format.
/// The enum is non-exhaustive, so integer codes not listed here can still be
/// represented (for example when parsed from text in `Hash.isOld`).
// NOTE(review): the values look like they follow the multiformats multicodec
// table — confirm against the specification before adding entries.
pub const MultihashFunction = enum(u16) {
identity = 0x00,
sha1 = 0x11,
@"sha2-256" = 0x12,
@"sha2-512" = 0x13,
@"sha3-512" = 0x14,
@"sha3-384" = 0x15,
@"sha3-256" = 0x16,
@"sha3-224" = 0x17,
@"sha2-384" = 0x20,
@"sha2-256-trunc254-padded" = 0x1012,
@"sha2-224" = 0x1013,
@"sha2-512-224" = 0x1014,
@"sha2-512-256" = 0x1015,
@"blake2b-256" = 0xb220,
// Any other code.
_,
};
/// The multihash function code that corresponds to `Hash.Algo`, resolved at
/// compile time. Changing `Hash.Algo` to an algorithm without a mapping here
/// is a compile error that names the offending type.
pub const multihash_function: MultihashFunction = switch (Hash.Algo) {
    std.crypto.hash.sha2.Sha256 => .@"sha2-256",
    else => @compileError("no multihash function code is defined for Hash.Algo: " ++ @typeName(Hash.Algo)),
};
/// Encodes `digest` as a lowercase hex legacy multihash: two characters for
/// the hash function code, two for the digest length, then two per digest
/// byte.
pub fn multiHashHexDigest(digest: Hash.Digest) MultiHashHexDigest {
    const charset = std.fmt.hex_charset;
    const function_code = @intFromEnum(multihash_function);
    const digest_len = Hash.Algo.digest_length;

    var out: MultiHashHexDigest = undefined;

    // Header: function code and digest length, one byte each.
    out[0] = charset[function_code >> 4];
    out[1] = charset[function_code & 15];
    out[2] = charset[digest_len >> 4];
    out[3] = charset[digest_len & 15];

    // Payload: each digest byte becomes a high-nibble/low-nibble pair.
    for (digest, 0..) |byte, i| {
        const pair = out[4 + 2 * i ..][0..2];
        pair[0] = charset[byte >> 4];
        pair[1] = charset[byte & 15];
    }
    return out;
}
comptime {
    // multiHashHexDigest writes the function code and the digest length as a
    // single byte each. Assert here that both values are small enough for the
    // one-byte encoding, so no uleb128 code is needed.
    const one_byte_limit = 127;
    assert(@intFromEnum(multihash_function) < one_byte_limit);
    assert(Hash.Algo.digest_length < one_byte_limit);
}
test Hash {
    // Fixed 32-byte digest; only its first 25 bytes end up in the hash text.
    const digest: Hash.Digest = .{
        0xc7, 0xf5, 0x71, 0xb7, 0xb4, 0xe7, 0x6f, 0x3c, 0xdb, 0x87, 0x7a, 0x7f, 0xdd, 0xf9, 0x77, 0x87,
        0x9d, 0xd3, 0x86, 0xfa, 0x73, 0x57, 0x9a, 0xf7, 0x9d, 0x1e, 0xdb, 0x8f, 0x3a, 0xd9, 0xbd, 0x9f,
    };
    const package_id: u32 = 0xcafebabe;
    const total_size: u32 = 10 * 1024 * 1024;

    const hash = Hash.init(digest, "nasm", "2.16.1-3", package_id, total_size);
    const expected = "nasm-2.16.1-3-vrr-ygAAoADH9XG3tOdvPNuHen_d-XeHndOG-nNXmved";
    try std.testing.expectEqualStrings(expected, hash.toSlice());
}
test {
// Reference `Fetch` so that the declaration is analyzed as part of this
// file's test build.
_ = Fetch;
}