Skip to content

Commit

Permalink
Merge pull request ziglang#14434 from FnControlOption/xz
Browse files Browse the repository at this point in the history
Add xz decoder

closes ziglang#14300
closes ziglang#2851
  • Loading branch information
andrewrk authored Jan 26, 2023
2 parents fcef728 + d0dedef commit 96a55f6
Show file tree
Hide file tree
Showing 32 changed files with 1,230 additions and 16 deletions.
2 changes: 2 additions & 0 deletions build.zig
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ pub fn build(b: *Builder) !void {
"compress-gettysburg.txt",
"compress-pi.txt",
"rfc1951.txt",
// exclude files from lib/std/compress/xz/testdata
".xz",
// exclude files from lib/std/tz/
".tzif",
// others
Expand Down
2 changes: 2 additions & 0 deletions lib/std/compress.zig
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ const std = @import("std.zig");
pub const deflate = @import("compress/deflate.zig");
pub const gzip = @import("compress/gzip.zig");
pub const zlib = @import("compress/zlib.zig");
pub const xz = @import("compress/xz.zig");

pub fn HashedReader(
comptime ReaderType: anytype,
Expand Down Expand Up @@ -38,4 +39,5 @@ test {
_ = deflate;
_ = gzip;
_ = zlib;
_ = xz;
}
13 changes: 5 additions & 8 deletions lib/std/compress/gzip.zig
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//
// Decompressor for GZIP data streams (RFC1952)

const std = @import("std");
const std = @import("../std.zig");
const io = std.io;
const fs = std.fs;
const testing = std.testing;
Expand All @@ -17,10 +17,7 @@ const FCOMMENT = 1 << 4;

const max_string_len = 1024;

/// TODO: the fully qualified namespace to this declaration is
/// std.compress.gzip.GzipStream which has a redundant "gzip" in the name.
/// Instead, it should be `std.compress.gzip.Stream`.
pub fn GzipStream(comptime ReaderType: type) type {
pub fn Decompress(comptime ReaderType: type) type {
return struct {
const Self = @This();

Expand Down Expand Up @@ -154,14 +151,14 @@ pub fn GzipStream(comptime ReaderType: type) type {
};
}

pub fn gzipStream(allocator: mem.Allocator, reader: anytype) !GzipStream(@TypeOf(reader)) {
return GzipStream(@TypeOf(reader)).init(allocator, reader);
pub fn decompress(allocator: mem.Allocator, reader: anytype) !Decompress(@TypeOf(reader)) {
return Decompress(@TypeOf(reader)).init(allocator, reader);
}

fn testReader(data: []const u8, comptime expected: []const u8) !void {
var in_stream = io.fixedBufferStream(data);

var gzip_stream = try gzipStream(testing.allocator, in_stream.reader());
var gzip_stream = try decompress(testing.allocator, in_stream.reader());
defer gzip_stream.deinit();

// Read and decompress the whole file
Expand Down
145 changes: 145 additions & 0 deletions lib/std/compress/xz.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
const std = @import("std");
const block = @import("xz/block.zig");
const Allocator = std.mem.Allocator;
const Crc32 = std.hash.Crc32;

/// Integrity-check identifier carried in the stream flags.
///
/// The value is a 4-bit field (see `readStreamFlags`, which reads it with
/// `readBitsNoEof(u4, 4)`). The enum is non-exhaustive (`_`), so any 4-bit
/// value can be represented even when it has no named tag here; this file
/// does not itself reject unnamed values.
pub const Check = enum(u4) {
    /// Check ID 0x00.
    none = 0x00,
    /// Check ID 0x01.
    crc32 = 0x01,
    /// Check ID 0x04.
    crc64 = 0x04,
    /// Check ID 0x0A.
    sha256 = 0x0A,
    /// Any other 4-bit value.
    _,
};

/// Parses the two-byte stream flags field from `reader`.
///
/// Layout, as consumed here with a little-endian bit reader:
///   * 8 bits, reserved, must be zero
///   * 4 bits, check identifier, stored into `check.*`
///   * 4 bits, reserved, must be zero
///
/// `check.*` is written as soon as the identifier has been read, i.e. before
/// the trailing reserved bits are validated.
///
/// Returns `error.CorruptInput` when either reserved field is non-zero, or
/// whatever error the bit reader reports when the input ends early or the
/// underlying reader fails.
fn readStreamFlags(reader: anytype, check: *Check) !void {
    var bits = std.io.bitReader(.Little, reader);

    // The whole first byte is reserved.
    if ((try bits.readBitsNoEof(u8, 8)) != 0) return error.CorruptInput;

    const check_id = try bits.readBitsNoEof(u4, 4);
    check.* = @intToEnum(Check, check_id);

    // The remaining four bits of the second byte are reserved as well.
    if ((try bits.readBitsNoEof(u4, 4)) != 0) return error.CorruptInput;
}

/// Creates an xz decompressor that pulls compressed bytes from `reader`.
///
/// This forwards to `Decompress(@TypeOf(reader)).init`, which consumes and
/// validates the stream header before returning, so header errors
/// (`error.BadHeader`, `error.CorruptInput`, `error.WrongChecksum`) and
/// reader errors surface here. `allocator` is handed to the block decoder.
/// Call `deinit` on the returned value when done with it.
pub fn decompress(allocator: Allocator, reader: anytype) !Decompress(@TypeOf(reader)) {
    const Stream = Decompress(@TypeOf(reader));
    return Stream.init(allocator, reader);
}

/// Returns a streaming xz decoder type that reads compressed input from a
/// `ReaderType` and exposes the decompressed bytes through `read`/`reader`.
///
/// The stream header is validated in `init`; block data is decoded by
/// `block.Decoder`; the index and stream footer are validated by `read`
/// once the block decoder reports that it has no more data.
pub fn Decompress(comptime ReaderType: type) type {
    return struct {
        const Self = @This();

        pub const Error = ReaderType.Error || block.Decoder(ReaderType).Error;
        pub const Reader = std.io.Reader(*Self, Error, read);

        /// Allocator that was passed to `init` (also given to the block decoder).
        allocator: Allocator,
        /// Decoder for the block section of the stream.
        block_decoder: block.Decoder(ReaderType),
        /// Source of compressed bytes; used directly for the index and footer.
        in_reader: ReaderType,

        /// Consumes and validates the stream header from `source`, then sets
        /// up the block decoder.
        ///
        /// Errors:
        ///   * `error.BadHeader` if the 6 magic bytes do not match.
        ///   * `error.CorruptInput` if reserved stream-flag bits are set.
        ///   * `error.WrongChecksum` if the CRC32 of the stream flags does
        ///     not match the stored value.
        ///   * anything `source` or `block.decoder` returns.
        fn init(allocator: Allocator, source: ReaderType) !Self {
            const magic = try source.readBytesNoEof(6);
            if (!std.mem.eql(u8, &magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 }))
                return error.BadHeader;

            // The stored CRC32 covers only the stream flags, so hash exactly
            // the bytes that readStreamFlags consumes.
            var check: Check = undefined;
            const hash_a = blk: {
                var hasher = std.compress.hashedReader(source, Crc32.init());
                try readStreamFlags(hasher.reader(), &check);
                break :blk hasher.hasher.final();
            };

            const hash_b = try source.readIntLittle(u32);
            if (hash_a != hash_b)
                return error.WrongChecksum;

            return Self{
                .allocator = allocator,
                .block_decoder = try block.decoder(allocator, source, check),
                .in_reader = source,
            };
        }

        /// Releases the resources held by the block decoder.
        pub fn deinit(self: *Self) void {
            self.block_decoder.deinit();
        }

        /// Returns a `std.io.Reader` over the decompressed data.
        pub fn reader(self: *Self) Reader {
            return .{ .context = self };
        }

        /// Reads decompressed bytes into `buffer` and returns how many were
        /// written.
        ///
        /// Returns 0 for an empty `buffer`. Otherwise the call is forwarded
        /// to the block decoder; when that yields 0 bytes, the index and the
        /// stream footer are read and validated and 0 is returned.
        ///
        /// Errors raised while validating the trailer:
        ///   * `error.CorruptInput` for a record-count mismatch, non-zero
        ///     index padding, a Backward Size that disagrees with the index
        ///     size, reserved stream-flag bits, or bad footer magic.
        ///   * `error.WrongChecksum` for an index or footer CRC32 mismatch.
        pub fn read(self: *Self, buffer: []u8) Error!usize {
            if (buffer.len == 0)
                return 0;

            const r = try self.block_decoder.read(buffer);
            if (r != 0)
                return r;

            const index_size = blk: {
                var hasher = std.compress.hashedReader(self.in_reader, Crc32.init());
                // Seed the CRC and the byte counter with one 0x00 byte that is
                // not read here. NOTE(review): presumably this is the index
                // indicator byte already consumed by the block decoder; confirm
                // against xz/block.zig.
                hasher.hasher.update(&[1]u8{0x00});

                var counter = std.io.countingReader(hasher.reader());
                counter.bytes_read += 1;

                const counting_reader = counter.reader();

                const record_count = try std.leb.readULEB128(u64, counting_reader);
                if (record_count != self.block_decoder.block_count)
                    return error.CorruptInput;

                var i: usize = 0;
                while (i < record_count) : (i += 1) {
                    // TODO: validate records
                    _ = try std.leb.readULEB128(u64, counting_reader);
                    _ = try std.leb.readULEB128(u64, counting_reader);
                }

                // The index is zero-padded to a multiple of four bytes.
                while (counter.bytes_read % 4 != 0) {
                    if (try counting_reader.readByte() != 0)
                        return error.CorruptInput;
                }

                // Finalize the CRC before consuming the stored checksum; the
                // stored value is still read through the counting reader so
                // that its four bytes are included in the index size.
                const hash_a = hasher.hasher.final();
                const hash_b = try counting_reader.readIntLittle(u32);
                if (hash_a != hash_b)
                    return error.WrongChecksum;

                break :blk counter.bytes_read;
            };

            // Footer: stored CRC32 first, then the fields it covers.
            const hash_a = try self.in_reader.readIntLittle(u32);

            const hash_b = blk: {
                var hasher = std.compress.hashedReader(self.in_reader, Crc32.init());
                const hashed_reader = hasher.reader();

                // Widen before the arithmetic: the stored value is untrusted
                // and `(x + 1) * 4` overflows u32 for x >= 0x3FFFFFFF, which
                // would trap in safe builds and be undefined behaviour in
                // ReleaseFast. In u64 the result always fits, so a bogus
                // value is reported as CorruptInput instead.
                const stored_backward_size = try hashed_reader.readIntLittle(u32);
                const backward_size = (@as(u64, stored_backward_size) + 1) * 4;
                if (backward_size != index_size)
                    return error.CorruptInput;

                // The footer's check type is parsed only for validation of
                // the reserved bits and for the CRC; its value is not used.
                var check: Check = undefined;
                try readStreamFlags(hashed_reader, &check);

                break :blk hasher.hasher.final();
            };

            if (hash_a != hash_b)
                return error.WrongChecksum;

            const magic = try self.in_reader.readBytesNoEof(2);
            if (!std.mem.eql(u8, &magic, &.{ 'Y', 'Z' }))
                return error.CorruptInput;

            return 0;
        }
    };
}

// Reference the test file so that the tests declared in xz/test.zig are
// analyzed and run together with this file's tests.
test {
    _ = @import("xz/test.zig");
}
Loading

0 comments on commit 96a55f6

Please sign in to comment.