Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
root committed Dec 18, 2019
1 parent a5ca93f commit d6c5ca3
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 0 deletions.
15 changes: 15 additions & 0 deletions example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/usr/bin/env python3

import zreader
import ujson as json



# Adjust chunk_size as necessary -- defaults to 16,384 if not specified.
# The instance gets its own name so the imported `zreader` module is not
# shadowed and stays usable afterwards.
reader = zreader.Zreader("reddit_data.zst", chunk_size=8192)

# Each item produced by read() is one line of text; parse it as JSON and
# print the author and subreddit fields as a comma-separated pair.
for line in reader.read():
    obj = json.loads(line)
    print(obj['author'], obj['subreddit'], sep=",")

Binary file added reddit_data.zst
Binary file not shown.
28 changes: 28 additions & 0 deletions zreader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import zstandard as zstd



class Zreader:
    """Line-by-line reader for a Zstandard-compressed text file.

    The file is decompressed in chunks of at most ``chunk_size`` bytes, the
    bytes are decoded as UTF-8, and the text is split on ``"\\n"``. The
    instance may be used as a context manager so the underlying file is
    closed deterministically.
    """

    def __init__(self, file, chunk_size=16384):
        """Open *file* and prepare a streaming decompressor.

        Args:
            file: Path of the compressed file; it is opened in binary mode.
            chunk_size: Maximum number of decompressed bytes requested from
                the stream reader per read call.
        """
        # Function-scope import so the class does not depend on a
        # module-level import being present.
        import codecs

        self.fh = open(file, 'rb')
        self.chunk_size = chunk_size
        self.dctx = zstd.ZstdDecompressor()
        self.reader = self.dctx.stream_reader(self.fh)
        # Text of the current, not yet newline-terminated line.
        self.buffer = ''
        # A chunk boundary can fall in the middle of a multi-byte UTF-8
        # character; the incremental decoder holds the partial bytes back
        # until the rest arrives instead of raising UnicodeDecodeError.
        self._decoder = codecs.getincrementaldecoder('utf-8')()

    def read(self):
        """Yield each line of the decompressed text, without its newline.

        Yields:
            str: One line per ``"\\n"``-terminated record, followed by the
            final unterminated line if the data does not end in a newline.

        Raises:
            UnicodeDecodeError: If the data is not valid UTF-8, including a
                multi-byte character truncated at end of input.
        """
        while True:
            raw = self.reader.read(self.chunk_size)
            # Test the raw bytes for EOF: a non-empty chunk may legitimately
            # decode to '' when it only holds part of a character.
            if not raw:
                break
            lines = (self.buffer + self._decoder.decode(raw)).split("\n")
            # The last piece is an incomplete line (or '' if the chunk ended
            # exactly on a newline); keep it for the next chunk.
            self.buffer = lines.pop()
            yield from lines

        # Flush the decoder and emit whatever is left so a last line that
        # lacks a trailing newline is not silently dropped.
        tail = self.buffer + self._decoder.decode(b'', final=True)
        self.buffer = ''
        if tail:
            yield tail

    def close(self):
        """Close the decompression stream and the underlying file."""
        try:
            self.reader.close()
        finally:
            # Closing an already-closed file object is a no-op, so this is
            # safe even if closing the reader also closed the file.
            self.fh.close()

    def __enter__(self):
        """Return the reader itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the reader on leaving the ``with`` block."""
        self.close()

0 comments on commit d6c5ca3

Please sign in to comment.