Skip to content

Commit

Permalink
Reverts
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin committed Nov 30, 2024
1 parent 586e48a commit 62131a8
Show file tree
Hide file tree
Showing 15 changed files with 1,634 additions and 2,550 deletions.
16 changes: 16 additions & 0 deletions .eslintrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"parser": "@typescript-eslint/parser",
"plugins": ["prettier", "@typescript-eslint"],
"extends": [
"plugin:@typescript-eslint/recommended",
"plugin:prettier/recommended"
],
"rules": {
"no-underscore-dangle": 0,
"curly": "error",
"@typescript-eslint/no-explicit-any": 0,
"@typescript-eslint/explicit-module-boundary-types": 0,
"@typescript-eslint/ban-ts-comment": 0,
"semi": ["error", "never"]
}
}
10 changes: 5 additions & 5 deletions .github/workflows/push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ on: push

jobs:
test:
name: Lint, build, and test on node 20.x and ubuntu-latest
name: Lint, build, and test on node 14.x and ubuntu-latest
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Use Node.js 20.x
uses: actions/setup-node@v4
- uses: actions/checkout@v2
- name: Use Node.js 14.x
uses: actions/setup-node@v1
with:
node-version: 20.x
node-version: 14.x
- name: Install deps (with cache)
uses: bahmutov/npm-install@v1
- name: Lint codebase
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
[![Coverage Status](https://img.shields.io/codecov/c/github/GMOD/bgzf-filehandle/master.svg?style=flat-square)](https://codecov.io/gh/GMOD/bgzf-filehandle/branch/master)
[![Build Status](https://img.shields.io/github/actions/workflow/status/GMOD/bgzf-filehandle/push.yml?branch=master)](https://github.com/GMOD/bgzf-filehandle/actions)

Transparently read [indexed block-gzipped (BGZF)](http://www.htslib.org/doc/bgzip.html) files, such as those created by bgzip, using coordinates from the uncompressed file. The module is used in @gmod/indexedfasta to read bgzip-indexed fasta files (with gzi index, fai index, and fa).
Transparently read [indexed block-gzipped (BGZF)](http://www.htslib.org/doc/bgzip.html) files, such as those created by bgzip, using coordinates from the uncompressed file. The module is used in @gmod/indexedfasta to read bgzip-indexed fasta files (with gzi index, fai index, and fa).

Users can also use the `unzip` function to unzip whole bgzip files (which pako has trouble with natively).

Expand Down
5 changes: 5 additions & 0 deletions jest.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Jest configuration for this package: selects the 'ts-jest' preset and runs
// the tests in the 'node' test environment.
/** @type {import('ts-jest/dist/types').InitialOptionsTsJest} */
module.exports = {
preset: 'ts-jest',
testEnvironment: 'node',
};
39 changes: 20 additions & 19 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@gmod/bgzf-filehandle",
"version": "1.5.2",
"version": "1.4.7",
"description": "read from a compressed bgzip file (with .gzi) as if it were uncompressed",
"license": "MIT",
"repository": "gmod/bgzf-filehandle",
Expand All @@ -20,14 +20,14 @@
"src"
],
"scripts": {
"test": "vitest",
"lint": "eslint --report-unused-disable-directives --max-warnings 0",
"test": "jest",
"lint": "eslint src test",
"clean": "rimraf dist esm",
"prebuild": "yarn clean",
"prebuild": "npm run clean",
"build:esm": "tsc --target es2018 --outDir esm",
"build:es5": "tsc --target es2015 --module commonjs --outDir dist",
"build": "yarn build:esm && yarn build:es5",
"prepublishOnly": "yarn test --run && yarn build",
"build:es5": "tsc --target es5 --outDir dist",
"build": "npm run build:esm && npm run build:es5",
"prepublishOnly": "npm test && npm run build",
"postversion": "git push --follow-tags"
},
"keywords": [
Expand All @@ -42,25 +42,26 @@
},
"devDependencies": {
"@types/es6-promisify": "^6.0.0",
"@types/jest": "^29.5.2",
"@types/long": "^4.0.1",
"@types/node": "^22.10.1",
"@types/node": "^18.11.16",
"@types/pako": "^2.0.0",
"@typescript-eslint/eslint-plugin": "^8.16.0",
"@typescript-eslint/parser": "^8.16.0",
"eslint": "^9.7.0",
"@typescript-eslint/eslint-plugin": "^5.59.9",
"@typescript-eslint/parser": "^5.59.9",
"eslint": "^8.42.0",
"eslint-config-prettier": "^8.8.0",
"eslint-plugin-import": "^2.27.5",
"eslint-plugin-unicorn": "^56.0.1",
"prettier": "^3.2.5",
"rimraf": "^6.0.1",
"typescript": "^5.1.3",
"typescript-eslint": "^8.16.0",
"vitest": "^2.1.6"
"eslint-plugin-prettier": "^4.2.1",
"jest": "^29.5.0",
"prettier": "^2.8.8",
"rimraf": "^5.0.1",
"ts-jest": "^29.1.0",
"typescript": "^5.1.3"
},
"publishConfig": {
"access": "public"
},
"browser": {
"./esm/unzip.js": "./esm/unzip-pako.js",
"./dist/unzip.js": "./dist/unzip-pako.js"
"./esm/unzip.js": "./esm/unzip-pako.js"
}
}
17 changes: 6 additions & 11 deletions src/bgzFilehandle.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,7 @@ export default class BgzFilehandle {
async getUncompressedFileSize() {
// read the last block's ISIZE (see gzip RFC),
// and add it to its uncompressedPosition
const ret = await this.gzi.getLastBlock()
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (!ret) {
throw new Error('no blocks')
}
const [, uncompressedPosition] = ret
const [, uncompressedPosition] = await this.gzi.getLastBlock()

const { size } = await this.filehandle.stat()

Expand All @@ -72,13 +67,12 @@ export default class BgzFilehandle {

async _readAndUncompressBlock(
blockBuffer: Buffer,
[compressedPosition]: [number, number],
[nextCompressedPosition]: [number, number],
[compressedPosition]: [number],
[nextCompressedPosition]: [number],
) {
let next = nextCompressedPosition
if (!next) {
const stat = await this.filehandle.stat()
next = stat.size
next = (await this.filehandle.stat()).size
}

// read the compressed data into the block buffer
Expand All @@ -96,7 +90,7 @@ export default class BgzFilehandle {
blockBuffer.slice(0, blockCompressedLength),
)

return unzippedBuffer
return unzippedBuffer as Buffer
}

async read(buf: Buffer, offset: number, length: number, position: number) {
Expand All @@ -114,6 +108,7 @@ export default class BgzFilehandle {
blockNum < blockPositions.length - 1;
blockNum += 1
) {
// eslint-disable-next-line no-await-in-loop
const uncompressedBuffer = await this._readAndUncompressBlock(
blockBuffer,
blockPositions[blockNum],
Expand Down
22 changes: 12 additions & 10 deletions src/gziIndex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const UNCOMPRESSED_POSITION = 1
export default class GziIndex {
filehandle: GenericFilehandle

index?: Promise<[number, number][]>
index?: any

constructor({
filehandle,
Expand Down Expand Up @@ -46,15 +46,15 @@ export default class GziIndex {
return this.index
}

async _readIndex(): Promise<[number, number][]> {
async _readIndex() {
let buf = Buffer.allocUnsafe(8)
await this.filehandle.read(buf, 0, 8, 0)
const numEntries = this._readLongWithOverflow(buf, 0, true)
if (!numEntries) {
return [[0, 0]]
}

const entries = new Array(numEntries + 1) as [number, number][]
const entries = new Array(numEntries + 1)
entries[0] = [0, 0]

// TODO rewrite this to make an index-index that stays in memory
Expand All @@ -81,7 +81,10 @@ export default class GziIndex {

async getLastBlock() {
const entries = await this._getIndex()
return entries.at(-1)!
if (!entries.length) {
return undefined
}
return entries[entries.length - 1]
}

async getRelevantBlocksForRead(length: number, position: number) {
Expand All @@ -90,13 +93,12 @@ export default class GziIndex {
return []
}
const entries = await this._getIndex()
const relevant = [] as [number, number][]
const relevant = []

// binary search to find the block that the
// read starts in and extend forward from that
const compare = (entry: number[], nextEntry: number[]) => {
const compare = (entry: any, nextEntry: any) => {
const uncompressedPosition = entry[UNCOMPRESSED_POSITION]
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
const nextUncompressedPosition = nextEntry
? nextEntry[UNCOMPRESSED_POSITION]
: Infinity
Expand Down Expand Up @@ -142,9 +144,9 @@ export default class GziIndex {
break
}
}
// if (relevant.at(-1)![UNCOMPRESSED_POSITION] < endPosition) {
// relevant.push([])
// }
if (relevant[relevant.length - 1][UNCOMPRESSED_POSITION] < endPosition) {
relevant.push([])
}
return relevant
}
}
6 changes: 4 additions & 2 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
export { default as BgzfFilehandle } from './bgzFilehandle'
export { unzip, unzipChunkSlice, unzipChunk } from './unzip'
import BgzfFilehandle from './bgzFilehandle'
import { unzip, unzipChunk, unzipChunkSlice } from './unzip'

export { BgzfFilehandle, unzip, unzipChunk, unzipChunkSlice }
40 changes: 15 additions & 25 deletions src/unzip-pako.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,9 @@ interface Chunk {
}

// browserify-zlib, which is the zlib shim used by default in webpacked code,
// does not properly uncompress bgzf chunks that contain more than one bgzf
// block, so export an unzip function that uses pako directly if we are running
// in a browser.
//
//

// does not properly uncompress bgzf chunks that contain more than
// one bgzf block, so export an unzip function that uses pako directly
// if we are running in a browser.
async function unzip(inputData: Buffer) {
try {
let strm
Expand All @@ -31,7 +28,6 @@ async function unzip(inputData: Buffer) {
inflator = new Inflate()
//@ts-ignore
;({ strm } = inflator)

inflator.push(remainingInput, Z_SYNC_FLUSH)
if (inflator.err) {
throw new Error(inflator.msg)
Expand All @@ -49,21 +45,20 @@ async function unzip(inputData: Buffer) {
offset += chunks[i].length
}
return Buffer.from(result)
} catch (error) {
} catch (e) {
//cleanup error message
if (`${error}`.includes('incorrect header check')) {
if (`${e}`.match(/incorrect header check/)) {
throw new Error(
'problem decompressing block: incorrect gzip header check',
)
}
throw error
throw e
}
}

// similar to pakounzip, except it does extra counting to return the positions
// of compressed and decompressed data offsets
//

// similar to pakounzip, except it does extra counting
// to return the positions of compressed and decompressed
// data offsets
async function unzipChunk(inputData: Buffer) {
try {
let strm
Expand All @@ -77,7 +72,6 @@ async function unzipChunk(inputData: Buffer) {
const inflator = new Inflate()
// @ts-ignore
;({ strm } = inflator)

inflator.push(remainingInput, Z_SYNC_FLUSH)
if (inflator.err) {
throw new Error(inflator.msg)
Expand All @@ -95,21 +89,19 @@ async function unzipChunk(inputData: Buffer) {

const buffer = Buffer.concat(blocks)
return { buffer, cpositions, dpositions }
} catch (error) {
} catch (e) {
//cleanup error message
if (`${error}`.includes('incorrect header check')) {
if (`${e}`.match(/incorrect header check/)) {
throw new Error(
'problem decompressing block: incorrect gzip header check',
)
}
throw error
throw e
}
}

// similar to unzipChunk above but slices (0,minv.dataPosition) and
// (maxv.dataPosition,end) off
//

async function unzipChunkSlice(inputData: Buffer, chunk: Chunk) {
try {
let strm
Expand All @@ -127,7 +119,6 @@ async function unzipChunkSlice(inputData: Buffer, chunk: Chunk) {
const inflator = new Inflate()
// @ts-ignore
;({ strm } = inflator)

inflator.push(remainingInput, Z_SYNC_FLUSH)
if (inflator.err) {
throw new Error(inflator.msg)
Expand All @@ -145,7 +136,6 @@ async function unzipChunkSlice(inputData: Buffer, chunk: Chunk) {
len = chunks[0].length
}
const origCpos = cpos

cpos += strm.next_in
dpos += len

Expand Down Expand Up @@ -178,14 +168,14 @@ async function unzipChunkSlice(inputData: Buffer, chunk: Chunk) {
const buffer = Buffer.from(result)

return { buffer, cpositions, dpositions }
} catch (error) {
} catch (e) {
//cleanup error message
if (`${error}`.includes('incorrect header check')) {
if (`${e}`.match(/incorrect header check/)) {
throw new Error(
'problem decompressing block: incorrect gzip header check',
)
}
throw error
throw e
}
}

Expand Down
6 changes: 2 additions & 4 deletions src/unzip.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
import zlib from 'zlib'
import { Buffer } from 'buffer'
import { promisify } from 'es6-promisify'
import { pakoUnzip, unzipChunk, unzipChunkSlice } from './unzip-pako'

const gunzip = promisify(zlib.gunzip)

/**
 * In node, just use the native unzipping with Z_SYNC_FLUSH.
 *
 * Passes `input` to the promisified `zlib.gunzip` with `finishFlush` set to
 * the `Z_SYNC_FLUSH` constant.
 *
 * @param input - the compressed data to gunzip
 * @returns a promise for the uncompressed Buffer
 */
function nodeUnzip(input: Buffer): Promise<Buffer> {
//@ts-ignore
return gunzip(input, {
// the constant is read from `zlib.constants` when that is truthy, otherwise
// from the `zlib` module object itself
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
finishFlush: (zlib.constants || zlib).Z_SYNC_FLUSH,
})
}

export { nodeUnzip as unzip, nodeUnzip }

export { pakoUnzip, unzipChunkSlice, unzipChunk } from './unzip-pako'
export { nodeUnzip as unzip, unzipChunk, unzipChunkSlice, nodeUnzip, pakoUnzip }
Loading

0 comments on commit 62131a8

Please sign in to comment.