Skip to content

Commit

Permalink
Unneeded rider changes
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin committed Nov 30, 2024
1 parent aa14a00 commit 8d172bc
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 68 deletions.
26 changes: 6 additions & 20 deletions src/csi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,8 @@ export default class CSI extends IndexFile {
return { lineCount }
}

async blocksForRangeForRefId(
refId: number,
async blocksForRange(
refName: string,
min: number,
max: number,
opts: Options = {},
Expand All @@ -207,6 +207,10 @@ export default class CSI extends IndexFile {
}

const indexData = await this.parse(opts)
const refId = indexData.refNameToId[refName]
if (refId === undefined) {
return []
}
const ba = indexData.indices[refId]
if (!ba) {
return []
Expand All @@ -231,24 +235,6 @@ export default class CSI extends IndexFile {
return optimizeChunks(chunks, new VirtualOffset(0, 0))
}

async blocksForRange(
refName: string,
min: number,
max: number,
opts: Options = {},
) {
if (min < 0) {
min = 0
}

const indexData = await this.parse(opts)
const refId = indexData.refNameToId[refName]
if (refId === undefined) {
return []
}
return this.blocksForRangeForRefId(refId, min, max, opts)
}

/**
* calculate the list of bins that may overlap with region [beg,end) (zero-based half-open)
*/
Expand Down
7 changes: 0 additions & 7 deletions src/indexFile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,6 @@ export default abstract class IndexFile {
return rest
}

public abstract blocksForRangeForRefId(
refId: number,
start: number,
end: number,
opts: Options,
): Promise<Chunk[]>

public abstract blocksForRange(
refName: string,
start: number,
Expand Down
52 changes: 20 additions & 32 deletions src/tabixIndexedFile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,6 @@ export default class TabixIndexedFile {

// now go through each chunk and parse and filter the lines out of it
for (const c of chunks) {
let previousStartCoordinate: number | undefined
const { buffer, cpositions, dpositions } = await this.chunkCache.get(
c.toString(),
c,
Expand All @@ -209,9 +208,11 @@ export default class TabixIndexedFile {
let blockStart = 0
let pos = 0

// fast path, Buffer is just ASCII chars, process directly
const str = decoder?.decode(buffer) ?? buffer.toString()
const strIsASCII = isASCII(str)
// fast path, Buffer is just ASCII chars and not gigantor, can be
// converted to string and processed directly. if it is not ASCII or
// gigantic (chrome max str len is 512Mb), we have to decode line by line
const strIsASCII = buffer.length < 500_000_000 && isASCII(str)
while (blockStart < str.length) {
let line: string
let n: number
Expand All @@ -226,7 +227,7 @@ export default class TabixIndexedFile {
if (n === -1) {
break
}
const b = buffer.subarray(blockStart, n)
const b = buffer.slice(blockStart, n)
line = decoder?.decode(b) ?? b.toString()
}

Expand All @@ -245,39 +246,29 @@ export default class TabixIndexedFile {
line,
)

// do a small check just to make sure that the lines are really sorted
// by start coordinate
if (
previousStartCoordinate !== undefined &&
startCoordinate !== undefined &&
previousStartCoordinate > startCoordinate
) {
throw new Error(
`Lines not sorted by start coordinate (${previousStartCoordinate} > ${startCoordinate}), this file is not usable with Tabix.`,
)
}
previousStartCoordinate = startCoordinate

if (overlaps) {
callback(
line,
// cpositions[pos] refers to actual file offset of a bgzip block boundaries
// cpositions[pos] refers to actual file offset of a bgzip block
// boundaries
//
// we multiply by (1 <<8) in order to make sure each block has a "unique"
// address space so that data in that block could never overlap
// we multiply by (1 <<8) in order to make sure each block has a
// "unique" address space so that data in that block could never
// overlap
//
// then the blockStart-dpositions is an uncompressed file offset from
// that bgzip block boundary, and since the cpositions are multiplied by
// (1 << 8) these uncompressed offsets get a unique space
// then the blockStart-dpositions is an uncompressed file offset
// from that bgzip block boundary, and since the cpositions are
// multiplied by (1 << 8) these uncompressed offsets get a unique
// space
cpositions[pos]! * (1 << 8) +
(blockStart - dpositions[pos]!) +
c.minv.dataPosition +
1,
)
} else if (startCoordinate !== undefined && startCoordinate >= end) {
// the lines were overlapping the region, but now have stopped, so
// we must be at the end of the relevant data and we can stop
// processing data now
// the lines were overlapping the region, but now have stopped, so we
// must be at the end of the relevant data and we can stop processing
// data now
return
}
blockStart = n + 1
Expand All @@ -294,12 +285,9 @@ export default class TabixIndexedFile {
* bytes up to the first non-meta line
*/
async getHeaderBuffer(opts: Options = {}) {
const {
firstDataLine,
metaChar: m,
maxBlockSize,
} = await this.getMetadata(opts)
const metaChar = m || '@'
const { firstDataLine, metaChar, maxBlockSize } =
await this.getMetadata(opts)

checkAbortSignal(opts.signal)

// eslint-disable-next-line @typescript-eslint/restrict-plus-operands
Expand Down
9 changes: 0 additions & 9 deletions src/tbi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -199,15 +199,6 @@ export default class TabixIndex extends IndexFile {
if (refId === undefined) {
return []
}
return this.blocksForRangeForRefId(refId, min, max, opts)
}
async blocksForRangeForRefId(
refId: number,
min: number,
max: number,
opts: Options = {},
) {
const indexData = await this.parse(opts)
const ba = indexData.indices[refId]
if (!ba) {
return []
Expand Down

0 comments on commit 8d172bc

Please sign in to comment.