From d299d6732e1e7879b0c99fe4c948e57a2ad059ac Mon Sep 17 00:00:00 2001 From: TalonTrecek Date: Mon, 21 Apr 2025 16:45:06 +0000 Subject: [PATCH 01/16] Add support for generating fastxpp format --- ishlib/formats/generate_fastxpp.mojo | 115 +++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 ishlib/formats/generate_fastxpp.mojo diff --git a/ishlib/formats/generate_fastxpp.mojo b/ishlib/formats/generate_fastxpp.mojo new file mode 100644 index 0000000..a8b0e4a --- /dev/null +++ b/ishlib/formats/generate_fastxpp.mojo @@ -0,0 +1,115 @@ +import sys +from collections import Optional +from ExtraMojo.io.buffered import BufferedReader, BufferedWriter + +# ---------- helpers ------------------------------------------------- + + +fn string_count(s: String) -> Int: + var n: Int = 0 + for _ in s.codepoints(): + n = n + 1 + return n + + +fn read_line(mut rdr: BufferedReader) raises -> String: + var buf = List[UInt8]() + var n = rdr.read_until(buf, ord("\n")) + if n == 0: + return "" + var s = String() + s.write_bytes(Span(buf)) + return s + + +# ---------- FASTX++ builder ----------------------------------------- + + +fn generate_fastxpp( + marker: String, + header: String, + seq_lines: List[String], + qualities: Optional[List[String]] = None, +) -> String: + var seq_len: Int = 0 + for i in range(len(seq_lines)): + seq_len = seq_len + string_count(seq_lines[i]) + + var meta = String(string_count(header)) + ":" + String( + seq_len + ) + ":" + String(len(seq_lines)) + + var rec = marker + "`" + meta + "`" + header + "\n" + + for i in range(len(seq_lines)): + rec.write(seq_lines[i], "\n") + + if qualities: + var q = qualities.value() + rec += "+\n" + for i in range(len(q)): + rec.write(q[i], "\n") + + return rec + + +# ---------- main ---------------------------------------------------- + + +fn main() raises: + var argv = sys.argv() + if len(argv) != 3: + print( + "Usage: mojo run generate_fastxpp.mojo " + " " + ) + return + + var reader = BufferedReader( + open(String(argv[1]), "r"), buffer_capacity=128 * 1024 + ) + var writer = BufferedWriter( + open(String(argv[2]), "w"), buffer_capacity=128 * 1024 + ) + + var pending_header = String() # carries a header we already read + + while True: + var header_line = pending_header + if header_line == "": + header_line = read_line(reader) + pending_header = String() + + if header_line == "": + break + + var marker = String(header_line[0:1]) + var header = String(header_line[1:]) + + var seq = List[String]() + var line: String + + while True: + line = read_line(reader) + if line == "": + break + if ( + line.startswith(">") + or line.startswith("@") + or (marker == "@" and line.startswith("+")) + ): + pending_header = line # save for the next record + break + seq.append(line) + + var qual: Optional[List[String]] = None + if marker == "@" and line.startswith("+"): + var qlines = List[String]() + for _ in range(len(seq)): + qlines.append(read_line(reader)) + qual = Optional[List[String]](qlines) + + writer.write(generate_fastxpp(marker, header, seq, qual)) + + writer.flush() + writer.close() From 250ef261c646042f72dd3577cd1be934ec9f47a0 Mon Sep 17 00:00:00 2001 From: TalonTrecek Date: Mon, 21 Apr 2025 16:45:18 +0000 Subject: [PATCH 02/16] Add support for reading fastxpp format --- ishlib/vendor/kseq.mojo | 174 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 173 insertions(+), 1 deletion(-) diff --git a/ishlib/vendor/kseq.mojo b/ishlib/vendor/kseq.mojo index 2af6dcb..70024c6 100644 --- a/ishlib/vendor/kseq.mojo +++ b/ishlib/vendor/kseq.mojo @@ -20,6 +20,7 @@ def main(): print(count, slen, qlen, sep="\t") ``` """ +import sys from memory import UnsafePointer, memcpy from utils import StringSlice @@ -36,6 +37,59 @@ alias ASCII_FASTA_RECORD_START = ord(">") alias ASCII_FASTQ_RECORD_START = ord("@") alias ASCII_FASTQ_SEPARATOR = ord("+") +# ────────────────────────────────────────────────────────────── +# Helpers for reading in fastx++ files +# ────────────────────────────────────────────────────────────── + + +@always_inline +fn strip_newlines_in_place( + mut bs: ByteString, disk: Int, expected: Int +) -> Bool: + """Compact `bs` by removing every `\n` byte in‑place; return True if the + resulting length equals `expected`. + SIMD search for newline, shifts the bytes to the left, and resizes the buffer. + Avoids allocating a new buffer and copying the data. + + bs: Mutable buffer that already holds the raw FASTQ/FASTA chunk just read from disk + disk: The number of bytes that were actually read into bs + expected: how many bases/quality bytes should remain after stripping newlines; + used as a quick integrity check. + + Returns: + True if the resulting buffer's length equals `expected`, False otherwise. + """ + var read_pos: Int = 0 + var write_pos: Int = 0 + while read_pos < disk: + var span_rel = memchr[do_alignment=True]( + Span[UInt8, __origin_of(bs.ptr)]( + ptr=bs.ptr.offset(read_pos), length=disk - read_pos + ), + UInt8(ASCII_NEWLINE), + ) + # If there are no new lines we dont have to adjust buffer + # If there was newlines, compute the contiguous span without newlines + var end_pos = disk if span_rel == -1 else read_pos + span_rel + var span_len = end_pos - read_pos + if span_len > 0 and write_pos != read_pos: + memcpy(bs.ptr.offset(write_pos), bs.ptr.offset(read_pos), span_len) + write_pos += span_len + read_pos = end_pos + 1 # skip the '\n' (or exit loop if none) + bs.resize(write_pos) + return write_pos == expected + + +# This is long way of https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/ +fn ascii_to_int[O: Origin](buf: Span[UInt8, O], s: Int, e: Int) -> Int: + var v: Int = 0 + for i in range(s, e): + v = v * 10 + Int(buf[i] - ord("0")) + return v + + +# ────────────────────────────────────────────────────────────── + @value @register_passable("trivial") @@ -384,7 +438,7 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): if c < 0: return Int(c) # EOF Error - # Reset all members + # Reset all buffers for reuse self.seq.clear() self.qual.clear() self.comment.clear() @@ -455,3 +509,121 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): if len(self.qual) != len(self.seq): return -2 # error: qual string is of different length return len(self.seq) + + fn read_fastxpp(mut self) raises -> Int: + # Locate next header + if self.last_char == 0: + var c = self.reader.read_byte() + while ( + c >= 0 + and c != ASCII_FASTA_RECORD_START + and c != ASCII_FASTQ_RECORD_START + ): + c = self.reader.read_byte() + if c < 0: + return Int(c) # EOF + else: + var c = self.last_char + self.last_char = 0 + + # Clear buffers for this record + self.seq.clear() + self.qual.clear() + self.comment.clear() + self.name.clear() + + # Read header line (without copying '\n') + var r = self.reader.read_until[SearchChar.Newline](self.name, 0, False) + if r < 0: + return Int(r) + + var hdr = self.name.as_span() + if len(hdr) == 0 or hdr[0] != UInt8(ord("`")): + return -3 # not FASTX++ header + + # Find closing back‑tick with one memchr + var after = Span[UInt8, __origin_of(hdr)]( + ptr=hdr.unsafe_ptr().offset(1), length=len(hdr) - 1 + ) + var rel = memchr(after, UInt8(ord("`"))) + if rel == -1: + return -3 + var pos2 = rel + 1 + + # Parse hlen, slen, lcnt in one pass + var hlen: Int = 0 + var slen: Int = 0 + var lcnt: Int = 0 + var cur: Int = 0 + var which: Int = 0 + for i in range(1, pos2): + var ch = hdr[i] + if ch == UInt8(ord(":")): + if which == 0: + hlen = cur + elif which == 1: + slen = cur + cur = 0 + which += 1 + else: + cur = cur * 10 + Int(ch - UInt8(ord("0"))) + lcnt = cur + + # Validate header length and resize name + if len(hdr) - (pos2 + 1) != hlen: + return -3 + self.name.resize(hlen) + + # ── Sequence block ──────────────────────────────────────────── + var disk = slen + lcnt # bytes on disk (bases+LFs) + var need = disk # local copy; will be mutated + self.seq.clear() + self.seq.reserve(need) + var got = self.reader.read_bytes(self.seq, need) + if got != disk: + return -3 + if not strip_newlines_in_place(self.seq, disk, slen): + return -2 + + # ── Quality block (FASTQ) ───────────────────────────────────── + if hdr[0] == UInt8(ASCII_FASTQ_RECORD_START): + if ( + self.reader.read_byte() != ASCII_NEWLINE + ): # consume LF after '+' line + return -3 + var needq = disk # separate mutable copy + self.qual.clear() + self.qual.reserve(needq) + var gotq = self.reader.read_bytes(self.qual, needq) + if gotq != disk: + return -3 + if not strip_newlines_in_place(self.qual, disk, slen): + return -2 + + return len(self.seq) + + +# ────────────────────────────────────────────────────────────── +# Main for debugging +# ────────────────────────────────────────────────────────────── +# +# fn main() raises: +# var argv = sys.argv() +# if len(argv) != 2: +# print("Usage: mojo run kseq.mojo ") +# return + +# var fh = open(String(argv[1]), "r") +# var reader = FastxReader[read_comment=False]( +# BufferedReader(FileReader(fh^)) # move FileHandle into FileReader +# ) + +# var code = reader.read_fastxpp() +# print("first‑read returned", code) +# var count = 0 +# while True: +# var n = reader.read_fastxpp() +# if n < 0: +# break +# count += 1 +# print("rec#", count, "seq_len", n, "hdr_len", len(reader.name)) From 0d210b46ef25938e43f05d72d545db8210e132ef Mon Sep 17 00:00:00 2001 From: TalonTrecek Date: Mon, 21 Apr 2025 16:45:30 +0000 Subject: [PATCH 03/16] Add file for benchmarking --- ishlib/formats/fastxpp_bench.mojo | 72 +++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 ishlib/formats/fastxpp_bench.mojo diff --git a/ishlib/formats/fastxpp_bench.mojo b/ishlib/formats/fastxpp_bench.mojo new file mode 100644 index 0000000..dab0f61 --- /dev/null +++ b/ishlib/formats/fastxpp_bench.mojo @@ -0,0 +1,72 @@ +import sys +from time.time import perf_counter +from ishlib.vendor.kseq import FastxReader, BufferedReader, KRead + + +# ── thin wrapper so FileHandle implements KRead ───────────────────────── +struct FileReader(KRead): + var fh: FileHandle + + fn __init__(out self, owned fh: FileHandle): + self.fh = fh^ + + fn __moveinit__(out self, owned other: Self): + self.fh = other.fh^ + + fn unbuffered_read[ + o: MutableOrigin + ](mut self, buffer: Span[UInt8, o]) raises -> Int: + return Int(self.fh.read(buffer.unsafe_ptr(), len(buffer))) + + +# ──────────────────────────────────────────────────────────────────────── + + +fn bench_original(path: String) raises -> (Int, Int, Float64): + var fh = open(path, "r") + var rdr = FastxReader[read_comment=False](BufferedReader(FileReader(fh^))) + var rec = 0 + var seq = 0 + var t0 = perf_counter() + while rdr.read() > 0: + rec += 1 + seq += len(rdr.seq) + return (rec, seq, perf_counter() - t0) + + +fn bench_fastxpp(path: String) raises -> (Int, Int, Float64): + var fh = open(path, "r") + var rdr = FastxReader[read_comment=False](BufferedReader(FileReader(fh^))) + var rec = 0 + var seq = 0 + var t0 = perf_counter() + while True: + var n = rdr.read_fastxpp() + if n < 0: + break + rec += 1 + seq += n + return (rec, seq, perf_counter() - t0) + + +fn main() raises: + var argv = sys.argv() + if len(argv) < 2 or len(argv) > 3: + print("Usage: mojo run fastxpp_bench.mojo [fastxpp]") + return + + var path = String(argv[1]) + var use_fast = (len(argv) == 3) and (String(argv[2]) == "fastxpp") + + if use_fast: + var tup = bench_fastxpp(path) # (Int, Int, Float64) + var r = tup[0] # records + var s = tup[1] # bases + var t = tup[2] # seconds + print("mode=fastxpp records=", r, " bases=", s, " time=", t, "s") + else: + var tup = bench_original(path) + var r = tup[0] + var s = tup[1] + var t = tup[2] + print("mode=orig records=", r, " bases=", s, " time=", t, "s") From f229a572831077b366a01f2ba812c90f3c650e22 Mon Sep 17 00:00:00 2001 From: TalonTrecek Date: Mon, 21 Apr 2025 17:47:29 +0000 Subject: [PATCH 04/16] Comments --- ishlib/vendor/kseq.mojo | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ishlib/vendor/kseq.mojo b/ishlib/vendor/kseq.mojo index 70024c6..4f1625f 100644 --- a/ishlib/vendor/kseq.mojo +++ b/ishlib/vendor/kseq.mojo @@ -59,8 +59,13 @@ fn strip_newlines_in_place( Returns: True if the resulting buffer's length equals `expected`, False otherwise. """ + # read_pos always starts the loop at the first byte that has not yet been examined. var read_pos: Int = 0 + # write_pos always starts at the first byte that has not yet been written into its final position var write_pos: Int = 0 + # Before the first newline, every byte is kept, so the pointers march together (no gap) + # After the first newline, the pointers may diverge, and we will need to copy bytes + while read_pos < disk: var span_rel = memchr[do_alignment=True]( Span[UInt8, __origin_of(bs.ptr)]( @@ -72,6 +77,8 @@ fn strip_newlines_in_place( # If there was newlines, compute the contiguous span without newlines var end_pos = disk if span_rel == -1 else read_pos + span_rel var span_len = end_pos - read_pos + # We only need to copy if there are newlines that would made gaps resulting in write_pos != read_pos + # See read_pos and write_pos comments above if span_len > 0 and write_pos != read_pos: memcpy(bs.ptr.offset(write_pos), bs.ptr.offset(read_pos), span_len) write_pos += span_len @@ -81,6 +88,8 @@ fn strip_newlines_in_place( # This is long way of https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/ +# not directly used, read_fastxpp uses cur = cur * 10 + Int(ch - UInt8(ord("0"))) directly +# Keeping here for comparison to SWAR implementation fn ascii_to_int[O: Origin](buf: Span[UInt8, O], s: Int, e: Int) -> Int: var v: Int = 0 for i in range(s, e): From 69830a3802f9b70f7ea5679aa02c3fa26f8a612b Mon Sep 17 00:00:00 2001 From: TalonTrecek Date: Wed, 23 Apr 2025 16:40:28 +0000 Subject: [PATCH 05/16] Add bpl --- ishlib/formats/fastxpp_bench.mojo | 77 ++++++++++++-- ishlib/vendor/kseq.mojo | 164 +++++++++++++++++++++++++----- 2 files changed, 207 insertions(+), 34 deletions(-) diff --git a/ishlib/formats/fastxpp_bench.mojo b/ishlib/formats/fastxpp_bench.mojo index dab0f61..db66a5a 100644 --- a/ishlib/formats/fastxpp_bench.mojo +++ b/ishlib/formats/fastxpp_bench.mojo @@ -1,6 +1,7 @@ import sys from time.time import perf_counter from ishlib.vendor.kseq import FastxReader, BufferedReader, KRead +from ExtraMojo.utils.ir import dump_ir # ── thin wrapper so FileHandle implements KRead ───────────────────────── @@ -49,24 +50,78 @@ fn bench_fastxpp(path: String) raises -> (Int, Int, Float64): return (rec, seq, perf_counter() - t0) +fn bench_fastxpp_bpl(path: String) raises -> (Int, Int, Float64): + var fh = open(path, "r") + var rdr = FastxReader[read_comment=False](BufferedReader(FileReader(fh^))) + var rec = 0 + var seq = 0 + var t0 = perf_counter() + while True: + var n = rdr.read_fastxpp_bpl() + if n < 0: + break + rec += 1 + seq += n + return (rec, seq, perf_counter() - t0) + + +fn bench_fastxpp_bpl2(path: String) raises -> (Int, Int, Float64): + var fh = open(path, "r") + var rdr = FastxReader[read_comment=False](BufferedReader(FileReader(fh^))) + var rec = 0 + var seq = 0 + var t0 = perf_counter() + while True: + var n = rdr.read_fastxpp_bpl() + if n < 0: + break + rec += 1 + seq += n + return (rec, seq, perf_counter() - t0) + + fn main() raises: var argv = sys.argv() if len(argv) < 2 or len(argv) > 3: - print("Usage: mojo run fastxpp_bench.mojo [fastxpp]") + print("Usage: mojo run fastxpp_bench.mojo [orig|fastxpp|bpl]") return var path = String(argv[1]) - var use_fast = (len(argv) == 3) and (String(argv[2]) == "fastxpp") - - if use_fast: - var tup = bench_fastxpp(path) # (Int, Int, Float64) - var r = tup[0] # records - var s = tup[1] # bases - var t = tup[2] # seconds - print("mode=fastxpp records=", r, " bases=", s, " time=", t, "s") - else: + var mode: String = "orig" # default when no flag given + if len(argv) == 3: + mode = String(argv[2]) + + if mode == "orig": var tup = bench_original(path) var r = tup[0] var s = tup[1] var t = tup[2] - print("mode=orig records=", r, " bases=", s, " time=", t, "s") + print( + "mode=orig records=", r, " bases=", s, " time=", t, "s" + ) + elif mode == "fastxpp": + var tup = bench_fastxpp(path) + var r = tup[0] + var s = tup[1] + var t = tup[2] + print( + "mode=fastxpp records=", r, " bases=", s, " time=", t, "s" + ) + elif mode == "bpl": + var tup = bench_fastxpp_bpl(path) + var r = tup[0] + var s = tup[1] + var t = tup[2] + print( + "mode=fastxpp_bpl records=", r, " bases=", s, " time=", t, "s" + ) + elif mode == "filler": + var tup = bench_fastxpp_bpl2(path) + var r = tup[0] + var s = tup[1] + var t = tup[2] + print( + "mode=fastxpp_bpl records=", r, " bases=", s, " time=", t, "s" + ) + else: + print("Unknown mode:", mode) diff --git a/ishlib/vendor/kseq.mojo b/ishlib/vendor/kseq.mojo index 4f1625f..419ced6 100644 --- a/ishlib/vendor/kseq.mojo +++ b/ishlib/vendor/kseq.mojo @@ -67,7 +67,7 @@ fn strip_newlines_in_place( # After the first newline, the pointers may diverge, and we will need to copy bytes while read_pos < disk: - var span_rel = memchr[do_alignment=True]( + var span_rel = memchr[do_alignment=False]( Span[UInt8, __origin_of(bs.ptr)]( ptr=bs.ptr.offset(read_pos), length=disk - read_pos ), @@ -521,6 +521,8 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): fn read_fastxpp(mut self) raises -> Int: # Locate next header + var marker: UInt8 + if self.last_char == 0: var c = self.reader.read_byte() while ( @@ -530,8 +532,11 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): ): c = self.reader.read_byte() if c < 0: - return Int(c) # EOF + # print("[DBG] EOF before header, c=", c) + return Int(c) + marker = UInt8(c) # NEW – store the marker else: + marker = UInt8(self.last_char) # NEW var c = self.last_char self.last_char = 0 @@ -595,7 +600,7 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): return -2 # ── Quality block (FASTQ) ───────────────────────────────────── - if hdr[0] == UInt8(ASCII_FASTQ_RECORD_START): + if marker == UInt8(ASCII_FASTQ_RECORD_START): if ( self.reader.read_byte() != ASCII_NEWLINE ): # consume LF after '+' line @@ -611,28 +616,141 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): return len(self.seq) + fn read_fastxpp_bpl(mut self) raises -> Int: + # ── 0 Locate the next header byte ('>' or '@') ────────────────────── + var marker: UInt8 # remember which flavour we’re on + if self.last_char == 0: + var c = self.reader.read_byte() + while ( + c >= 0 + and c != ASCII_FASTA_RECORD_START + and c != ASCII_FASTQ_RECORD_START + ): + c = self.reader.read_byte() + if c < 0: + return Int(c) # EOF / stream error (-1 / -2) + marker = UInt8(c) + else: + marker = UInt8(self.last_char) + var c = self.last_char + self.last_char = 0 + + # ── 1 Reset buffers reused across records ─────────────────────────── + self.seq.clear() + self.qual.clear() + self.comment.clear() + self.name.clear() + + # ── 2 Read the back‑tick header line -------------------------------- + var r = self.reader.read_until[SearchChar.Newline](self.name, 0, False) + if r < 0: + return Int(r) + + var hdr = self.name.as_span() + if len(hdr) == 0 or hdr[0] != UInt8(ord("`")): + return -3 # not a FASTX++ BPL header + + # ── 3 Find closing back‑tick and parse hlen:slen:lcnt:bpl ----------- + var after = Span[UInt8, __origin_of(hdr)]( + ptr=hdr.unsafe_ptr().offset(1), length=len(hdr) - 1 + ) + var rel = memchr(after, UInt8(ord("`"))) + if rel == -1: + return -3 + var pos2 = rel + 1 # index of 2nd back‑tick + + var hlen: Int = 0 + var slen: Int = 0 + var lcnt: Int = 0 + var bpl: Int = 0 + var cur: Int = 0 + var which: Int = 0 + for i in range(1, pos2): + var ch = hdr[i] + if ch == UInt8(ord(":")): + if which == 0: + hlen = cur + elif which == 1: + slen = cur + elif which == 2: + lcnt = cur + cur = 0 + which += 1 + else: + cur = cur * 10 + Int(ch - UInt8(ord("0"))) + bpl = cur # fourth number + + # Validate and keep only the header + # var imputed_len = len(hdr) - (pos2 + 1) + # if imputed_len != hlen: + # return -3 + memcpy(self.name.ptr, self.name.ptr.offset(pos2 + 1), hlen) + self.name.resize(hlen) + + # ── 4 SEQUENCE block --------------------------------------- + var disk_seq = slen + lcnt # immutable reference + var rest_seq = disk_seq # mutable copy for read_bytes + + self.seq.reserve(disk_seq) + var got_seq = self.reader.read_bytes(self.seq, rest_seq) + if got_seq != disk_seq: + return -3 # truncated record + + # compact in‑place: copy (bpl‑1) bases, skip the LF, repeat + var write_pos: Int = 0 + var read_pos: Int = 0 + while read_pos < disk_seq: + memcpy( + self.seq.addr(write_pos), # destination + self.seq.addr(read_pos), # source + bpl - 1, + ) # copy only the bases + write_pos += bpl - 1 + read_pos += bpl # jump over the LF + self.seq.resize(write_pos) # write_pos == slen + + # ── 5 Copy the QUALITY block (FASTQ only) ---------------------------- + + return len(self.seq) + + +struct FileReader(KRead): + var fh: FileHandle + + fn __init__(out self, owned fh: FileHandle): + self.fh = fh^ + + fn __moveinit__(out self, owned other: Self): + self.fh = other.fh^ + + fn unbuffered_read[ + o: MutableOrigin + ](mut self, buffer: Span[UInt8, o]) raises -> Int: + return Int(self.fh.read(buffer.unsafe_ptr(), len(buffer))) + # ────────────────────────────────────────────────────────────── # Main for debugging # ────────────────────────────────────────────────────────────── # -# fn main() raises: -# var argv = sys.argv() -# if len(argv) != 2: -# print("Usage: mojo run kseq.mojo ") -# return - -# var fh = open(String(argv[1]), "r") -# var reader = FastxReader[read_comment=False]( -# BufferedReader(FileReader(fh^)) # move FileHandle into FileReader -# ) - -# var code = reader.read_fastxpp() -# print("first‑read returned", code) -# var count = 0 -# while True: -# var n = reader.read_fastxpp() -# if n < 0: -# break -# count += 1 -# print("rec#", count, "seq_len", n, "hdr_len", len(reader.name)) +fn main() raises: + var argv = sys.argv() + if len(argv) != 2: + print("Usage: mojo run kseq.mojo ") + return + + var fh = open(String(argv[1]), "r") + var reader = FastxReader[read_comment=False]( + BufferedReader(FileReader(fh^)) + ) + + var first = reader.read_fastxpp_bpl() + print("first‑read returned", first) + + var count = 0 + while True: + var n = reader.read_fastxpp_bpl() + if n < 0: + break + count += 1 + print("rec#", count, "seq_len", n, "hdr_len", len(reader.name)) From f1a1df30154c7f32e104a43ff9a612b6c0791999 Mon Sep 17 00:00:00 2001 From: TalonTrecek Date: Wed, 23 Apr 2025 16:40:46 +0000 Subject: [PATCH 06/16] Add bpl generation --- ishlib/formats/generate_fastxpp.mojo | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/ishlib/formats/generate_fastxpp.mojo b/ishlib/formats/generate_fastxpp.mojo index a8b0e4a..b865ffa 100644 --- a/ishlib/formats/generate_fastxpp.mojo +++ b/ishlib/formats/generate_fastxpp.mojo @@ -31,6 +31,7 @@ fn generate_fastxpp( seq_lines: List[String], qualities: Optional[List[String]] = None, ) -> String: + var bpl = string_count(seq_lines[0]) + 1 # bases + LF var seq_len: Int = 0 for i in range(len(seq_lines)): seq_len = seq_len + string_count(seq_lines[i]) @@ -53,6 +54,29 @@ fn generate_fastxpp( return rec +fn generate_fastxpp_bpl( + marker: String, + header: String, + seq_lines: List[String], + qualities: Optional[List[String]] = None, +) -> String: + var bpl = string_count(seq_lines[0]) + 1 # bases + LF + var slen = (bpl - 1) * (len(seq_lines) - 1) + # (bases per full line) + string_count(seq_lines[-1]) # + last (ragged) line + var meta = String(string_count(header)) + ":" + + String(slen) + ":" + + String(len(seq_lines)) + ":" + + String(bpl) + var rec = marker + "`" + meta + "`" + header + "\n" + for i in range(len(seq_lines)): + rec.write(seq_lines[i], "\n") + if qualities: + var q = qualities.value() + for i in range(len(q)): + rec.write(q[i], "\n") + return rec + + # ---------- main ---------------------------------------------------- @@ -109,7 +133,7 @@ fn main() raises: qlines.append(read_line(reader)) qual = Optional[List[String]](qlines) - writer.write(generate_fastxpp(marker, header, seq, qual)) + writer.write(generate_fastxpp_bpl(marker, header, seq, qual)) writer.flush() writer.close() From e3c3c8bbd68c5f7ddc930fe33887b5753f2077d6 Mon Sep 17 00:00:00 2001 From: TalonTrecek Date: Thu, 24 Apr 2025 15:49:45 +0000 Subject: [PATCH 07/16] Add swar decode --- ishlib/formats/fastxpp_bench.mojo | 44 +++++--- ishlib/formats/generate_fastxpp.mojo | 46 +++++++- ishlib/formats/swar_decode.mojo | 102 +++++++++++++++++ ishlib/vendor/kseq.mojo | 159 +++++++++++++++++++++++---- 4 files changed, 311 insertions(+), 40 deletions(-) create mode 100644 ishlib/formats/swar_decode.mojo diff --git a/ishlib/formats/fastxpp_bench.mojo b/ishlib/formats/fastxpp_bench.mojo index db66a5a..f914743 100644 --- a/ishlib/formats/fastxpp_bench.mojo +++ b/ishlib/formats/fastxpp_bench.mojo @@ -65,6 +65,21 @@ fn bench_fastxpp_bpl(path: String) raises -> (Int, Int, Float64): return (rec, seq, perf_counter() - t0) +fn bench_fastxpp_swar(path: String) raises -> (Int, Int, Float64): + var fh = open(path, "r") + var rdr = FastxReader[read_comment=False](BufferedReader(FileReader(fh^))) + var rec = 0 + var seq = 0 + var t0 = perf_counter() + while True: + var n = rdr.read_fastxpp_swar() + if n < 0: + break + rec += 1 + seq += n + return (rec, seq, perf_counter() - t0) + + fn bench_fastxpp_bpl2(path: String) raises -> (Int, Int, Float64): var fh = open(path, "r") var rdr = FastxReader[read_comment=False](BufferedReader(FileReader(fh^))) @@ -92,34 +107,27 @@ fn main() raises: mode = String(argv[2]) if mode == "orig": - var tup = bench_original(path) - var r = tup[0] - var s = tup[1] - var t = tup[2] + r, s, t = bench_original(path) print( "mode=orig records=", r, " bases=", s, " time=", t, "s" ) + elif mode == "bpl": + r, s, t = bench_fastxpp_bpl(path) + print( + "mode=fastxpp_bpl records=", r, " bases=", s, " time=", t, "s" + ) elif mode == "fastxpp": - var tup = bench_fastxpp(path) - var r = tup[0] - var s = tup[1] - var t = tup[2] + r, s, t = bench_fastxpp(path) print( "mode=fastxpp records=", r, " bases=", s, " time=", t, "s" ) - elif mode == "bpl": - var tup = bench_fastxpp_bpl(path) - var r = tup[0] - var s = tup[1] - var t = tup[2] + elif mode == "swar": + r, s, t = bench_fastxpp_swar(path) print( - "mode=fastxpp_bpl records=", r, " bases=", s, " time=", t, "s" + "mode=fastxpp_swar records=", r, " bases=", s, " time=", t, "s" ) elif mode == "filler": - var tup = bench_fastxpp_bpl2(path) - var r = tup[0] - var s = tup[1] - var t = tup[2] + r, s, t = bench_fastxpp_bpl2(path) print( "mode=fastxpp_bpl records=", r, " bases=", s, " time=", t, "s" ) diff --git a/ishlib/formats/generate_fastxpp.mojo b/ishlib/formats/generate_fastxpp.mojo index b865ffa..534631d 100644 --- a/ishlib/formats/generate_fastxpp.mojo +++ b/ishlib/formats/generate_fastxpp.mojo @@ -1,6 +1,8 @@ import sys from collections import Optional from ExtraMojo.io.buffered import BufferedReader, BufferedWriter +from collections import List # dynamic grow-able buffer +from memory import Span # view into the List for zero-copy writes # ---------- helpers ------------------------------------------------- @@ -76,6 +78,48 @@ fn generate_fastxpp_bpl( rec.write(q[i], "\n") return rec +# Helper: encode an unsigned ≤9-digit value as zero-padded ASCII. +fn to_ascii_padded(value: Int, width: Int) -> String: + # build the decimal text first … + var digits = String(value) # e.g. "123" + var pad = width - string_count(digits) # how many zeros needed + + # … then emit into a single pre-sized String + var out = String(capacity=width) + for _ in range(pad): + out.write("0") + out.write(digits) # concat is zero-copy + return out # length == width + +fn generate_fastxpp_bpl_fixed( + marker: String, + header: String, + seq_lines: List[String], + qualities: Optional[List[String]] = None, +) -> String: + + # --- numeric fields ------------------------------------------------ + var bpl = string_count(seq_lines[0]) + 1 # incl. LF + var slen = (bpl - 1) * (len(seq_lines) - 1) + + string_count(seq_lines[-1]) + + # --- fixed-width metadata block ------------------------------------ + var meta = "`" + + to_ascii_padded(string_count(header), 6) + # hlen + to_ascii_padded(slen, 9) + # slen + to_ascii_padded(len(seq_lines), 7) + # nlin + to_ascii_padded(bpl, 3) + # bpl + "`" + + # --- assemble record ----------------------------------------------- + var rec = marker + meta + header + "\n" + for i in range(len(seq_lines)): + rec.write(seq_lines[i], "\n") + if qualities: + var q = qualities.value() + for i in range(len(q)): + rec.write(q[i], "\n") + return rec # ---------- main ---------------------------------------------------- @@ -133,7 +177,7 @@ fn main() raises: qlines.append(read_line(reader)) qual = Optional[List[String]](qlines) - writer.write(generate_fastxpp_bpl(marker, header, seq, qual)) + writer.write(generate_fastxpp_bpl_fixed(marker, header, seq, qual)) writer.flush() writer.close() diff --git a/ishlib/formats/swar_decode.mojo b/ishlib/formats/swar_decode.mojo new file mode 100644 index 0000000..968cd18 --- /dev/null +++ b/ishlib/formats/swar_decode.mojo @@ -0,0 +1,102 @@ +# This is for testing only + +from memory import UnsafePointer +from sys import exit + +alias U8x8 = SIMD[DType.uint8 , 8] +alias U16x8 = SIMD[DType.uint16, 8] +alias U32x8 = SIMD[DType.uint32, 8] + +@always_inline +fn to_simd(p: UnsafePointer[UInt8]) -> + UnsafePointer[SIMD[DType.uint8, 1]]: + return p.bitcast[SIMD[DType.uint8, 1]]() + +# 8 ASCII digits +@always_inline +fn decode_8(ptr: UnsafePointer[UInt8]) -> Int: + var v = to_simd(ptr).load[width=8](0) - UInt8(ord("0")) + var w = v.cast[DType.uint32]() + var mul = U32x8(10_000_000, 1_000_000, 100_000, 10_000, + 1_000, 100, 10, 1) + return Int((w * mul).reduce_add()) + +# 6 digits (still load 8 lanes) +@always_inline +fn decode_6(ptr: UnsafePointer[UInt8]) -> Int: + var v = to_simd(ptr).load[width=8](0) - UInt8(ord("0")) + var w = v.cast[DType.uint32]() + var mul = U32x8(100_000, 10_000, 1_000, 100, 10, 1, 0, 0) + return Int((w * mul).reduce_add()) + +# 7 digits: scalar last digit +@always_inline +fn decode_7(ptr: UnsafePointer[UInt8]) -> Int: + # Read the 7th digit at ptr[6] (offset 6) + var last_digit = Int(to_simd(ptr.offset(6)).load[width=1](0) - UInt8(ord("0"))) + # decode_6 handles ptr[0] through ptr[5] + return decode_6(ptr) * 10 + last_digit + + +# scalar last digit +@always_inline +fn decode_9(ptr: UnsafePointer[UInt8]) -> Int: + # Read the 9th digit at ptr[8] (offset 8) + var last_digit = Int(to_simd(ptr.offset(8)).load[width=1](0) - UInt8(ord("0"))) + # decode_8 handles ptr[0] through ptr[7] + return decode_8(ptr) * 10 + last_digit + +# 3-digit fallback +@always_inline +fn decode_3(ptr: UnsafePointer[UInt8]) -> Int: + var sp = to_simd(ptr) + + var d0 = Int(sp.load[width = 1](0) - UInt8(ord("0"))) + var d1 = Int(sp.load[width = 1](1) - UInt8(ord("0"))) + var d2 = Int(sp.load[width = 1](2) - UInt8(ord("0"))) + + return d0 * 100 + d1 * 10 + d2 + + +# zero-pad an Int to a fixed width +fn zpad(value: Int, width: Int) -> String: + var s = String(value) + var pad = width - len(s) + var out = String(capacity = width) + for _ in range(pad): + out.write("0") + out.write(s) + return out + +fn expect(label: String, got: Int, want: Int): + if got != want: + print("FAIL {", label, "} got ", got, " expected ", want) + exit(1) + +fn run_tests(): + # 3-digit + expect("3-zero", decode_3("000".unsafe_ptr()), 0) + expect("3-123", decode_3("123".unsafe_ptr()), 123) + expect("3-999", decode_3("999".unsafe_ptr()), 999) + + # 6-digit + expect("6-zero", decode_6("000000".unsafe_ptr()), 0) + expect("6-00123", decode_6("000123".unsafe_ptr()), 123) + expect("6-654321",decode_6("654321".unsafe_ptr()), 654321) + + # 7-digit + expect("7-zero", decode_7("0000000".unsafe_ptr()), 0) + expect("7-1234567",decode_7("1234567".unsafe_ptr()), 1234567) + + # 8-digit + expect("8-zero", decode_8("00000000".unsafe_ptr()), 0) + expect("8-87654321",decode_8("87654321".unsafe_ptr()), 87654321) + + # 9-digit + expect("9-zero", decode_9("000000000".unsafe_ptr()), 0) + expect("9-123456789",decode_9("123456789".unsafe_ptr()), 123456789) + + print("All SWAR decode tests passed.") + +fn main(): + run_tests() diff --git a/ishlib/vendor/kseq.mojo b/ishlib/vendor/kseq.mojo index 419ced6..0b450aa 100644 --- a/ishlib/vendor/kseq.mojo +++ b/ishlib/vendor/kseq.mojo @@ -28,7 +28,6 @@ from time.time import perf_counter from ExtraMojo.bstr.memchr import memchr - alias ASCII_NEWLINE = ord("\n") alias ASCII_CARRIAGE_RETURN = ord("\r") alias ASCII_TAB = ord("\t") @@ -37,11 +36,64 @@ alias ASCII_FASTA_RECORD_START = ord(">") alias ASCII_FASTQ_RECORD_START = ord("@") alias ASCII_FASTQ_SEPARATOR = ord("+") +alias U8x8 = SIMD[DType.uint8 , 8] +alias U16x8 = SIMD[DType.uint16, 8] +alias U32x8 = SIMD[DType.uint32, 8] + +@always_inline +fn to_simd(p: UnsafePointer[UInt8]) -> + UnsafePointer[SIMD[DType.uint8, 1]]: + return p.bitcast[SIMD[DType.uint8, 1]]() + +# 8 ASCII digits +@always_inline +fn decode_8(ptr: UnsafePointer[UInt8]) -> Int: + var v = to_simd(ptr).load[width=8](0) - UInt8(ord("0")) + var w = v.cast[DType.uint32]() + var mul = U32x8(10_000_000, 1_000_000, 100_000, 10_000, + 1_000, 100, 10, 1) + return Int((w * mul).reduce_add()) + +# 6 digits (still load 8 lanes) +@always_inline +fn decode_6(ptr: UnsafePointer[UInt8]) -> Int: + var v = to_simd(ptr).load[width=8](0) - UInt8(ord("0")) + var w = v.cast[DType.uint32]() + var mul = U32x8(100_000, 10_000, 1_000, 100, 10, 1, 0, 0) + return Int((w * mul).reduce_add()) + +# 7 digits: scalar last digit +@always_inline +fn decode_7(ptr: UnsafePointer[UInt8]) -> Int: + # Read the 7th digit at ptr[6] (offset 6) + var last_digit = Int(to_simd(ptr.offset(6)).load[width=1](0) - UInt8(ord("0"))) + # decode_6 handles ptr[0] through ptr[5] + return decode_6(ptr) * 10 + last_digit + + +# 9 digits: scalar last digit +@always_inline +fn decode_9(ptr: UnsafePointer[UInt8]) -> Int: + # Read the 9th digit at ptr[8] (offset 8) + var last_digit = Int(to_simd(ptr.offset(8)).load[width=1](0) - UInt8(ord("0"))) + # decode_8 handles ptr[0] through ptr[7] + return decode_8(ptr) * 10 + last_digit + +# 3-digit fallback +@always_inline +fn decode_3(ptr: UnsafePointer[UInt8]) -> Int: + var sp = to_simd(ptr) + + var d0 = Int(sp.load[width = 1](0) - UInt8(ord("0"))) + var d1 = Int(sp.load[width = 1](1) - UInt8(ord("0"))) + var d2 = Int(sp.load[width = 1](2) - UInt8(ord("0"))) + + return d0 * 100 + d1 * 10 + d2 + # ────────────────────────────────────────────────────────────── # Helpers for reading in fastx++ files # ────────────────────────────────────────────────────────────── - @always_inline fn strip_newlines_in_place( mut bs: ByteString, disk: Int, expected: Int @@ -599,21 +651,6 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): if not strip_newlines_in_place(self.seq, disk, slen): return -2 - # ── Quality block (FASTQ) ───────────────────────────────────── - if marker == UInt8(ASCII_FASTQ_RECORD_START): - if ( - self.reader.read_byte() != ASCII_NEWLINE - ): # consume LF after '+' line - return -3 - var needq = disk # separate mutable copy - self.qual.clear() - self.qual.reserve(needq) - var gotq = self.reader.read_bytes(self.qual, needq) - if gotq != disk: - return -3 - if not strip_newlines_in_place(self.qual, disk, slen): - return -2 - return len(self.seq) fn read_fastxpp_bpl(mut self) raises -> Int: @@ -708,11 +745,91 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): write_pos += bpl - 1 read_pos += bpl # jump over the LF self.seq.resize(write_pos) # write_pos == slen + # This is cold code that seems to preven inlining, it improve performance. + if 1 == False: + print( + "ERROR: Sequence read failed. got_seq != disk_seq", + "write_pos", write_pos, "disk_seq", disk_seq, + "bpl", bpl, "slen", len(self.seq) + ) + pass + return len(self.seq) - # ── 5 Copy the QUALITY block (FASTQ only) ---------------------------- + fn read_fastxpp_swar(mut self) raises -> Int: + # ── 0 Locate the next header byte ('>' or '@') ────────────────────── + var marker: UInt8 # remember which flavour we’re on + if self.last_char == 0: + var c = self.reader.read_byte() + while ( + c >= 0 + and c != ASCII_FASTA_RECORD_START + and c != ASCII_FASTQ_RECORD_START + ): + c = self.reader.read_byte() + if c < 0: + return Int(c) # EOF / stream error (-1 / -2) + marker = UInt8(c) + else: + marker = UInt8(self.last_char) + var c = self.last_char + self.last_char = 0 - return len(self.seq) + # ── 1 Reset buffers reused across records ─────────────────────────── + self.seq.clear() + self.qual.clear() + self.comment.clear() + self.name.clear() + # ── 2 Read the back‑tick header line -------------------------------- + var r = self.reader.read_until[SearchChar.Newline](self.name, 0, False) + if r < 0: + return Int(r) + + var hdr = self.name.as_span() + if len(hdr) == 0 or hdr[0] != UInt8(ord("`")): + print("ERROR: Opening backtick check failed. hdr[0]") + return -3 # not a FASTX++ BPL header + + #for i in range(len(hdr)): + # var code = Int(hdr[i]) + # print(i, hdr[i], code, chr(code)) + + # ── 3 Find closing back‑tick and parse hlen:slen:lcnt:bpl ----------- + var base = hdr.unsafe_ptr().offset(1) + + var hlen = decode_6(base) # bytes 0–5 + var slen = decode_9(base.offset(6)) # bytes 6–14 + var lcnt = decode_7(base.offset(15)) # bytes 15–21 + var bpl = decode_3(base.offset(22)) # bytes 22–24 + + if base.offset(25)[0] != UInt8(ord("`")): + print("ERROR: Closing backtick check failed. base[25]") + return -3 + + # ── 4 SEQUENCE block --------------------------------------- + var disk_seq = slen + lcnt # immutable reference + var rest_seq = disk_seq # mutable copy for read_bytes + + self.seq.reserve(disk_seq) + var got_seq = self.reader.read_bytes(self.seq, rest_seq) + if got_seq != disk_seq: + print("ERROR: Sequence read failed. got_seq != disk_seq") + return -3 # truncated record + + # compact in‑place: copy (bpl‑1) bases, skip the LF, repeat + var write_pos: Int = 0 + var read_pos: Int = 0 + while read_pos < disk_seq: + memcpy( + self.seq.addr(write_pos), # destination + self.seq.addr(read_pos), # source + bpl - 1, + ) # copy only the bases + write_pos += bpl - 1 + read_pos += bpl # jump over the LF + self.seq.resize(write_pos) # write_pos == slen + + return len(self.seq) struct FileReader(KRead): var fh: FileHandle @@ -744,12 +861,12 @@ fn main() raises: BufferedReader(FileReader(fh^)) ) - var first = reader.read_fastxpp_bpl() + var first = reader.read_fastxpp_swar() print("first‑read returned", first) var count = 0 while True: - var n = reader.read_fastxpp_bpl() + var n = reader.read_fastxpp_swar() if n < 0: break count += 1 From a4614eb1a452f3f09df105d3c1263b4e714dee0a Mon Sep 17 00:00:00 2001 From: TalonTrecek Date: Thu, 24 Apr 2025 23:36:34 +0000 Subject: [PATCH 08/16] remove hlen --- ishlib/formats/generate_fastxpp.mojo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ishlib/formats/generate_fastxpp.mojo b/ishlib/formats/generate_fastxpp.mojo index 534631d..52458af 100644 --- a/ishlib/formats/generate_fastxpp.mojo +++ b/ishlib/formats/generate_fastxpp.mojo @@ -105,7 +105,7 @@ fn generate_fastxpp_bpl_fixed( # --- fixed-width metadata block ------------------------------------ var meta = "`" + - to_ascii_padded(string_count(header), 6) + # hlen + #to_ascii_padded(string_count(header), 6) + # hlen to_ascii_padded(slen, 9) + # slen to_ascii_padded(len(seq_lines), 7) + # nlin to_ascii_padded(bpl, 3) + # bpl From ae738bb95706d8ad3584cd3eb97ad522984ec892 Mon Sep 17 00:00:00 2001 From: TalonTrecek Date: Thu, 24 Apr 2025 23:36:46 +0000 Subject: [PATCH 09/16] move swar decode for eventual vendor --- ishlib/formats/swar_decode.mojo | 102 -------------------------------- ishlib/vendor/swar_decode.mojo | 100 +++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 102 deletions(-) delete mode 100644 ishlib/formats/swar_decode.mojo create mode 100644 ishlib/vendor/swar_decode.mojo diff --git a/ishlib/formats/swar_decode.mojo b/ishlib/formats/swar_decode.mojo deleted file mode 100644 index 968cd18..0000000 --- a/ishlib/formats/swar_decode.mojo +++ /dev/null @@ -1,102 +0,0 @@ -# This is for testing only - -from memory import UnsafePointer -from sys import exit - -alias U8x8 = SIMD[DType.uint8 , 8] -alias U16x8 = SIMD[DType.uint16, 8] -alias U32x8 = SIMD[DType.uint32, 8] - -@always_inline -fn to_simd(p: UnsafePointer[UInt8]) -> - UnsafePointer[SIMD[DType.uint8, 1]]: - return p.bitcast[SIMD[DType.uint8, 1]]() - -# 8 ASCII digits -@always_inline -fn decode_8(ptr: UnsafePointer[UInt8]) -> Int: - var v = to_simd(ptr).load[width=8](0) - UInt8(ord("0")) - var w = v.cast[DType.uint32]() - var mul = U32x8(10_000_000, 1_000_000, 100_000, 10_000, - 1_000, 100, 10, 1) - return Int((w * mul).reduce_add()) - -# 6 digits (still load 8 lanes) -@always_inline -fn decode_6(ptr: UnsafePointer[UInt8]) -> Int: - var v = to_simd(ptr).load[width=8](0) - UInt8(ord("0")) - var w = v.cast[DType.uint32]() - var mul = U32x8(100_000, 10_000, 1_000, 100, 10, 1, 0, 0) - return Int((w * mul).reduce_add()) - -# 7 digits: scalar last digit -@always_inline -fn decode_7(ptr: UnsafePointer[UInt8]) -> Int: - # Read the 7th digit at ptr[6] (offset 6) - var last_digit = Int(to_simd(ptr.offset(6)).load[width=1](0) - UInt8(ord("0"))) - # decode_6 handles ptr[0] through ptr[5] - return decode_6(ptr) * 10 + last_digit - - -# scalar last digit -@always_inline -fn decode_9(ptr: UnsafePointer[UInt8]) -> Int: - # Read the 9th digit at ptr[8] (offset 8) - var last_digit = Int(to_simd(ptr.offset(8)).load[width=1](0) - UInt8(ord("0"))) - # decode_8 handles ptr[0] through ptr[7] - return decode_8(ptr) * 10 + last_digit - -# 3-digit fallback -@always_inline -fn decode_3(ptr: UnsafePointer[UInt8]) -> Int: - var sp = to_simd(ptr) - - var d0 = Int(sp.load[width = 1](0) - UInt8(ord("0"))) - var d1 = Int(sp.load[width = 1](1) - UInt8(ord("0"))) - var d2 = Int(sp.load[width = 1](2) - UInt8(ord("0"))) - - return d0 * 100 + d1 * 10 + d2 - - -# zero-pad an Int to a fixed width -fn zpad(value: Int, width: Int) -> String: - var s = String(value) - var pad = width - len(s) - var out = String(capacity = width) - for _ in range(pad): - out.write("0") - out.write(s) - return out - -fn expect(label: String, got: Int, want: Int): - if got != want: - print("FAIL {", label, "} got ", got, " expected ", want) - exit(1) - -fn run_tests(): - # 3-digit - expect("3-zero", decode_3("000".unsafe_ptr()), 0) - expect("3-123", decode_3("123".unsafe_ptr()), 123) - expect("3-999", decode_3("999".unsafe_ptr()), 999) - - # 6-digit - expect("6-zero", decode_6("000000".unsafe_ptr()), 0) - expect("6-00123", decode_6("000123".unsafe_ptr()), 123) - expect("6-654321",decode_6("654321".unsafe_ptr()), 654321) - - # 7-digit - expect("7-zero", decode_7("0000000".unsafe_ptr()), 0) - expect("7-1234567",decode_7("1234567".unsafe_ptr()), 1234567) - - # 8-digit - expect("8-zero", decode_8("00000000".unsafe_ptr()), 0) - expect("8-87654321",decode_8("87654321".unsafe_ptr()), 87654321) - - # 9-digit - expect("9-zero", decode_9("000000000".unsafe_ptr()), 0) - expect("9-123456789",decode_9("123456789".unsafe_ptr()), 123456789) - - print("All SWAR decode tests passed.") - -fn main(): - run_tests() diff --git a/ishlib/vendor/swar_decode.mojo b/ishlib/vendor/swar_decode.mojo new file mode 100644 index 0000000..8e2e071 --- /dev/null +++ b/ishlib/vendor/swar_decode.mojo @@ -0,0 +1,100 @@ +# This is for testing only + +from memory import UnsafePointer +from sys import exit + +alias U8x8 = SIMD[DType.uint8, 8] +alias U32x8 = SIMD[DType.uint32, 8] + + +# 8 ASCII digits +@always_inline +fn decode_8(ptr: UnsafePointer[UInt8]) -> Int: + var v = ptr.load[width=8](0) - ASCII_ZERO + var w = v.cast[DType.uint32]() + var mul = U32x8(10_000_000, 1_000_000, 100_000, 10_000, 1_000, 100, 10, 1) + return Int((w * mul).reduce_add()) + + +# 6 digits (still load 8 lanes) +@always_inline +fn decode_6(ptr: UnsafePointer[UInt8]) -> Int: + var v = ptr.load[width=8](0) - ASCII_ZERO + var w = v.cast[DType.uint32]() + var mul = U32x8(100_000, 10_000, 1_000, 100, 10, 1, 0, 0) + return Int((w * mul).reduce_add()) + + +# 7 digits: scalar last digit +@always_inline +fn decode_7(ptr: UnsafePointer[UInt8]) -> Int: + # Read the 7th digit at ptr[6] (offset 6) + var last_digit = Int(ptr.offset(6).load[width=1](0) - ASCII_ZERO) + # decode_6 handles ptr[0] through ptr[5] + return decode_6(ptr) * 10 + last_digit + + +# 9 digits: scalar last digit +@always_inline +fn decode_9(ptr: UnsafePointer[UInt8]) -> Int: + # Read the 9th digit at ptr[8] (offset 8) + var last_digit = Int(ptr.offset(8).load[width=1](0) - ASCII_ZERO) + # decode_8 handles ptr[0] through ptr[7] + return decode_8(ptr) * 10 + last_digit + + +# 3-digit fallback +@always_inline +fn decode_3(ptr: UnsafePointer[UInt8]) -> Int: + var d0 = Int(ptr.load[width=1](0) - ASCII_ZERO) + var d1 = Int(ptr.load[width=1](1) - ASCII_ZERO) + var d2 = Int(ptr.load[width=1](2) - ASCII_ZERO) + + return d0 * 100 + d1 * 10 + d2 + + +# zero-pad an Int to a fixed width +fn zpad(value: Int, width: Int) -> String: + var s = String(value) + var pad = width - len(s) + var out = String(capacity=width) + for _ in range(pad): + out.write("0") + out.write(s) + return out + + +fn expect(label: String, got: Int, want: Int): + if got != want: + print("FAIL {", label, "} got ", got, " expected ", want) + exit(1) + + +fn run_tests(): + # 3-digit + expect("3-zero", decode_3("000".unsafe_ptr()), 0) + expect("3-123", decode_3("123".unsafe_ptr()), 123) + expect("3-999", decode_3("999".unsafe_ptr()), 999) + + # 6-digit + expect("6-zero", decode_6("000000".unsafe_ptr()), 0) + expect("6-00123", decode_6("000123".unsafe_ptr()), 123) + expect("6-654321", decode_6("654321".unsafe_ptr()), 654321) + + # 7-digit + expect("7-zero", decode_7("0000000".unsafe_ptr()), 0) + expect("7-1234567", decode_7("1234567".unsafe_ptr()), 1234567) + + # 8-digit + expect("8-zero", decode_8("00000000".unsafe_ptr()), 0) + expect("8-87654321", decode_8("87654321".unsafe_ptr()), 87654321) + + # 9-digit + expect("9-zero", decode_9("000000000".unsafe_ptr()), 0) + expect("9-123456789", decode_9("123456789".unsafe_ptr()), 123456789) + + print("All SWAR decode tests passed.") + + +fn main(): + run_tests() From d20d1b0c99f6809430bbebd45f96b86235eb4f6c Mon Sep 17 00:00:00 2001 From: TalonTrecek Date: Thu, 24 Apr 2025 23:37:04 +0000 Subject: [PATCH 10/16] clean up --- ishlib/vendor/kseq.mojo | 102 +++++++++++++++++++++++----------------- 1 file changed, 58 insertions(+), 44 deletions(-) diff --git a/ishlib/vendor/kseq.mojo b/ishlib/vendor/kseq.mojo index 0b450aa..cdb2eb7 100644 --- a/ishlib/vendor/kseq.mojo +++ b/ishlib/vendor/kseq.mojo @@ -35,38 +35,35 @@ alias ASCII_SPACE = ord(" ") alias ASCII_FASTA_RECORD_START = ord(">") alias ASCII_FASTQ_RECORD_START = ord("@") alias ASCII_FASTQ_SEPARATOR = ord("+") +alias ASCII_ZERO = UInt8(ord("0")) -alias U8x8 = SIMD[DType.uint8 , 8] -alias U16x8 = SIMD[DType.uint16, 8] +alias U8x8 = SIMD[DType.uint8, 8] alias U32x8 = SIMD[DType.uint32, 8] -@always_inline -fn to_simd(p: UnsafePointer[UInt8]) -> - UnsafePointer[SIMD[DType.uint8, 1]]: - return p.bitcast[SIMD[DType.uint8, 1]]() # 8 ASCII digits @always_inline fn decode_8(ptr: UnsafePointer[UInt8]) -> Int: - var v = to_simd(ptr).load[width=8](0) - UInt8(ord("0")) - var w = v.cast[DType.uint32]() - var mul = U32x8(10_000_000, 1_000_000, 100_000, 10_000, - 1_000, 100, 10, 1) + var v = ptr.load[width=8](0) - ASCII_ZERO + var w = v.cast[DType.uint32]() + var mul = U32x8(10_000_000, 1_000_000, 100_000, 10_000, 1_000, 100, 10, 1) return Int((w * mul).reduce_add()) + # 6 digits (still load 8 lanes) @always_inline fn decode_6(ptr: UnsafePointer[UInt8]) -> Int: - var v = to_simd(ptr).load[width=8](0) - UInt8(ord("0")) - var w = v.cast[DType.uint32]() + var v = ptr.load[width=8](0) - ASCII_ZERO + var w = v.cast[DType.uint32]() var mul = U32x8(100_000, 10_000, 1_000, 100, 10, 1, 0, 0) return Int((w * mul).reduce_add()) + # 7 digits: scalar last digit @always_inline fn decode_7(ptr: UnsafePointer[UInt8]) -> Int: # Read the 7th digit at ptr[6] (offset 6) - var last_digit = Int(to_simd(ptr.offset(6)).load[width=1](0) - UInt8(ord("0"))) + var last_digit = Int(ptr.offset(6).load[width=1](0) - ASCII_ZERO) # decode_6 handles ptr[0] through ptr[5] return decode_6(ptr) * 10 + last_digit @@ -75,25 +72,47 @@ fn decode_7(ptr: UnsafePointer[UInt8]) -> Int: @always_inline fn decode_9(ptr: UnsafePointer[UInt8]) -> Int: # Read the 9th digit at ptr[8] (offset 8) - var last_digit = Int(to_simd(ptr.offset(8)).load[width=1](0) - UInt8(ord("0"))) + var last_digit = Int(ptr.offset(8).load[width=1](0) - ASCII_ZERO) # decode_8 handles ptr[0] through ptr[7] return decode_8(ptr) * 10 + last_digit + # 3-digit fallback @always_inline fn decode_3(ptr: UnsafePointer[UInt8]) -> Int: - var sp = to_simd(ptr) - - var d0 = Int(sp.load[width = 1](0) - UInt8(ord("0"))) - var d1 = Int(sp.load[width = 1](1) - UInt8(ord("0"))) - var d2 = Int(sp.load[width = 1](2) - UInt8(ord("0"))) + var d0 = Int(ptr.load[width=1](0) - ASCII_ZERO) + var d1 = Int(ptr.load[width=1](1) - ASCII_ZERO) + var d2 = Int(ptr.load[width=1](2) - ASCII_ZERO) return d0 * 100 + d1 * 10 + d2 + +@always_inline +fn decode[size: UInt](bstr: Span[UInt8]) -> Int: + constrained[ + size >= 3 and size <= 9, "size outside allowed range of 3 to 9" + ]() + + @parameter + if size == 3: + return decode_3(bstr.unsafe_ptr()) + elif size == 6: + return decode_6(bstr.unsafe_ptr()) + elif size == 7: + return decode_7(bstr.unsafe_ptr()) + elif size == 8: + return decode_8(bstr.unsafe_ptr()) + elif size == 9: + return decode_9(bstr.unsafe_ptr()) + else: + return -1 + + # ────────────────────────────────────────────────────────────── # Helpers for reading in fastx++ files # ────────────────────────────────────────────────────────────── + @always_inline fn strip_newlines_in_place( mut bs: ByteString, disk: Int, expected: Int @@ -139,16 +158,6 @@ fn strip_newlines_in_place( return write_pos == expected -# This is long way of https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/ -# not directly used, read_fastxpp uses cur = cur * 10 + Int(ch - UInt8(ord("0"))) directly -# Keeping here for comparison to SWAR implementation -fn ascii_to_int[O: Origin](buf: Span[UInt8, O], s: Int, e: Int) -> Int: - var v: Int = 0 - for i in range(s, e): - v = v * 10 + Int(buf[i] - ord("0")) - return v - - # ────────────────────────────────────────────────────────────── @@ -458,6 +467,7 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): self.seq = ByteString(256) self.qual = ByteString() self.comment = ByteString() + # Special comment field is 26 bytes long +1 from backtick self.last_char = 0 fn __moveinit__(out self, owned other: Self): @@ -632,7 +642,7 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): cur = 0 which += 1 else: - cur = cur * 10 + Int(ch - UInt8(ord("0"))) + cur = cur * 10 + Int(ch - ASCII_ZERO) lcnt = cur # Validate header length and resize name @@ -714,7 +724,7 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): cur = 0 which += 1 else: - cur = cur * 10 + Int(ch - UInt8(ord("0"))) + cur = cur * 10 + Int(ch - ASCII_ZERO) bpl = cur # fourth number # Validate and keep only the header @@ -749,8 +759,14 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): if 1 == False: print( "ERROR: Sequence read failed. got_seq != disk_seq", - "write_pos", write_pos, "disk_seq", disk_seq, - "bpl", bpl, "slen", len(self.seq) + "write_pos", + write_pos, + "disk_seq", + disk_seq, + "bpl", + bpl, + "slen", + len(self.seq), ) pass return len(self.seq) @@ -790,19 +806,16 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): print("ERROR: Opening backtick check failed. hdr[0]") return -3 # not a FASTX++ BPL header - #for i in range(len(hdr)): - # var code = Int(hdr[i]) - # print(i, hdr[i], code, chr(code)) + # for i in range(len(hdr)): + # var code = Int(hdr[i]) + # print(i, hdr[i], code, chr(code)) - # ── 3 Find closing back‑tick and parse hlen:slen:lcnt:bpl ----------- - var base = hdr.unsafe_ptr().offset(1) - - var hlen = decode_6(base) # bytes 0–5 - var slen = decode_9(base.offset(6)) # bytes 6–14 - var lcnt = decode_7(base.offset(15)) # bytes 15–21 - var bpl = decode_3(base.offset(22)) # bytes 22–24 + # ── 3 Find closing back‑tick and parse slen:lcnt:bpl ----------- + var slen = decode[9](hdr[1:10]) # bytes 1–9 + var lcnt = decode[7](hdr[10:17]) # bytes 10–16 + var bpl = decode[3](hdr[17:20]) # bytes 17–19 - if base.offset(25)[0] != UInt8(ord("`")): + if hdr[20] != UInt8(ord("`")): print("ERROR: Closing backtick check failed. base[25]") return -3 @@ -831,6 +844,7 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): return len(self.seq) + struct FileReader(KRead): var fh: FileHandle From f82709dd64a6b940e29f17112ee3b7a45876de4b Mon Sep 17 00:00:00 2001 From: TalonTrecek Date: Fri, 25 Apr 2025 15:30:16 +0000 Subject: [PATCH 11/16] Switch to gzip to open --- ishlib/formats/fastxpp_bench.mojo | 38 ++++++++++++++----------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/ishlib/formats/fastxpp_bench.mojo b/ishlib/formats/fastxpp_bench.mojo index f914743..a317bcf 100644 --- a/ishlib/formats/fastxpp_bench.mojo +++ b/ishlib/formats/fastxpp_bench.mojo @@ -1,31 +1,31 @@ import sys from time.time import perf_counter -from ishlib.vendor.kseq import FastxReader, BufferedReader, KRead +from ishlib.vendor.kseq import FastxReader, BufferedReader +from ishlib.vendor.zlib import GZFile from ExtraMojo.utils.ir import dump_ir # ── thin wrapper so FileHandle implements KRead ───────────────────────── -struct FileReader(KRead): - var fh: FileHandle +# struct FileReader(KRead): +# var fh: FileHandle - fn __init__(out self, owned fh: FileHandle): - self.fh = fh^ +# fn __init__(out self, owned fh: FileHandle): +# self.fh = fh^ - fn __moveinit__(out self, owned other: Self): - self.fh = other.fh^ +# fn __moveinit__(out self, owned other: Self): +# self.fh = other.fh^ - fn unbuffered_read[ - o: MutableOrigin - ](mut self, buffer: Span[UInt8, o]) raises -> Int: - return Int(self.fh.read(buffer.unsafe_ptr(), len(buffer))) +# fn unbuffered_read[ +# o: MutableOrigin +# ](mut self, buffer: Span[UInt8, o]) raises -> Int: +# return Int(self.fh.read(buffer.unsafe_ptr(), len(buffer))) # ──────────────────────────────────────────────────────────────────────── fn bench_original(path: String) raises -> (Int, Int, Float64): - var fh = open(path, "r") - var rdr = FastxReader[read_comment=False](BufferedReader(FileReader(fh^))) + var rdr = FastxReader[read_comment=False](BufferedReader(GZFile(path, "r"))) var rec = 0 var seq = 0 var t0 = perf_counter() @@ -36,8 +36,7 @@ fn bench_original(path: String) raises -> (Int, Int, Float64): fn bench_fastxpp(path: String) raises -> (Int, Int, Float64): - var fh = open(path, "r") - var rdr = FastxReader[read_comment=False](BufferedReader(FileReader(fh^))) + var rdr = FastxReader[read_comment=False](BufferedReader(GZFile(path, "r"))) var rec = 0 var seq = 0 var t0 = perf_counter() @@ -51,8 +50,7 @@ fn bench_fastxpp(path: String) raises -> (Int, Int, Float64): fn bench_fastxpp_bpl(path: String) raises -> (Int, Int, Float64): - var fh = open(path, "r") - var rdr = FastxReader[read_comment=False](BufferedReader(FileReader(fh^))) + var rdr = FastxReader[read_comment=False](BufferedReader(GZFile(path, "r"))) var rec = 0 var seq = 0 var t0 = perf_counter() @@ -66,8 +64,7 @@ fn bench_fastxpp_bpl(path: String) raises -> (Int, Int, Float64): fn bench_fastxpp_swar(path: String) raises -> (Int, Int, Float64): - var fh = open(path, "r") - var rdr = FastxReader[read_comment=False](BufferedReader(FileReader(fh^))) + var rdr = FastxReader[read_comment=False](BufferedReader(GZFile(path, "r"))) var rec = 0 var seq = 0 var t0 = perf_counter() @@ -81,8 +78,7 @@ fn bench_fastxpp_swar(path: String) raises -> (Int, Int, Float64): fn bench_fastxpp_bpl2(path: String) raises -> (Int, Int, Float64): - var fh = open(path, "r") - var rdr = FastxReader[read_comment=False](BufferedReader(FileReader(fh^))) + var rdr = FastxReader[read_comment=False](BufferedReader(GZFile(path, "r"))) var rec = 0 var seq = 0 var t0 = perf_counter() From 9c4697c8c91072079c60eeb50951683d02e5c1c7 Mon Sep 17 00:00:00 2001 From: TalonTrecek Date: Fri, 25 Apr 2025 18:49:44 +0000 Subject: [PATCH 12/16] Add memcpy free method --- ishlib/vendor/kseq.mojo | 152 ++++++++++++++++++--------------- ishlib/vendor/swar_decode.mojo | 24 +++++- 2 files changed, 104 insertions(+), 72 deletions(-) diff --git a/ishlib/vendor/kseq.mojo b/ishlib/vendor/kseq.mojo index cdb2eb7..307b8e5 100644 --- a/ishlib/vendor/kseq.mojo +++ b/ishlib/vendor/kseq.mojo @@ -23,6 +23,7 @@ def main(): import sys from memory import UnsafePointer, memcpy from utils import StringSlice +from ishlib.vendor.swar_decode import decode from time.time import perf_counter @@ -37,75 +38,16 @@ alias ASCII_FASTQ_RECORD_START = ord("@") alias ASCII_FASTQ_SEPARATOR = ord("+") alias ASCII_ZERO = UInt8(ord("0")) -alias U8x8 = SIMD[DType.uint8, 8] -alias U32x8 = SIMD[DType.uint32, 8] - -# 8 ASCII digits -@always_inline -fn decode_8(ptr: UnsafePointer[UInt8]) -> Int: - var v = ptr.load[width=8](0) - ASCII_ZERO - var w = v.cast[DType.uint32]() - var mul = U32x8(10_000_000, 1_000_000, 100_000, 10_000, 1_000, 100, 10, 1) - return Int((w * mul).reduce_add()) - - -# 6 digits (still load 8 lanes) -@always_inline -fn decode_6(ptr: UnsafePointer[UInt8]) -> Int: - var v = ptr.load[width=8](0) - ASCII_ZERO - var w = v.cast[DType.uint32]() - var mul = U32x8(100_000, 10_000, 1_000, 100, 10, 1, 0, 0) - return Int((w * mul).reduce_add()) - - -# 7 digits: scalar last digit -@always_inline -fn decode_7(ptr: UnsafePointer[UInt8]) -> Int: - # Read the 7th digit at ptr[6] (offset 6) - var last_digit = Int(ptr.offset(6).load[width=1](0) - ASCII_ZERO) - # decode_6 handles ptr[0] through ptr[5] - return decode_6(ptr) * 10 + last_digit - - -# 9 digits: scalar last digit -@always_inline -fn decode_9(ptr: UnsafePointer[UInt8]) -> Int: - # Read the 9th digit at ptr[8] (offset 8) - var last_digit = Int(ptr.offset(8).load[width=1](0) - ASCII_ZERO) - # decode_8 handles ptr[0] through ptr[7] - return decode_8(ptr) * 10 + last_digit - - -# 3-digit fallback -@always_inline -fn decode_3(ptr: UnsafePointer[UInt8]) -> Int: - var d0 = Int(ptr.load[width=1](0) - ASCII_ZERO) - var d1 = Int(ptr.load[width=1](1) - ASCII_ZERO) - var d2 = Int(ptr.load[width=1](2) - ASCII_ZERO) - - return d0 * 100 + d1 * 10 + d2 - - -@always_inline -fn decode[size: UInt](bstr: Span[UInt8]) -> Int: - constrained[ - size >= 3 and size <= 9, "size outside allowed range of 3 to 9" - ]() - - @parameter - if size == 3: - return decode_3(bstr.unsafe_ptr()) - elif size == 6: - return decode_6(bstr.unsafe_ptr()) - elif size == 7: - return decode_7(bstr.unsafe_ptr()) - elif size == 8: - return decode_8(bstr.unsafe_ptr()) - elif size == 9: - return decode_9(bstr.unsafe_ptr()) - else: - return -1 +fn read_chunk(mut self) raises -> Span[UInt8]: + if self.start >= self.end: + self.start = 0 + self.end = self.reader.unbuffered_read(self.buffer.as_span()) + if self.end < 0: + raise IOError + var out = self.buffer.as_span()[self.start : self.end] + self.start = self.end # mark consumed + return out # ────────────────────────────────────────────────────────────── @@ -844,6 +786,76 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): return len(self.seq) + fn read_fastxpp_read_once(mut self) raises -> Int: + # ── 0 Locate the next header byte ('>' or '@') ────────────────────── + var marker: UInt8 + if self.last_char == 0: + var c = self.reader.read_byte() + while ( + c >= 0 + and c != ASCII_FASTA_RECORD_START + and c != ASCII_FASTQ_RECORD_START + ): + c = self.reader.read_byte() + if c < 0: + return Int(c) + marker = UInt8(c) + else: + marker = UInt8(self.last_char) + self.last_char = 0 + + # ── 1 Reset buffers reused across records ─────────────────────────── + self.seq.clear() + self.qual.clear() + self.comment.clear() + self.name.clear() + + # ── 2 Read the back‑tick header line -------------------------------- + var r = self.reader.read_until[SearchChar.Newline](self.name, 0, False) + if r < 0: + return Int(r) + + var hdr = self.name.as_span() + # ── 3 Find closing back‑tick and parse slen:lcnt:bpl ----------- + if len(hdr) == 0 or hdr[0] != UInt8(ord("`")): + print("ERROR: header lacks opening back-tick") + return -3 + + var slen = decode[9](hdr[1:10]) + var lcnt = decode[7](hdr[10:17]) + var bpl = decode[3](hdr[17:20]) + + if hdr[20] != UInt8(ord("`")): + print("ERROR: header lacks closing back-tick") + return -3 + + # ── 4 SEQUENCE block --------------------------------------- + self.seq.reserve(UInt32(slen)) + var copied: Int = 0 + while copied < slen: + var want = min(bpl - 1, slen - copied) + + # track length before and after + var before = len(self.seq) + var _want = want + var _total = self.reader.read_bytes(self.seq, _want) + if _total < 0: + print("ERROR: read_bytes returned error", _total) + return -3 + var got = Int(_total) - before # true delta + + if got != want: + return -3 + + copied += got + + # consume newline (or CRLF pair) + var nl = self.reader.read_byte() + if nl != ASCII_NEWLINE: + print("ERROR: expected newline after sequence chunk, found", nl) + return -3 + return slen + struct FileReader(KRead): var fh: FileHandle @@ -875,12 +887,12 @@ fn main() raises: BufferedReader(FileReader(fh^)) ) - var first = reader.read_fastxpp_swar() + var first = reader.read_fastxpp_read_once() print("first‑read returned", first) var count = 0 while True: - var n = reader.read_fastxpp_swar() + var n = reader.read_fastxpp_read_once() if n < 0: break count += 1 diff --git a/ishlib/vendor/swar_decode.mojo b/ishlib/vendor/swar_decode.mojo index 8e2e071..48fd011 100644 --- a/ishlib/vendor/swar_decode.mojo +++ b/ishlib/vendor/swar_decode.mojo @@ -1,10 +1,9 @@ -# This is for testing only - from memory import UnsafePointer from sys import exit alias U8x8 = SIMD[DType.uint8, 8] alias U32x8 = SIMD[DType.uint32, 8] +alias ASCII_ZERO = UInt8(ord("0")) # 8 ASCII digits @@ -53,6 +52,27 @@ fn decode_3(ptr: UnsafePointer[UInt8]) -> Int: return d0 * 100 + d1 * 10 + d2 +@always_inline +fn decode[size: UInt](bstr: Span[UInt8]) -> Int: + constrained[ + size >= 3 and size <= 9, "size outside allowed range of 3 to 9" + ]() + + @parameter + if size == 3: + return decode_3(bstr.unsafe_ptr()) + elif size == 6: + return decode_6(bstr.unsafe_ptr()) + elif size == 7: + return decode_7(bstr.unsafe_ptr()) + elif size == 8: + return decode_8(bstr.unsafe_ptr()) + elif size == 9: + return decode_9(bstr.unsafe_ptr()) + else: + return -1 + + # zero-pad an Int to a fixed width fn zpad(value: Int, width: Int) -> String: var s = String(value) From 8d56122a947819c50c36df758771ae220e8c59ac Mon Sep 17 00:00:00 2001 From: TalonTrecek Date: Mon, 28 Apr 2025 15:55:13 +0000 Subject: [PATCH 13/16] add read once test --- ishlib/formats/fastxpp_bench.mojo | 17 +++++++++++++++++ ishlib/vendor/kseq.mojo | 5 +++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/ishlib/formats/fastxpp_bench.mojo b/ishlib/formats/fastxpp_bench.mojo index a317bcf..572f339 100644 --- a/ishlib/formats/fastxpp_bench.mojo +++ b/ishlib/formats/fastxpp_bench.mojo @@ -77,6 +77,20 @@ fn bench_fastxpp_swar(path: String) raises -> (Int, Int, Float64): return (rec, seq, perf_counter() - t0) +fn bench_fastxpp_read_once(path: String) raises -> (Int, Int, Float64): + var rdr = FastxReader[read_comment=False](BufferedReader(GZFile(path, "r"))) + var rec = 0 + var seq = 0 + var t0 = perf_counter() + while True: + var n = rdr.read_fastxpp_read_once() + if n < 0: + break + rec += 1 + seq += n + return (rec, seq, perf_counter() - t0) + + fn bench_fastxpp_bpl2(path: String) raises -> (Int, Int, Float64): var rdr = FastxReader[read_comment=False](BufferedReader(GZFile(path, "r"))) var rec = 0 @@ -122,6 +136,9 @@ fn main() raises: print( "mode=fastxpp_swar records=", r, " bases=", s, " time=", t, "s" ) + elif mode == "read_once": + r, s, t = bench_fastxpp_read_once(path) + print("mode=read_once records=", r, " bases=", s, " time=", t, "s") elif mode == "filler": r, s, t = bench_fastxpp_bpl2(path) print( diff --git a/ishlib/vendor/kseq.mojo b/ishlib/vendor/kseq.mojo index 307b8e5..6b48b32 100644 --- a/ishlib/vendor/kseq.mojo +++ b/ishlib/vendor/kseq.mojo @@ -748,6 +748,7 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): print("ERROR: Opening backtick check failed. hdr[0]") return -3 # not a FASTX++ BPL header + # useful debugging # for i in range(len(hdr)): # var code = Int(hdr[i]) # print(i, hdr[i], code, chr(code)) @@ -822,7 +823,7 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): return -3 var slen = decode[9](hdr[1:10]) - var lcnt = decode[7](hdr[10:17]) + var lcnt = decode[7](hdr[10:17]) # not needed in this approach var bpl = decode[3](hdr[17:20]) if hdr[20] != UInt8(ord("`")): @@ -849,7 +850,7 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): copied += got - # consume newline (or CRLF pair) + # consume newline var nl = self.reader.read_byte() if nl != ASCII_NEWLINE: print("ERROR: expected newline after sequence chunk, found", nl) From b3b11d5bc3cd4d35b71f9f1f51cf54f5826675c7 Mon Sep 17 00:00:00 2001 From: TalonTrecek Date: Mon, 28 Apr 2025 15:55:53 +0000 Subject: [PATCH 14/16] remove toying around code --- ishlib/vendor/kseq.mojo | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/ishlib/vendor/kseq.mojo b/ishlib/vendor/kseq.mojo index 6b48b32..e0c39c9 100644 --- a/ishlib/vendor/kseq.mojo +++ b/ishlib/vendor/kseq.mojo @@ -39,17 +39,6 @@ alias ASCII_FASTQ_SEPARATOR = ord("+") alias ASCII_ZERO = UInt8(ord("0")) -fn read_chunk(mut self) raises -> Span[UInt8]: - if self.start >= self.end: - self.start = 0 - self.end = self.reader.unbuffered_read(self.buffer.as_span()) - if self.end < 0: - raise IOError - var out = self.buffer.as_span()[self.start : self.end] - self.start = self.end # mark consumed - return out - - # ────────────────────────────────────────────────────────────── # Helpers for reading in fastx++ files # ────────────────────────────────────────────────────────────── From 64620730d8b72adecda5d72cb4b0c9cbbe7d0dd7 Mon Sep 17 00:00:00 2001 From: TalonTrecek Date: Mon, 28 Apr 2025 15:58:00 +0000 Subject: [PATCH 15/16] cleanup --- ishlib/formats/fastxpp_bench.mojo | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/ishlib/formats/fastxpp_bench.mojo b/ishlib/formats/fastxpp_bench.mojo index 572f339..7481956 100644 --- a/ishlib/formats/fastxpp_bench.mojo +++ b/ishlib/formats/fastxpp_bench.mojo @@ -5,25 +5,6 @@ from ishlib.vendor.zlib import GZFile from ExtraMojo.utils.ir import dump_ir -# ── thin wrapper so FileHandle implements KRead ───────────────────────── -# struct FileReader(KRead): -# var fh: FileHandle - -# fn __init__(out self, owned fh: FileHandle): -# self.fh = fh^ - -# fn __moveinit__(out self, owned other: Self): -# self.fh = other.fh^ - -# fn unbuffered_read[ -# o: MutableOrigin -# ](mut self, buffer: Span[UInt8, o]) raises -> Int: -# return Int(self.fh.read(buffer.unsafe_ptr(), len(buffer))) - - -# ──────────────────────────────────────────────────────────────────────── - - fn bench_original(path: String) raises -> (Int, Int, Float64): var rdr = FastxReader[read_comment=False](BufferedReader(GZFile(path, "r"))) var rec = 0 From b7a8cd472ff3e5fb3599a87058337a585639f1fc Mon Sep 17 00:00:00 2001 From: TalonTrecek Date: Mon, 28 Apr 2025 21:44:55 +0000 Subject: [PATCH 16/16] Clean up --- ishlib/formats/fastxpp_bench.mojo | 59 ++++------ ishlib/vendor/kseq.mojo | 177 ++++-------------------------- 2 files changed, 42 insertions(+), 194 deletions(-) diff --git a/ishlib/formats/fastxpp_bench.mojo b/ishlib/formats/fastxpp_bench.mojo index 7481956..3899caf 100644 --- a/ishlib/formats/fastxpp_bench.mojo +++ b/ishlib/formats/fastxpp_bench.mojo @@ -16,27 +16,13 @@ fn bench_original(path: String) raises -> (Int, Int, Float64): return (rec, seq, perf_counter() - t0) -fn bench_fastxpp(path: String) raises -> (Int, Int, Float64): +fn bench_fastxpp_strip_newline(path: String) raises -> (Int, Int, Float64): var rdr = FastxReader[read_comment=False](BufferedReader(GZFile(path, "r"))) var rec = 0 var seq = 0 var t0 = perf_counter() while True: - var n = rdr.read_fastxpp() - if n < 0: - break - rec += 1 - seq += n - return (rec, seq, perf_counter() - t0) - - -fn bench_fastxpp_bpl(path: String) raises -> (Int, Int, Float64): - var rdr = FastxReader[read_comment=False](BufferedReader(GZFile(path, "r"))) - var rec = 0 - var seq = 0 - var t0 = perf_counter() - while True: - var n = rdr.read_fastxpp_bpl() + var n = rdr.read_fastxpp_strip_newline() if n < 0: break rec += 1 @@ -72,20 +58,6 @@ fn bench_fastxpp_read_once(path: String) raises -> (Int, Int, Float64): return (rec, seq, perf_counter() - t0) -fn bench_fastxpp_bpl2(path: String) raises -> (Int, Int, Float64): - var rdr = FastxReader[read_comment=False](BufferedReader(GZFile(path, "r"))) - var rec = 0 - var seq = 0 - var t0 = perf_counter() - while True: - var n = rdr.read_fastxpp_bpl() - if n < 0: - break - rec += 1 - seq += n - return (rec, seq, perf_counter() - t0) - - fn main() raises: var argv = sys.argv() if len(argv) < 2 or len(argv) > 3: @@ -102,15 +74,16 @@ fn main() raises: print( "mode=orig records=", r, " bases=", s, " time=", t, "s" ) - elif mode == "bpl": - r, s, t = bench_fastxpp_bpl(path) + elif mode == "strip_newline": + r, s, t = bench_fastxpp_strip_newline(path) print( - "mode=fastxpp_bpl records=", r, " bases=", s, " time=", t, "s" - ) - elif mode == "fastxpp": - r, s, t = bench_fastxpp(path) - print( - "mode=fastxpp records=", r, " bases=", s, " time=", t, "s" + "mode=read_fastxpp_strip_newline records=", + r, + " bases=", + s, + " time=", + t, + "s", ) elif mode == "swar": r, s, t = bench_fastxpp_swar(path) @@ -121,9 +94,15 @@ fn main() raises: r, s, t = bench_fastxpp_read_once(path) print("mode=read_once records=", r, " bases=", s, " time=", t, "s") elif mode == "filler": - r, s, t = bench_fastxpp_bpl2(path) + r, s, t = bench_fastxpp_read_once(path) print( - "mode=fastxpp_bpl records=", r, " bases=", s, " time=", t, "s" + "mode=bench_fastxpp_read_once records=", + r, + " bases=", + s, + " time=", + t, + "s", ) else: print("Unknown mode:", mode) diff --git a/ishlib/vendor/kseq.mojo b/ishlib/vendor/kseq.mojo index e0c39c9..5116eae 100644 --- a/ishlib/vendor/kseq.mojo +++ b/ishlib/vendor/kseq.mojo @@ -512,10 +512,9 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): return -2 # error: qual string is of different length return len(self.seq) - fn read_fastxpp(mut self) raises -> Int: - # Locate next header + fn read_fastxpp_strip_newline(mut self) raises -> Int: + # ── 0 Locate the next header byte ('>' or '@') ────────────────────── var marker: UInt8 - if self.last_char == 0: var c = self.reader.read_byte() while ( @@ -525,92 +524,11 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): ): c = self.reader.read_byte() if c < 0: - # print("[DBG] EOF before header, c=", c) + # EOF or stream error return Int(c) - marker = UInt8(c) # NEW – store the marker - else: - marker = UInt8(self.last_char) # NEW - var c = self.last_char - self.last_char = 0 - - # Clear buffers for this record - self.seq.clear() - self.qual.clear() - self.comment.clear() - self.name.clear() - - # Read header line (without copying '\n') - var r = self.reader.read_until[SearchChar.Newline](self.name, 0, False) - if r < 0: - return Int(r) - - var hdr = self.name.as_span() - if len(hdr) == 0 or hdr[0] != UInt8(ord("`")): - return -3 # not FASTX++ header - - # Find closing back‑tick with one memchr - var after = Span[UInt8, __origin_of(hdr)]( - ptr=hdr.unsafe_ptr().offset(1), length=len(hdr) - 1 - ) - var rel = memchr(after, UInt8(ord("`"))) - if rel == -1: - return -3 - var pos2 = rel + 1 - - # Parse hlen, slen, lcnt in one pass - var hlen: Int = 0 - var slen: Int = 0 - var lcnt: Int = 0 - var cur: Int = 0 - var which: Int = 0 - for i in range(1, pos2): - var ch = hdr[i] - if ch == UInt8(ord(":")): - if which == 0: - hlen = cur - elif which == 1: - slen = cur - cur = 0 - which += 1 - else: - cur = cur * 10 + Int(ch - ASCII_ZERO) - lcnt = cur - - # Validate header length and resize name - if len(hdr) - (pos2 + 1) != hlen: - return -3 - self.name.resize(hlen) - - # ── Sequence block ──────────────────────────────────────────── - var disk = slen + lcnt # bytes on disk (bases+LFs) - var need = disk # local copy; will be mutated - self.seq.clear() - self.seq.reserve(need) - var got = self.reader.read_bytes(self.seq, need) - if got != disk: - return -3 - if not strip_newlines_in_place(self.seq, disk, slen): - return -2 - - return len(self.seq) - - fn read_fastxpp_bpl(mut self) raises -> Int: - # ── 0 Locate the next header byte ('>' or '@') ────────────────────── - var marker: UInt8 # remember which flavour we’re on - if self.last_char == 0: - var c = self.reader.read_byte() - while ( - c >= 0 - and c != ASCII_FASTA_RECORD_START - and c != ASCII_FASTQ_RECORD_START - ): - c = self.reader.read_byte() - if c < 0: - return Int(c) # EOF / stream error (-1 / -2) marker = UInt8(c) else: marker = UInt8(self.last_char) - var c = self.last_char self.last_char = 0 # ── 1 Reset buffers reused across records ─────────────────────────── @@ -626,80 +544,31 @@ struct FastxReader[R: KRead, read_comment: Bool = True](Movable): var hdr = self.name.as_span() if len(hdr) == 0 or hdr[0] != UInt8(ord("`")): - return -3 # not a FASTX++ BPL header + # We need at least 21 bytes: 1 backtick + 9 + 7 + 3 + 1 backtick + return -3 # Not a proper FASTX++ header - # ── 3 Find closing back‑tick and parse hlen:slen:lcnt:bpl ----------- - var after = Span[UInt8, __origin_of(hdr)]( - ptr=hdr.unsafe_ptr().offset(1), length=len(hdr) - 1 - ) - var rel = memchr(after, UInt8(ord("`"))) - if rel == -1: - return -3 - var pos2 = rel + 1 # index of 2nd back‑tick - - var hlen: Int = 0 - var slen: Int = 0 - var lcnt: Int = 0 - var bpl: Int = 0 - var cur: Int = 0 - var which: Int = 0 - for i in range(1, pos2): - var ch = hdr[i] - if ch == UInt8(ord(":")): - if which == 0: - hlen = cur - elif which == 1: - slen = cur - elif which == 2: - lcnt = cur - cur = 0 - which += 1 - else: - cur = cur * 10 + Int(ch - ASCII_ZERO) - bpl = cur # fourth number - - # Validate and keep only the header - # var imputed_len = len(hdr) - (pos2 + 1) - # if imputed_len != hlen: - # return -3 - memcpy(self.name.ptr, self.name.ptr.offset(pos2 + 1), hlen) - self.name.resize(hlen) + # ── 3 Decode slen:lcnt:bpl from the fixed fields -------------------- + var slen = decode[9](hdr[1:10]) # 9‑digit field at positions [1..9] + var lcnt = decode[7](hdr[10:17]) # 7‑digit field at positions [10..16] + var bpl = decode[3](hdr[17:20]) # 3‑digit field at positions [17..19] - # ── 4 SEQUENCE block --------------------------------------- - var disk_seq = slen + lcnt # immutable reference - var rest_seq = disk_seq # mutable copy for read_bytes + # Confirm the second backtick is at hdr[20] + if hdr[20] != UInt8(ord("`")): + return -3 + # ── 4 Read the sequence block (slen + lcnt bytes on disk) ----------- + var disk_seq = slen + lcnt self.seq.reserve(disk_seq) - var got_seq = self.reader.read_bytes(self.seq, rest_seq) + var _disk_seq = disk_seq + var got_seq = self.reader.read_bytes(self.seq, _disk_seq) if got_seq != disk_seq: return -3 # truncated record - # compact in‑place: copy (bpl‑1) bases, skip the LF, repeat - var write_pos: Int = 0 - var read_pos: Int = 0 - while read_pos < disk_seq: - memcpy( - self.seq.addr(write_pos), # destination - self.seq.addr(read_pos), # source - bpl - 1, - ) # copy only the bases - write_pos += bpl - 1 - read_pos += bpl # jump over the LF - self.seq.resize(write_pos) # write_pos == slen - # This is cold code that seems to preven inlining, it improve performance. - if 1 == False: - print( - "ERROR: Sequence read failed. got_seq != disk_seq", - "write_pos", - write_pos, - "disk_seq", - disk_seq, - "bpl", - bpl, - "slen", - len(self.seq), - ) - pass + # ── 5 Remove newline characters in‑place using the helper ----------- + var ok = strip_newlines_in_place(self.seq, disk_seq, slen) + if not ok: + return -2 # mismatch: not the expected base count + return len(self.seq) fn read_fastxpp_swar(mut self) raises -> Int: @@ -877,12 +746,12 @@ fn main() raises: BufferedReader(FileReader(fh^)) ) - var first = reader.read_fastxpp_read_once() + var first = reader.read_fastxpp() print("first‑read returned", first) var count = 0 while True: - var n = reader.read_fastxpp_read_once() + var n = reader.read_fastxpp() if n < 0: break count += 1