Skip to content

Commit 75562a3

Browse files
committed
Add functions encode! and encode_append!
These functions encode one AbstractCIGAR type as another, either writing into a MemoryView or appending to a Vector. They are now the more general primitives used to build one cigar from another.
1 parent a31518f commit 75562a3

6 files changed

Lines changed: 283 additions & 72 deletions

File tree

AGENTS.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,16 @@ This file provides guidance to AI agents when working with code in this reposito
66

77
```bash
88
# Run all tests
9-
JULIA_TEST_FAILFAST=true julia --project -e 'using Pkg; Pkg.test()'
9+
JULIA_TEST_FAILFAST=true julia --project=. --startup=no -e 'using Pkg; Pkg.test()'
1010

1111
# Run tests with specific test file
12-
JULIA_TEST_FAILFAST=true julia --project test/runtests.jl
12+
JULIA_TEST_FAILFAST=true julia --project=. --startup=no test/runtests.jl
1313

1414
# Run Julia REPL with project activated
15-
julia --project
15+
julia --project --startup=no
1616

1717
# Generate documentation locally
18-
julia --project=docs docs/make.jl
18+
julia --project=docs --startup=no docs/make.jl
1919
```
2020

2121
## Architecture

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "CIGARStrings"
22
uuid = "e5b51f81-6ffd-4e20-bbec-f118e37ea906"
3-
version = "0.1.7"
3+
version = "0.1.8"
44
authors = ["Jakob Nybo Nissen <jakobnybonissen@gmail.com>"]
55

66
[deps]

docs/src/reference.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ CIGARStrings.try_parse
2222

2323
```@docs
2424
unsafe_switch_memory
25+
encode!
26+
encode_append!
2527
OP_M
2628
OP_I
2729
OP_D

src/CIGARStrings.jl

Lines changed: 164 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ export CIGAR,
77
aln_to_query, aln_to_ref, Translation, count_matches,
88
BAMCIGAR, AbstractCIGAR, cigar_view!, ref, query, aln, pos_to_pos,
99
unsafe_switch_memory, is_compatible,
10-
normalize, unsafe_normalize, normalize!
10+
normalize, unsafe_normalize, normalize!,
11+
encode!, encode_append!
1112

1213
public CIGARError, CIGARErrorType, Errors, try_parse, outside, pos, gap,
1314
TranslationKind, PositionMapper
@@ -268,6 +269,166 @@ Base.eltype(::Type{<:AbstractCIGAR}) = CIGARElement
268269
include("bytecigar.jl")
269270
include("bamcigar.jl")
270271

272+
"""
273+
encode!(
274+
mem::MutableMemoryView{UInt8},
275+
::Type{T <: AbstractCIGAR},
276+
cigar::AbstractCIGAR
277+
)::T
278+
279+
Encode `cigar` as type `T` into the beginning of `mem`,
280+
and return the result as a new `T` backed by `mem`.
281+
282+
Throw a `BoundsError` if `mem` is too short to contain the whole encoding.
283+
284+
!!! warning
285+
Since the first bytes of `mem` are used to back the newly created cigar,
286+
mutating `mem` after this function has been called will invalidate
287+
the created cigar.
288+
289+
# Examples
290+
```jldoctest
291+
julia> c = CIGAR("3S5M1D19X2I6M3H");
292+
293+
julia> mem = MemoryView(zeros(UInt8, 60));
294+
295+
julia> bc = encode!(mem, BAMCIGAR, c)
296+
BAMCIGAR(CIGAR("3S5M1D19X2I6M3H"))
297+
298+
julia> n_bytes = length(MemoryView(bc));
299+
300+
julia> ImmutableMemoryView(mem)[1:n_bytes] === MemoryView(bc)
301+
true
302+
```
303+
"""
304+
# BAMCIGAR -> BAMCIGAR: the source bytes are copied unchanged to the front of `mem`,
# and the returned cigar is backed by that prefix of `mem`.
function encode!(mem::MutableMemoryView{UInt8}, ::Type{BAMCIGAR}, cigar::BAMCIGAR)
    source = MemoryView(cigar)
    span = eachindex(source)
    # Fail before writing anything if `mem` cannot hold the whole encoding.
    @boundscheck checkbounds(mem, span)
    @inbounds begin
        copyto!(mem, source)
        backing = ImmutableMemoryView(mem)[span]
    end
    # The length fields are carried over from `cigar`, since the content is identical.
    return BAMCIGAR(unsafe, backing, cigar.aln_len, cigar.ref_len, cigar.query_len)
end
311+
312+
# CIGAR -> CIGAR: the source bytes are copied unchanged to the front of `mem`,
# and the returned cigar is backed by that prefix of `mem`.
function encode!(mem::MutableMemoryView{UInt8}, ::Type{CIGAR}, cigar::CIGAR)
    source = MemoryView(cigar)
    span = eachindex(source)
    # Fail before writing anything if `mem` cannot hold the whole encoding.
    @boundscheck checkbounds(mem, span)
    @inbounds begin
        copyto!(mem, source)
        backing = ImmutableMemoryView(mem)[span]
    end
    # Operation count and length fields are carried over from `cigar`.
    return CIGAR(
        unsafe, backing, cigar.n_ops, cigar.aln_len, cigar.ref_len, cigar.query_len
    )
end
319+
320+
# CIGAR -> BAMCIGAR: every element of `cigar` is written as one 32-bit word
# (four bytes) into the front of `mem`.
function encode!(mem::MutableMemoryView{UInt8}, ::Type{BAMCIGAR}, cigar::CIGAR)
    needed = 4 * length(cigar)
    span = 1:needed
    @boundscheck checkbounds(mem, span)
    dst = @inbounds mem[span]
    # Raw pointer stores below: keep the backing memory rooted while they run.
    GC.@preserve dst begin
        words = Ptr{UInt32}(pointer(dst))
        for (i, element) in enumerate(cigar)
            # `htol` converts from host to little-endian byte order before storing.
            unsafe_store!(words, htol(getfield(element, :x)), i)
        end
    end
    packed = ImmutableMemoryView(dst)
    return BAMCIGAR(unsafe, packed, cigar.aln_len, cigar.ref_len, cigar.query_len)
end
334+
335+
# BAMCIGAR -> CIGAR: each element is written as its decimal length followed by
# a single byte looked up in `CIGAR_BYTE_LUT`.
#
# The writes into `mem` are not marked `@inbounds`, so a too-short `mem` errors at
# the first out-of-range write; bytes written before that point remain in `mem`.
function encode!(mem::MutableMemoryView{UInt8}, ::Type{CIGAR}, cigar::BAMCIGAR)
    written = 0
    for element in cigar
        remaining = element.len % UInt32
        first_digit = written + 1
        # Emit the decimal digits least-significant first (0x30 is ASCII '0') ...
        while !iszero(remaining)
            (remaining, digit) = divrem(remaining, UInt32(10))
            written += 1
            mem[written] = digit + 0x30
        end
        # ... then reverse that run in place so it reads most-significant first.
        # Both indices were just written to, hence `@inbounds`.
        lo = first_digit
        hi = written
        @inbounds while lo < hi
            mem[lo], mem[hi] = mem[hi], mem[lo]
            lo += 1
            hi -= 1
        end
        # The low four bits of the raw element select a 7-bit entry in the table.
        shift = (7 * (getfield(element, :x) & 0x0f)) & 63
        written += 1
        mem[written] = ((CIGAR_BYTE_LUT >> shift) % UInt8) & 0x7f
    end
    backing = @inbounds ImmutableMemoryView(mem)[1:written]
    n_ops = length(cigar) % UInt32
    return CIGAR(unsafe, backing, n_ops, cigar.aln_len, cigar.ref_len, cigar.query_len)
end
359+
360+
"""
361+
encode_append!(
362+
v::Vector{UInt8},
363+
::Type{T <: AbstractCIGAR},
364+
cigar::AbstractCIGAR
365+
)::T
366+
367+
Encode `cigar` as type `T` by appending its representation to `v`,
368+
and return the result as a new `T` backed by `v`'s memory.
369+
370+
!!! warning
371+
Since the returned cigar will use `v` as backing memory,
372+
mutating `v` after this function has been called will invalidate
373+
the created cigar.
374+
375+
# Examples
376+
```jldoctest
377+
julia> c = CIGAR("3S5M1D19X2I6M3H");
378+
379+
julia> v = UInt8[1, 2, 3, 4];
380+
381+
julia> bc = encode_append!(v, BAMCIGAR, c)
382+
BAMCIGAR(CIGAR("3S5M1D19X2I6M3H"))
383+
384+
julia> v[1:4] == 1:4 # unchanged
385+
true
386+
387+
julia> MemoryView(bc) === ImmutableMemoryView(v)[5:end]
388+
true
389+
```
390+
"""
391+
# Same-type append: grow `v` by exactly the size of `cigar`'s encoding and let
# `encode!` fill the newly added tail.
function encode_append!(
        v::Vector{UInt8}, ::Type{T}, cigar::T
    ) where {T <: Union{CIGAR, BAMCIGAR}}
    n_existing = length(v)
    n_added = length(MemoryView(cigar))
    resize!(v, n_existing + n_added)
    # The tail has exactly `n_added` bytes, so the bounds check in `encode!`
    # cannot fail.
    tail = @inbounds MemoryView(v)[(n_existing + 1):end]
    return @inbounds encode!(tail, T, cigar)
end
398+
399+
# CIGAR -> BAMCIGAR append: the encoding takes four bytes per element, so `v`
# is grown by that amount and `encode!` fills the new tail.
function encode_append!(v::Vector{UInt8}, ::Type{BAMCIGAR}, cigar::CIGAR)
    n_existing = length(v)
    n_added = 4 * length(cigar)
    resize!(v, n_existing + n_added)
    # The tail has exactly `n_added` bytes, so the bounds check in `encode!`
    # cannot fail.
    tail = @inbounds MemoryView(v)[(n_existing + 1):end]
    return @inbounds encode!(tail, BAMCIGAR, cigar)
end
405+
406+
# BAMCIGAR -> CIGAR append: the final size is not known up front, so the bytes
# are pushed onto `v` one at a time. Existing content of `v` is left untouched.
function encode_append!(v::Vector{UInt8}, ::Type{CIGAR}, cigar::BAMCIGAR)
    n_existing = length(v)
    # Reserve two bytes per element as a starting guess (presumably one digit
    # plus one operation byte); never shrink an already larger allocation.
    sizehint!(v, n_existing + 2 * length(cigar); shrink = false)
    for element in cigar
        remaining = element.len % UInt32
        first_digit = length(v) + 1
        # Push the decimal digits least-significant first (0x30 is ASCII '0') ...
        while !iszero(remaining)
            (remaining, digit) = divrem(remaining, UInt32(10))
            push!(v, digit + 0x30)
        end
        # ... then flip that run so it reads most-significant first.
        # `reverse!` is a no-op for runs of zero or one digit.
        reverse!(v, first_digit, lastindex(v))
        # The low four bits of the raw element select a 7-bit entry in the table.
        shift = (7 * (getfield(element, :x) & 0x0f)) & 63
        push!(v, ((CIGAR_BYTE_LUT >> shift) % UInt8) & 0x7f)
    end
    backing = @inbounds ImmutableMemoryView(v)[(n_existing + 1):end]
    n_ops = length(cigar) % UInt32
    return CIGAR(unsafe, backing, n_ops, cigar.aln_len, cigar.ref_len, cigar.query_len)
end
431+
271432
const CONSUMES = let
272433
x = UInt32(0)
273434
for query in [OP_M, OP_I, OP_S, OP_H, OP_Eq, OP_X] # not PDN
@@ -528,12 +689,12 @@ function normalize end
528689
"""
529690
normalize!(cigar::T, mem::MutableMemoryView{UInt8})::T where {T <: AbstractCIGAR}
530691
531-
Same as [`normalize`](@ref), but uses allocaiton of `mem` instead of allocating
692+
Same as [`normalize`](@ref), but uses allocation of `mem` instead of allocating
532693
a new array.
533694
534695
Throws a `BoundsError` if `mem` cannot hold the normalized cigar.
535696
A cigar after normalization is guaranteed to use at most as much memory
536-
as cigar before normalization, so if `length(mem) ≥ length(MemoryViews(cigar))`,
697+
as cigar before normalization, so if `length(mem) ≥ length(MemoryView(cigar))`,
537698
a `BoundsError` cannot happen.
538699
539700
!!! warning

src/bamcigar.jl

Lines changed: 23 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -74,23 +74,18 @@ end
7474
end
7575
end
7676

77-
CIGAR(x::BAMCIGAR) = CIGAR(x, UInt8[])
78-
function CIGAR(x::BAMCIGAR, v::Vector{UInt8})
79-
return CIGAR(
80-
unsafe,
81-
cigar_view!(v, x),
82-
length(x) % UInt32,
83-
x.aln_len,
84-
x.ref_len,
85-
x.query_len,
86-
)
87-
end
77+
# Convert a `BAMCIGAR` to a `CIGAR` by encoding it into a freshly allocated
# byte vector, which backs the returned cigar.
function CIGAR(x::BAMCIGAR)
    buffer = UInt8[]
    return encode_append!(buffer, CIGAR, x)
end
78+
79+
@deprecate CIGAR(x::BAMCIGAR, v::Vector{UInt8}) encode_append!(empty!(v), CIGAR, x)
8880

8981
MemoryViews.MemoryView(x::BAMCIGAR) = x.mem
9082

9183
"""
9284
cigar_view!(v::Vector{UInt8}, x::BAMCIGAR)::ImmutableMemoryView{UInt8}
9385
86+
!!! warning
87+
This function is DEPRECATED in favor of `encode_append!`
88+
9489
Write the ASCII (i.e. `CIGAR`) representation `x` into `v`,
9590
emptying `v`'s original content.
9691
A memory view of `v` is returned:
@@ -110,42 +105,20 @@ julia> String(mem_view) == string(CIGAR(bc))
110105
true
111106
```
112107
"""
113-
function cigar_view!(v::Vector{UInt8}, x::BAMCIGAR)
114-
# Sizehint to minimum size to reduce further reallocations
115-
sizehint!(v, 2 * length(x); shrink=false)
116-
empty!(v)
117-
for element in x
118-
n = element.len % UInt32
119-
n_digits = 0
120-
while !iszero(n)
121-
(n, r) = divrem(n, UInt32(10))
122-
push!(v, r + 0x30)
123-
n_digits += 1
124-
end
125-
if n_digits > 1
126-
@inbounds for i in 1:(n_digits >>> 1)
127-
a = lastindex(v) - i + 1
128-
b = lastindex(v) - n_digits + i
129-
v[a], v[b] = v[b], v[a]
130-
end
131-
end
132-
shift = (7 * (getfield(element, :x) & 0x0f)) & 63
133-
byte = ((CIGAR_BYTE_LUT >> shift) % UInt8) & 0x7f
134-
push!(v, byte)
135-
end
136-
return ImmutableMemoryView(v)
137-
end
108+
function cigar_view! end
138109

139-
BAMCIGAR(x::CIGAR) = BAMCIGAR(x, UInt8[])
110+
@deprecate cigar_view!(v::Vector{UInt8}, x::BAMCIGAR) MemoryView(encode_append!(empty!(v), CIGAR, x))
140111

141-
function BAMCIGAR(x::CIGAR, v::Vector{UInt8})
142-
resize!(v, 4 * length(x))
143-
return @inbounds BAMCIGAR(MemoryView(v), x)
144-
end
112+
# Convert a `CIGAR` to a `BAMCIGAR` by encoding it into a freshly allocated
# byte vector, which backs the returned cigar.
function BAMCIGAR(x::CIGAR)
    buffer = UInt8[]
    return encode_append!(buffer, BAMCIGAR, x)
end
113+
114+
@deprecate BAMCIGAR(x::CIGAR, v::Vector{UInt8}) encode_append!(empty!(v), BAMCIGAR, x)
145115

146116
"""
147117
BAMCIGAR(mem::MutableMemoryView{UInt8}, x::CIGAR)::BAMCIGAR
148118
119+
!!! warning
120+
This function is DEPRECATED. Use `encode!(mem, BAMCIGAR, x)`
121+
149122
Construct a `BAMCIGAR` equal to `x`, using the memory `mem`.
150123
After calling this, `mem` may not be mutated, and is considered
151124
owned by the resulting `BAMCIGAR`.
@@ -165,30 +138,16 @@ julia> parent(MemoryView(cigar)) === parent(mem)
165138
true
166139
```
167140
"""
168-
function BAMCIGAR(mem::MutableMemoryView{UInt8}, x::CIGAR)
169-
@boundscheck if length(mem) < 4 * length(x)
170-
throw(BoundsError(mem, 4 * length(x)))
171-
end
172-
mem = @inbounds mem[1:(4 * length(x))]
173-
i = 1
174-
for element in x
175-
u = getfield(element, :x)
176-
for _ in 1:4
177-
@inbounds mem[i] = u % UInt8
178-
i += 1
179-
u >>= 8
180-
end
181-
end
182-
return BAMCIGAR(
183-
unsafe,
184-
ImmutableMemoryView(mem),
185-
x.aln_len,
186-
x.ref_len,
187-
x.query_len
188-
)
189-
end
141+
BAMCIGAR(::MutableMemoryView{UInt8}, ::CIGAR)
190142

191-
Base.print(io::IO, x::BAMCIGAR) = (write(io, cigar_view!(UInt8[], x)); nothing)
143+
@deprecate BAMCIGAR(mem::MutableMemoryView{UInt8}, x::CIGAR) encode!(mem, BAMCIGAR, x)
144+
145+
# Print `x` to `io` as the bytes of its `CIGAR` encoding.
function Base.print(io::IO, x::BAMCIGAR)
    # Encode into a scratch buffer first, then write it in one call.
    ascii_bytes = Vector{UInt8}()
    encode_append!(ascii_bytes, CIGAR, x)
    write(io, ascii_bytes)
    return nothing
end
192151

193152
function try_parse(::Type{BAMCIGAR}, x)::Union{CIGARError, BAMCIGAR}
194153
mem = ImmutableMemoryView(x)::ImmutableMemoryView{UInt8}

0 commit comments

Comments
 (0)