Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 14 additions & 10 deletions src/decompress.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ end

function decompress_bytes(data, compression::CompressionType; expected_bytes::Union{Nothing, Int} = nothing)
compression == NoCompression && return data
@assert compression in (GzipCompression, RLECompression)
compression in (GzipCompression, RLECompression) ||
throw(ArgumentError("unsupported compression: $compression"))
result = if compression == GzipCompression
decompressor = Decompressor()
input = convert(Vector{UInt8}, data)
Expand All @@ -44,15 +45,18 @@ end
function decompress_bytes!(decompressor, dest, doffs, src::AbstractVector{UInt8}, soffs, N, n_in, compression::CompressionType)
if compression == NoCompression
_copy_to!(dest, doffs, src, soffs, N)
return
end
@assert compression in (GzipCompression, RLECompression)
n_out = N * sizeof(eltype(dest))
out_ptr = pointer(dest, doffs)
in_ptr = pointer(src, soffs)
return if compression == GzipCompression
out = _unsafe_gzip_decompress!(decompressor, out_ptr, n_out, in_ptr, n_in)
@assert !(out isa LibDeflateError) out
elseif compression == GzipCompression
n_out = N * sizeof(eltype(dest))
GC.@preserve dest src begin
out = _unsafe_gzip_decompress!(decompressor, pointer(dest, doffs), n_out, pointer(src, soffs), n_in)
out isa LibDeflateError && throw(ArgumentError("gzip decompression failed: $out"))
end
elseif compression == RLECompression
n_out = N * sizeof(eltype(dest))
out = _rle_decompress(view(src, soffs:(soffs + n_in - 1)), n_out)
_copy_to!(dest, doffs, out, 1, N)
else
throw(ArgumentError("unsupported variable compression: $compression"))
end
return
end
60 changes: 60 additions & 0 deletions test/decompress_test.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Variable-level (CVVR) decompression. No CDF writer in the ecosystem emits
# RLE-compressed variables (cdflib/pycdf are gzip-only), so build CVVRs by hand.
using CommonDataFormat: load_cvvr_data!, decompress_bytes!, Decompressor,
RLECompression, HuffmanCompression, NoCompression

# CDF run-length encoding: a run of zero bytes is written as the pair
# (0x00, run_length - 1); every nonzero byte is copied through literally.
# A single pair covers at most 256 zeros, so longer runs become several pairs.
function rle_compress(bytes)
    encoded = UInt8[]
    pos = firstindex(bytes)
    stop = lastindex(bytes)
    while pos <= stop
        byte = bytes[pos]
        if byte != 0x00
            # Literal byte: emit unchanged.
            push!(encoded, byte)
            pos += 1
            continue
        end
        # Count the consecutive zeros starting at `pos`, capped at 256 so that
        # (count - 1) still fits in one byte.
        nzeros = 1
        while nzeros < 256 && pos + nzeros <= stop && bytes[pos + nzeros] == 0x00
            nzeros += 1
        end
        push!(encoded, 0x00, UInt8(nzeros - 1))
        pos += nzeros
    end
    return encoded
end

# Build a hand-made v3 CVVR (Int64 record size) around `payload`.
# Byte layout, integer fields in network (big-endian) order:
#   1:8    record size  (Int64, 24-byte header + payload)
#   9:12   record type  (Int32, 13)
#   13:16  rfu          (left as zero)
#   17:24  cSize        (Int64, payload length)
#   25:end payload
function make_cvvr(payload)
    payload_len = length(payload)
    record = zeros(UInt8, 24 + payload_len)
    # Byte image of one integer field after conversion to network order.
    field_bytes(x) = reinterpret(UInt8, [hton(x)])
    copyto!(record, 1, field_bytes(Int64(length(record))), 1, 8)
    copyto!(record, 9, field_bytes(Int32(13)), 1, 4)
    copyto!(record, 17, field_bytes(Int64(payload_len)), 1, 8)
    record[25:end] .= payload
    return record
end

@testset "RLE compressed variable records" begin
    # Round-trip a small Float64 record set: the zero values give the encoder
    # zero-byte runs to collapse.
    expected = Float64[0.0, 1.0, 0.0, 0.0, 2.5, 0.0, 0.0, 0.0, 3.0]
    nvalues = length(expected)
    plain = collect(reinterpret(UInt8, expected))
    packed = rle_compress(plain)
    @test length(packed) < length(plain) # zeros actually compressed
    record = make_cvvr(packed)

    decoded = Vector{Float64}(undef, nvalues)
    load_cvvr_data!(decoded, 1, record, 0, nvalues, Int64, RLECompression)
    @test decoded == expected

    # long zero run crossing the 256-byte chunk limit
    sparse = zeros(UInt8, 1000)
    sparse[513] = 0x7f
    sparse_record = make_cvvr(rle_compress(sparse))
    sparse_decoded = Vector{UInt8}(undef, 1000)
    load_cvvr_data!(sparse_decoded, 1, sparse_record, 0, 1000, Int64, RLECompression)
    @test sparse_decoded == sparse
end

@testset "unsupported variable compression" begin
    # A compression kind that decompress_bytes! does not handle must raise an
    # ArgumentError. The buffer contents are irrelevant here.
    input = zeros(UInt8, 16)
    output = Vector{Float64}(undef, 1)
    decompressor = Decompressor()
    @test_throws ArgumentError decompress_bytes!(
        decompressor, output, 1, input, 1, 1, 8, HuffmanCompression
    )
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ include("epochs_test.jl")
include("comprehensive_test.jl")
include("cdf2_test.jl")
include("CommonDataModelExt_test.jl")
include("decompress_test.jl")
@testset "StaticString" begin
include("staticstring.jl")
end
Expand Down
Loading