From 9f2791efebdcee41e21cdcdf21f2c8b3142094f7 Mon Sep 17 00:00:00 2001
From: Beforerr
Date: Thu, 11 Jun 2026 08:59:05 -0700
Subject: [PATCH] fix: RLE decompression for compressed variable records

decompress_bytes! had an empty RLE branch: RLE-compressed CVVRs silently
left dest uninitialized. Route RLE through _rle_decompress (output size is
known from the read request) and throw ArgumentError for unsupported codecs
(Huffman/AHUFF) instead of @assert.

No CDF writer in the ecosystem emits variable-level RLE (cdflib/pycdf
fixtures are gzip-only, which is how this went untested), so tests build
CVVRs by hand.

Co-Authored-By: Claude Fable 5
---
Review notes (placed after the three-dash marker and before the first
`diff --git`, so they are not part of the commit message):

- src/decompress.jl, decompress_bytes!: every branch now falls through to a
  bare `return`, whereas the removed code ended in `return if ... end`.
  NOTE(review): confirm no caller relied on the previous return value.
- Gzip branch: the pointers are now taken inside `GC.@preserve dest src`, and
  a LibDeflateError result is raised as ArgumentError instead of going
  through @assert.
- RLE branch: n_out is computed as N * sizeof(eltype(dest)) and passed to
  _rle_decompress, but the follow-up call is
  `_copy_to!(dest, doffs, out, 1, N)` with the element count N.
  NOTE(review): presumably _copy_to! copies the raw bytes of N elements of
  dest; its definition is not in this patch -- TODO confirm.
- decompress_bytes and decompress_bytes! now use different ArgumentError
  texts ("unsupported compression" vs "unsupported variable compression").
- test/decompress_test.jl: NoCompression is imported but not referenced by
  any test in the file.
- test/decompress_test.jl: rle_compress stops a zero run at 256 bytes, so
  `run - 1` always fits in a UInt8.

 src/decompress.jl       | 24 ++++++++++-------
 test/decompress_test.jl | 60 +++++++++++++++++++++++++++++++++++++++++
 test/runtests.jl        |  1 +
 3 files changed, 75 insertions(+), 10 deletions(-)
 create mode 100644 test/decompress_test.jl

diff --git a/src/decompress.jl b/src/decompress.jl
index ab728c2..757efed 100644
--- a/src/decompress.jl
+++ b/src/decompress.jl
@@ -22,7 +22,8 @@ end
 
 function decompress_bytes(data, compression::CompressionType; expected_bytes::Union{Nothing, Int} = nothing)
     compression == NoCompression && return data
-    @assert compression in (GzipCompression, RLECompression)
+    compression in (GzipCompression, RLECompression) ||
+        throw(ArgumentError("unsupported compression: $compression"))
     result = if compression == GzipCompression
         decompressor = Decompressor()
         input = convert(Vector{UInt8}, data)
@@ -44,15 +45,18 @@ end
 function decompress_bytes!(decompressor, dest, doffs, src::AbstractVector{UInt8}, soffs, N, n_in, compression::CompressionType)
     if compression == NoCompression
         _copy_to!(dest, doffs, src, soffs, N)
-        return
-    end
-    @assert compression in (GzipCompression, RLECompression)
-    n_out = N * sizeof(eltype(dest))
-    out_ptr = pointer(dest, doffs)
-    in_ptr = pointer(src, soffs)
-    return if compression == GzipCompression
-        out = _unsafe_gzip_decompress!(decompressor, out_ptr, n_out, in_ptr, n_in)
-        @assert !(out isa LibDeflateError) out
+    elseif compression == GzipCompression
+        n_out = N * sizeof(eltype(dest))
+        GC.@preserve dest src begin
+            out = _unsafe_gzip_decompress!(decompressor, pointer(dest, doffs), n_out, pointer(src, soffs), n_in)
+            out isa LibDeflateError && throw(ArgumentError("gzip decompression failed: $out"))
+        end
     elseif compression == RLECompression
+        n_out = N * sizeof(eltype(dest))
+        out = _rle_decompress(view(src, soffs:(soffs + n_in - 1)), n_out)
+        _copy_to!(dest, doffs, out, 1, N)
+    else
+        throw(ArgumentError("unsupported variable compression: $compression"))
     end
+    return
 end
diff --git a/test/decompress_test.jl b/test/decompress_test.jl
new file mode 100644
index 0000000..a6f7e7f
--- /dev/null
+++ b/test/decompress_test.jl
@@ -0,0 +1,60 @@
+# Variable-level (CVVR) decompression. No CDF writer in the ecosystem emits
+# RLE-compressed variables (cdflib/pycdf are gzip-only), so build CVVRs by hand.
+using CommonDataFormat: load_cvvr_data!, decompress_bytes!, Decompressor,
+    RLECompression, HuffmanCompression, NoCompression
+
+# CDF RLE: 0x00 followed by (run_length - 1); other bytes literal
+function rle_compress(bytes)
+    out = UInt8[]
+    i = firstindex(bytes)
+    while i <= lastindex(bytes)
+        if bytes[i] == 0x00
+            run = 1
+            while i + run <= lastindex(bytes) && bytes[i + run] == 0x00 && run < 256
+                run += 1
+            end
+            push!(out, 0x00, UInt8(run - 1))
+            i += run
+        else
+            push!(out, bytes[i])
+            i += 1
+        end
+    end
+    return out
+end
+
+# CVVR layout (v3, Int64 record size): [record_size 8][type=13 4][rfu 4][cSize 8][data]
+function make_cvvr(payload)
+    buf = zeros(UInt8, 24 + length(payload))
+    buf[1:8] .= reinterpret(UInt8, [hton(Int64(length(buf)))])
+    buf[9:12] .= reinterpret(UInt8, [hton(Int32(13))])
+    buf[17:24] .= reinterpret(UInt8, [hton(Int64(length(payload)))])
+    buf[25:end] .= payload
+    return buf
+end
+
+@testset "RLE compressed variable records" begin
+    data = Float64[0.0, 1.0, 0.0, 0.0, 2.5, 0.0, 0.0, 0.0, 3.0]
+    raw = collect(reinterpret(UInt8, data))
+    payload = rle_compress(raw)
+    @test length(payload) < length(raw) # zeros actually compressed
+    buf = make_cvvr(payload)
+
+    dest = Vector{Float64}(undef, length(data))
+    load_cvvr_data!(dest, 1, buf, 0, length(data), Int64, RLECompression)
+    @test dest == data
+
+    # long zero run crossing the 256-byte chunk limit
+    data2 = zeros(UInt8, 1000)
+    data2[513] = 0x7f
+    buf2 = make_cvvr(rle_compress(data2))
+    dest2 = Vector{UInt8}(undef, 1000)
+    load_cvvr_data!(dest2, 1, buf2, 0, 1000, Int64, RLECompression)
+    @test dest2 == data2
+end
+
+@testset "unsupported variable compression" begin
+    src = zeros(UInt8, 16)
+    dest = Vector{Float64}(undef, 1)
+    @test_throws ArgumentError decompress_bytes!(Decompressor(), dest, 1, src, 1, 1, 8, HuffmanCompression)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 88ab575..097ba19 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -7,6 +7,7 @@ include("epochs_test.jl")
 include("comprehensive_test.jl")
 include("cdf2_test.jl")
 include("CommonDataModelExt_test.jl")
+include("decompress_test.jl")
 @testset "StaticString" begin
     include("staticstring.jl")
 end