JuliaSpacePhysics · Beforerr · Jun 10, 2026 · Jun 5, 2026
diff --git a/README.md b/README.md
@@ -1,26 +1,18 @@
 # CommonDataFormat.jl
 
+[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://juliaspacephysics.github.io/CommonDataFormat.jl/dev/)
 [![DOI](https://zenodo.org/badge/1057373325.svg)](https://doi.org/10.5281/zenodo.17517061)
 [![version](https://juliahub.com/docs/General/CommonDataFormat/stable/version.svg)](https://juliahub.com/ui/Packages/General/CommonDataFormat)
 
 [![Build Status](https://github.com/JuliaSpacePhysics/CommonDataFormat.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/JuliaSpacePhysics/CommonDataFormat.jl/actions/workflows/CI.yml?query=branch%3Amain)
 [![Coverage](https://codecov.io/gh/JuliaSpacePhysics/CommonDataFormat.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/JuliaSpacePhysics/CommonDataFormat.jl)
 
-A Julia package for reading [Common Data Format (CDF)](https://cdf.gsfc.nasa.gov/) files, widely used in space physics for storing multidimensional data arrays and metadata. See [CDFDatasets.jl](https://github.com/JuliaSpacePhysics/CDFDatasets.jl) for a high-level interface.
-
-**Installation**: at the Julia REPL, run `using Pkg; Pkg.add("CommonDataFormat")`
-
-**Documentation**: [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://juliaspacephysics.github.io/CommonDataFormat.jl/dev/)
-
-## Features
-
-- **Pure Julia implementation** - No external dependencies on CDF libraries
-- **Efficient data access** - Memory-mapped access for data and attributes, super fast decompression using [`LibDeflate`](https://github.com/jakobnissen/LibDeflate.jl)
-- **[DiskArrays.jl](https://github.com/JuliaIO/DiskArrays.jl) integration** - Lazy representation of data on hard disk with AbstractDiskArray interface
+A Julia package for reading [Common Data Format (CDF)](https://cdf.gsfc.nasa.gov/) files, widely used in space physics for storing multidimensional data arrays and metadata. See [CDFDatasets.jl](https://github.com/JuliaSpacePhysics/CDFDatasets.jl) for high-level interfaces.
 
 ## Quick Start
 
 ```julia
+using Pkg; Pkg.add("CommonDataFormat") # one-time installation
 using CommonDataFormat
 
 # Load a CDF file
@@ -38,6 +30,13 @@ println("Variables: ", keys(cdf))
 var = cdf["temperature"]
 ```
 
+## Features
+
+- **Pure Julia implementation** - No external dependencies on CDF libraries
+- **Efficient data access** - Memory-mapped access for data and attributes, super fast decompression using [`LibDeflate`](https://github.com/jakobnissen/LibDeflate.jl)
+- **[DiskArrays.jl](https://github.com/JuliaIO/DiskArrays.jl) integration** - Lazy representation of on-disk data through the `AbstractDiskArray` interface
+
+
 ## Elsewhere
 
 - [CDFpp](https://github.com/SciQLop/CDFpp): A modern C++ header only cdf library with Python bindings

diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl
@@ -4,10 +4,8 @@ using Downloads
 
 const SUITE = BenchmarkGroup()
 
-# elb file is committed to the repo, so it ships with the package at any rev
 const ELX_FILE = joinpath(pkgdir(CommonDataFormat), "data", "elb_l2_epdef_20210914_v01.cdf")
 
-# mms file (50 MB) is gitignored; download once and cache across revisions
 function download_data(url, filename = basename(url))
     dir = joinpath(tempdir(), "CommonDataFormat_benchmark_data")
     mkpath(dir)

diff --git a/src/dataset.jl b/src/dataset.jl
@@ -1,14 +1,15 @@
-struct CDFDataset{CT, FST}
+struct CDFDataset{FST}
     filename::String
     cdr::CDR{FST}
     gdr::GDR{FST}
     buffer::Vector{UInt8}
+    compression::CompressionType # stored as a runtime field rather than encoded as a type parameter
 end
 
 Base.parent(cdf::CDFDataset) = getfield(cdf, :buffer)
 GDR(cdf::CDFDataset) = getfield(cdf, :gdr)
 filename(cdf::CDFDataset) = getfield(cdf, :filename)
-recordsize_type(::CDFDataset{CT, RS}) where {CT, RS} = RS
+recordsize_type(::CDFDataset{RS}) where {RS} = RS # field-size type (Int64 for CDF v3, Int32 otherwise — see `CDFDataset(filename)`)
 
 """
     CDFDataset(filename)
@@ -22,21 +23,27 @@ cdf = CDFDataset("data.cdf")
 """
 function CDFDataset(filename)
     fname = String(filename)
-    return open(fname, "r") do io
+    # Avoid the `open(name, mode) do io ... end` form: it routes through varargs splatting (`_apply_iterate`), which `juliac --trim` can't resolve.
+    io = open(fname, "r")
+    try
         buffer = Mmap.mmap(io)
         magic_bytes = read_be(buffer, 1, UInt32)
         @assert validate_cdf_magic(magic_bytes)
+        return is_cdf_v3(magic_bytes) ? _load_dataset(fname, buffer, Int64) :
+            _load_dataset(fname, buffer, Int32) # each branch passes a literal field-size type (presumably so dispatch stays resolvable under `--trim`)
+    finally # runs whether loading returned or threw
+        close(io)
+    end
+end
 
-        FieldSizeType = is_cdf_v3(magic_bytes) ? Int64 : Int32
-        compression = NoCompression
-        if is_compressed(read_be(buffer, 5, UInt32))
-            buffer, compression = decompress_bytes(buffer, FieldSizeType)
-        end
-        # Parse CDF header
-        cdr = CDR(buffer, 8, FieldSizeType)
-        gdr = GDR(buffer, Int(cdr.gdr_offset), FieldSizeType)
-        return CDFDataset{compression, FieldSizeType}(fname, cdr, gdr, buffer)
+function _load_dataset(fname, buffer, ::Type{FieldSizeType}) where {FieldSizeType}
+    compression = NoCompression # default; replaced below when the file is compressed
+    if is_compressed(read_be(buffer, 5, UInt32)) # the UInt32 at byte 5 is the second magic word
+        buffer, compression = decompress_bytes(buffer, FieldSizeType) # continue with the decompressed bytes
     end
+    cdr = CDR(buffer, 8, FieldSizeType) # parse the CDF header: CDR first ...
+    gdr = GDR(buffer, Int(cdr.gdr_offset), FieldSizeType) # ... then the GDR at the offset the CDR records
+    return CDFDataset{FieldSizeType}(fname, cdr, gdr, buffer, compression)
 end
 
 is_big_endian_encoding(cdf::CDFDataset) = is_big_endian_encoding(cdf.cdr.encoding)
@@ -45,23 +52,16 @@ is_compressed(magic_numbers::UInt32) = magic_numbers != 0x0000FFFF
 majority(cdf::CDFDataset) = majority(cdf.cdr)
 
 # Convenience accessors for the dataset with lazy loading
-@inline function Base.getproperty(cdf::CDFDataset{CT, FST}, name::Symbol) where {CT, FST}
+@inline function Base.getproperty(cdf::CDFDataset, name::Symbol)
+    # Real fields first (this now includes `compression`), so internal accesses such as
+    # `cdf.cdr` / `cdf.gdr` return immediately and never reach the lazy `attrib` branches.
     name in fieldnames(CDFDataset) && return getfield(cdf, name)
-    if name === :version
-        return version(cdf.cdr)
-    elseif name === :majority
-        return majority(cdf)
-    elseif name === :compression
-        return CT
-    elseif name === :adr
-        return ADR(parent(cdf), GDR(cdf).ADRhead, recordsize_type(cdf))
-    elseif name === :attrib
-        return attrib(cdf)
-    elseif name === :vattrib
-        return attrib(cdf; predicate = !is_global)
-    else
-        throw(ArgumentError("Unknown property $name"))
-    end
+    name === :version && return version(getfield(cdf, :cdr))
+    name === :majority && return majority(cdf)
+    name === :adr && return ADR(parent(cdf), GDR(cdf).ADRhead, recordsize_type(cdf))
+    name === :attrib && return attrib(cdf)
+    name === :vattrib && return attrib(cdf; predicate = !is_global)
+    throw(ArgumentError("Unknown property $name"))
 end
 
 function find_vdr(cdf::CDFDataset, var_name::String)
@@ -134,5 +134,5 @@ function Base.show(io::IO, m::MIME"text/plain", cdf::CDFDataset)
     return
 end
 
-OffsetsIterator(cdf::CDFDataset) = 
+OffsetsIterator(cdf::CDFDataset) =
     OffsetsIterator{recordsize_type(cdf)}(cdf.buffer, cdf.gdr.ADRhead)
diff --git a/src/decompress.jl b/src/decompress.jl
@@ -2,7 +2,7 @@ include("decompress/rle.jl")
 include("decompress/gzip.jl")
 
 
-function decompress_bytes(buffer, RecordSizeType)
+function decompress_bytes(buffer, ::Type{RecordSizeType}) where {RecordSizeType}
     ccr = CCR(buffer, 8, RecordSizeType)
     cpr = CPR(buffer, Int(ccr.cpr_offset), RecordSizeType)
     compression = CompressionType(cpr.compression_type)

diff --git a/src/loading/attribute.jl b/src/loading/attribute.jl
@@ -2,7 +2,7 @@
 # Handles loading of ADR (Attribute Descriptor Record) and AEDR (Attribute Entry Descriptor Record) chains
 
 # Load all attribute entries for a given attribute from its AEDRs.
-@inline function load_attribute_entries(buffer::Vector{UInt8}, adr, RecordSizeType, needs_byte_swap)
+@inline function load_attribute_entries(buffer::Vector{UInt8}, adr, ::Type{RecordSizeType}, needs_byte_swap) where {RecordSizeType}
     head = max(adr.AgrEDRhead, adr.AzEDRhead)
     offsets = OffsetsIterator{RecordSizeType}(buffer, head)
     return map(offsets) do offset
@@ -122,7 +122,7 @@ function _get_attributes(name, value, cdf)
     return value
 end
 
-@inline function _search_aedr_entries(source, aedr_head, RecordSizeType, needs_byte_swap, target_varnum)
+@inline function _search_aedr_entries(source, aedr_head, ::Type{RecordSizeType}, needs_byte_swap, target_varnum) where {RecordSizeType}
     aedr_head == 0 && return nothing
     offset = Int(aedr_head)
     _num_offset = 13 + 2 * sizeof(RecordSizeType)

diff --git a/src/loading/variable.jl b/src/loading/variable.jl
@@ -48,6 +48,48 @@ function variable(cdf::CDFDataset, name)
     )
 end
 
+"""
+    read!(ds::CDFDataset, name::String, dest::AbstractArray{T, N}) -> dest
+
+Fill the preallocated `dest` with the complete contents of variable `name` and return it.
+
+`T` and `N` are taken from `dest` rather than from the file, so — unlike `ds[name]`, whose
+type is only known at runtime — the call chain can be resolved at compile time and survives
+`juliac --trim`. Throws `KeyError(name)` when `find_vdr` finds no record for `name`.
+"""
+function Base.read!(ds::CDFDataset, name::String, dest::AbstractArray{T, N}) where {T, N}
+    record = find_vdr(ds, name)
+    record === nothing && throw(KeyError(name))
+    return _read_full!(dest, ds, name, record)
+end
+
+"""
+    read(ds::CDFDataset, name::String, ::Type{Array{T, N}}) -> Array{T, N}
+
+Allocating counterpart of [`read!`](@ref): return the full contents of variable `name` in a
+newly allocated `Array{T, N}` (throws `KeyError(name)` when `find_vdr` finds no record).
+"""
+function Base.read(ds::CDFDataset, name::String, ::Type{Array{T, N}}) where {T, N}
+    record = find_vdr(ds, name)
+    record === nothing && throw(KeyError(name))
+    shape = (map(Int, record_sizes(record, Val(N - 1)))..., Int(record.max_rec) + 1)
+    return _read_full!(Array{T, N}(undef, shape), ds, name, record)
+end
+
+function _read_full!(dest::AbstractArray{T, N}, ds, name, vdr) where {T, N}
+    Base.require_one_based_indexing(dest)
+    Tfile = julia_type(vdr.data_type, vdr.num_elems) # element type according to the file's VDR
+    T !== Tfile && throw(ArgumentError("element type mismatch for \"$name\": file has $Tfile, destination has $T"))
+    dims = (map(Int, record_sizes(vdr, Val(N - 1)))..., Int(vdr.max_rec) + 1) # record dims, then record count
+    size(dest) != dims && throw(DimensionMismatch("variable \"$name\" has size $dims, destination has size $(size(dest))"))
+    cdfvar = CDFVariable{T, N, typeof(vdr), typeof(ds)}(name, vdr, ds, dims) # built with the caller-supplied T and N
+    DiskArrays.readblock!(cdfvar, dest, axes(dest)...)
+    return dest
+end
+
+@inline _record_view(A::AbstractArray{<:Any, M}, r) where {M} =
+    view(A, ntuple(_ -> Colon(), Val(M - 1))..., r) # `r` indexes the last dimension only; `Val` makes the tuple length part of the type
+
 function DiskArrays.readblock!(var::CDFVariable{T, N}, dest::AbstractArray{T}, ranges::Vararg{AbstractUnitRange{<:Integer}, N}; nbuffers = nthreads()) where {T, N}
     N > 0 && @boundscheck checkbounds(var, ranges...)
     isempty(dest) && return dest
@@ -100,7 +142,7 @@ function DiskArrays.readblock!(var::CDFVariable{T, N}, dest::AbstractArray{T}, r
             if is_full_record && entry.first >= first_rec && entry.last <= last_rec
                 # full entry
                 dest_range = dst_src_ranges(first_rec, last_rec, entry)[1]
-                dest_view = selectdim(dest, N, dest_range)
+                dest_view = _record_view(dest, dest_range)
                 total_elems = record_size * length(entry)
                 decompressor = take!(decompressors())
                 load_cvvr_data!(dest_view, 1, buffer, entry.offset, total_elems, RecordSizeType, compression; decompressor)
@@ -109,7 +151,7 @@ function DiskArrays.readblock!(var::CDFVariable{T, N}, dest::AbstractArray{T}, r
             else
                 # partial entry
                 (dest_range, local_range) = dst_src_ranges(first_rec, last_rec, entry)
-                dest_view = selectdim(dest, N, dest_range)
+                dest_view = _record_view(dest, dest_range)
                 n_records = length(entry)
                 total_elems = record_size * n_records
                 chunk = Vector{T}(undef, total_elems)

diff --git a/src/parsing.jl b/src/parsing.jl
@@ -21,6 +21,11 @@ end
     return ntuple(j -> read_be(v, i + (j - 1) * S, T), n)
 end
 
+@inline function read_be(v::Vector{UInt8}, i, ::Val{M}, T) where {M}
+    nbytes = sizeof(T) # byte width of one element
+    return ntuple(k -> read_be(v, i + (k - 1) * nbytes, T), Val(M))
+end
+
 @inline function read_be_i(v::Vector{UInt8}, i, T::Base.DataType)
     return read_be(v, i, T), i + _sizeof(T)
 end
@@ -32,58 +37,20 @@ end
 
 const name_bytes_buffer = Vector{UInt8}(undef, 256)
 
-"""
-    @read_be_fields buffer pos T1 T2 ...
-
-Unrolls sequential big-endian reads starting at `pos` within `buffer`.
-Returns a tuple of the parsed values and the updated position, mirroring
-`read_be_i` but without the runtime `ntuple`/offset bookkeeping.
-
-# Example
-
-```julia
-values, next = @read_be_fields buf pos UInt32 Int16
-```
-"""
-macro read_be_fields(buffer, pos, Ts...)
-    isempty(Ts) && error("@read_be_fields requires at least one field type")
-
-    types = flatten_field_types(__module__, Ts)
-    buf = esc(buffer)
-    start = esc(pos)
-    pos_sym = gensym(:pos)
-    value_syms = [gensym(:field) for _ in types]
-
-    stmts = Any[:(local $pos_sym = $start)]
-    for (sym, T) in zip(value_syms, types)
-        Tesc = esc(T)
-        push!(stmts, :(local $sym = read_be($buf, $pos_sym, $Tesc)))
-        push!(stmts, :($pos_sym += _sizeof($Tesc)))
-    end
-
-    tuple_expr = Expr(:tuple, value_syms...)
-    push!(stmts, :(($tuple_expr, $pos_sym)))
-
-    return Expr(:block, stmts...)
-end
-
 # Optimized version using loop unrolling for better performance
 @generated function read_be_fields(buffer::Vector{UInt8}, pos::Integer, ::Type{SType}, ::Val{indxs}) where {SType, indxs}
     exprs = Expr[]
     value_syms = [gensym(:field) for _ in 1:length(indxs)]
     pos_sym = gensym(:pos)
 
-    # Initialize position
     push!(exprs, :(local $pos_sym = pos))
 
-    # Read each field
     for (i, idx) in enumerate(indxs)
         T = fieldtype(SType, idx)
         push!(exprs, :(local $(value_syms[i]) = read_be(buffer, $pos_sym, $T)))
         push!(exprs, :($pos_sym += _sizeof($T)))
     end
 
-    # Return tuple of values and final position
     tuple_expr = Expr(:tuple, value_syms...)
     push!(exprs, :(($tuple_expr, $pos_sym)))
 

diff --git a/src/precompile.jl b/src/precompile.jl
@@ -1,6 +1,6 @@
-precompile(Array, (CDFVariable{TT2000, 1, VDR{Int64}, CDFDataset{NoCompression, Int64}},))
+precompile(Array, (CDFVariable{TT2000, 1, VDR{Int64}, CDFDataset{Int64}},))
 for T in (Float32, Float64), i in 1:3
-    precompile(Array, (CDFVariable{T, i, VDR{Int64}, CDFDataset{NoCompression, Int64}},))
+    precompile(Array, (CDFVariable{T, i, VDR{Int64}, CDFDataset{Int64}},))
 end
 
 PrecompileTools.@setup_workload begin

diff --git a/src/records/adr.jl b/src/records/adr.jl
@@ -29,7 +29,7 @@ is_global(buffer, offset, ::Type{Int64}) = read_be(buffer, offset + 29, Int32) =
 
 Load an Attribute Descriptor Record from the buffer at the specified position.
 """
-@inline function ADR(buffer::Vector{UInt8}, offset, RecordSizeType)
+@inline function ADR(buffer::Vector{UInt8}, offset, ::Type{RecordSizeType}) where {RecordSizeType}
     pos = check_record_type(4, buffer, offset, RecordSizeType)
     # Read ADR fields
     fields, pos = read_be_fields(buffer, pos, ADR{RecordSizeType, String}, Val(1:11))

diff --git a/src/records/aedr.jl b/src/records/aedr.jl
@@ -21,7 +21,7 @@ struct AEDR{FST, A}
     Value::A            # This consists of the number of elements (specified by the NumElems field) of the data type (specified by the DataType field). This can be thought of as a 1-dimensional array of values (stored contiguously). The size of this field is the product of the number of elements and the size in bytes of each element.
 end
 
-function load_aedr_data(buffer::Vector{UInt8}, offset, RecordSizeType, needs_byte_swap)
+function load_aedr_data(buffer::Vector{UInt8}, offset, ::Type{RecordSizeType}, needs_byte_swap) where {RecordSizeType}
     _datatype_offset = 9 + 2 * sizeof(RecordSizeType)
     _numelems_offset = 17 + 2 * sizeof(RecordSizeType)
     _data_offset = 41 + 2 * sizeof(RecordSizeType)

diff --git a/src/records/ccr.jl b/src/records/ccr.jl
@@ -1,13 +1,12 @@
 struct CCR <: Record
-    header::Header
     cpr_offset::UInt64
     uncompressed_size::UInt64 # uSize Size of the CDF in its uncompressed form. This byte count does NOT include the 8-byte magic numbers, and 16-byte checksum if it exists.
     rfu_a::RInt32
     data_offset::Int
     data_length::Int
 end
 
-@inline function CCR(buffer::Vector{UInt8}, offset, RecordSizeType)
+@inline function CCR(buffer::Vector{UInt8}, offset, ::Type{RecordSizeType}) where {RecordSizeType}
     pos = offset + 1
     header = Header(buffer, pos, RecordSizeType)
     @assert header.record_type == 10 "Invalid CCR record type"
@@ -18,7 +17,7 @@ end
     record_end = offset + header.record_size
     data_length = record_end - data_offset
     @assert data_length >= 0 "Invalid CCR data length"
-    return CCR(header, UInt64(cpr_offset), UInt64(uncompressed_size), rfu_a, data_offset, data_length)
+    return CCR(UInt64(cpr_offset), UInt64(uncompressed_size), rfu_a, data_offset, data_length)
 end
 
 @inline function data_view(ccr::CCR, buffer::Vector{UInt8})

diff --git a/src/records/cdr.jl b/src/records/cdr.jl
@@ -27,7 +27,7 @@ is_cdf_v3(cdr::CDR) = cdr.version == 3
 Load a CDF Descriptor Record from the IO stream at the specified offset.
 This follows the CDF specification for CDR record structure.
 """
-@inline function CDR(buffer::Vector{UInt8}, offset, FieldSizeT)
+@inline function CDR(buffer::Vector{UInt8}, offset, ::Type{FieldSizeT}) where {FieldSizeT}
     pos = check_record_type(1, buffer, offset, FieldSizeT)
     # Read remaining CDR fields in order as per CDF specification
     fields, pos = read_be_fields(buffer, pos, CDR{FieldSizeT}, Val(1:9))

diff --git a/src/records/cpr.jl b/src/records/cpr.jl
@@ -7,10 +7,9 @@ struct CPR <: Record
     # parameters::Tuple{Vararg{Int32}}
 end
 
-@inline function CPR(buffer::Vector{UInt8}, offset, FieldSizeT)
+@inline function CPR(buffer::Vector{UInt8}, offset, ::Type{FieldSizeT}) where {FieldSizeT}
     pos = check_record_type(11, buffer, offset, FieldSizeT)
-    fields, pos = @read_be_fields(buffer, pos, fieldtypes(CPR)...)
-    # parameter_count, pos = read_be_i(buffer, pos, Int32)
+    fields, pos = read_be_fields(buffer, pos, CPR, Val(1:3)) # reads fields 1:3 in order; NOTE(review): assumes CPR has exactly three fields — confirm
     # parameters = read_be(buffer, pos, parameter_count, Int32)
     return CPR(fields...)
 end