Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
[![Build Status](https://github.com/JuliaSpacePhysics/CommonDataFormat.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/JuliaSpacePhysics/CommonDataFormat.jl/actions/workflows/CI.yml?query=branch%3Amain)
[![Coverage](https://codecov.io/gh/JuliaSpacePhysics/CommonDataFormat.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/JuliaSpacePhysics/CommonDataFormat.jl)

A Julia package for reading [Common Data Format (CDF)](https://cdf.gsfc.nasa.gov/) files, widely used in space physics for storing multidimensional data arrays and metadata. See [CDFDatasets.jl](https://github.com/JuliaSpacePhysics/CDFDatasets.jl) for high-level interfaces.
A pure Julia implementation for reading [Common Data Format (CDF)](https://cdf.gsfc.nasa.gov/) files, widely used in space physics for storing multidimensional data arrays and metadata. See [CDFDatasets.jl](https://github.com/JuliaSpacePhysics/CDFDatasets.jl) for high-level interfaces.

## Quick Start

Expand All @@ -32,7 +32,6 @@ var = cdf["temperature"]

## Features

- **Pure Julia implementation** - No external dependencies on CDF libraries
- **Efficient data access** - Memory-mapped access to data and attributes, plus fast decompression using [`LibDeflate`](https://github.com/jakobnissen/LibDeflate.jl)
- **[DiskArrays.jl](https://github.com/JuliaIO/DiskArrays.jl) integration** - Lazy, on-disk representation of data through the `AbstractDiskArray` interface

Expand Down
File renamed without changes.
7 changes: 0 additions & 7 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,3 @@ perf:
@time var2 = ds["elb_pef_hs_epa_spec"]
@time Array(var2)
@time Array(ds["elb_pef_fs_time"])

snoop:
#!/usr/bin/env -S julia --threads=auto --project=. -i
using SnoopCompileCore
invs = @snoop_invalidations using CommonDataFormat
using SnoopCompile, AbstractTrees
trees = invalidation_trees(invs)
26 changes: 10 additions & 16 deletions src/enums.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,24 +45,18 @@ Base.:(==)(x::T, y::RecordType) where {T <: Integer} = x == T(y)
Base.:(==)(x::DataType, y::T) where {T <: Integer} = T(x) == y
Base.:(==)(x::T, y::DataType) where {T <: Integer} = x == T(y)

const type_map = Dict(
CDF_INT1 => Int8,
CDF_INT2 => Int16,
CDF_INT4 => Int32,
CDF_INT8 => Int64,
CDF_UINT1 => UInt8,
CDF_UINT2 => UInt16,
CDF_UINT4 => UInt32,
CDF_REAL4 => Float32,
CDF_REAL8 => Float64,
CDF_BYTE => Int8,
CDF_FLOAT => Float32,
CDF_DOUBLE => Float64,
# code → eltype for fixed-size types ordered roughly by frequency
#
# Each entry is `(numeric data-type code, Julia element type)`. The tuple holds 15 entries
# and is walked by `Base.Cartesian.@nif(16, ...)` in `_variable`: one branch per entry plus
# the final `else`. That literal `16` must stay equal to `length(CODE_TYPE_PAIRS) + 1`
# whenever an entry is added or removed here.
#
# Codes 51/52 (CHAR/UCHAR) are not listed: `_variable` handles them before consulting this
# table because their element type depends on the runtime `num_elems`.
# NOTE(review): the integers are presumably the data-type codes from the CDF internal
# format specification — confirm against the `DataType` enum.
const CODE_TYPE_PAIRS = (
(21, Float32), (22, Float64), (44, Float32), (45, Float64),
(33, TT2000), (31, Epoch), (32, Epoch16),
(1, Int8), (2, Int16), (4, Int32), (8, Int64),
(11, UInt8), (12, UInt16), (14, UInt32), (41, Int8),
)

const type_map = Dict{DataType, Type}(
Dict(DataType(c) => T for (c, T) in CODE_TYPE_PAIRS)...,
CDF_CHAR => UInt8,
CDF_UCHAR => UInt8,
CDF_EPOCH => Epoch,
CDF_EPOCH16 => Epoch16,
CDF_TIME_TT2000 => TT2000
)

function julia_type(cdf_type, num_elems)
Expand Down
37 changes: 28 additions & 9 deletions src/loading/variable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,22 +40,41 @@ end
function variable(cdf::CDFDataset, name)
vdr = find_vdr(cdf, name)
isnothing(vdr) && throw(KeyError(name))
T = julia_type(vdr.data_type, vdr.num_elems)
dims = (record_sizes(vdr)..., vdr.max_rec + 1)
N = vdr isa VDR ? vdr.num_dims + 1 : length(dims)
return CDFVariable{T, N, typeof(vdr), typeof(cdf)}(
name, vdr, cdf, dims
return _variable(cdf, name, vdr)
end

# Dispatch on the runtime record-dimension count. Every `@nif` branch calls the
# `Val`-specialised method with a literal count, so the length of the `dims` tuple is a
# compile-time constant inside that method. Branches `d = 1:11` cover `M = 0:10`; the
# final (12th) branch is the `else` and rejects anything larger.
function _variable(cdf, name, vdr)
    M = num_record_dims(vdr)
    return Base.Cartesian.@nif(
        12,
        d -> M == d - 1,
        d -> _variable(cdf, name, vdr, Val(d - 1)),
        d -> throw(
            ArgumentError("variable has $M dimensions; the CDF format allows at most 10")
        )
    )
end

# Build the `CDFVariable` for a variable with `M` record dimensions. The resulting array
# has `M + 1` dimensions: the record sizes followed by the record count (`max_rec + 1`).
function _variable(cdf, name, vdr, ::Val{M}) where {M}
    record_dims = map(Int, record_sizes(vdr, Val(M)))
    dims = (record_dims..., Int(vdr.max_rec) + 1)
    code = Int(vdr.data_type)

    # CHAR/UCHAR: eltype depends on runtime num_elems, so it cannot come from the
    # static code → type table below.
    if code in (51, 52)
        S = StaticString{Int(vdr.num_elems), UInt8}
        return CDFVariable{S, M + 1, typeof(vdr), typeof(cdf)}(name, vdr, cdf, dims)
    end

    # One branch per `CODE_TYPE_PAIRS` entry (15) plus the trailing `else` (16th), so each
    # `_construct` call receives its element type as a literal.
    return Base.Cartesian.@nif(
        16,
        d -> code == CODE_TYPE_PAIRS[d][1],
        d -> _construct(cdf, name, vdr, dims, CODE_TYPE_PAIRS[d][2]),
        d -> throw(ArgumentError("unsupported CDF data type $code"))
    )
end

# Final construction step: element type `T` and dimension count `N` both arrive through
# the argument types, so the `CDFVariable` type parameters are fixed at this call site.
@inline function _construct(
        cdf, name, vdr, dims::NTuple{N, Int}, ::Type{T}
    ) where {N, T}
    return CDFVariable{T, N, typeof(vdr), typeof(cdf)}(name, vdr, cdf, dims)
end

"""
read!(ds::CDFDataset, name, dest::AbstractArray{T, N}) -> dest

Read the full contents of variable `name` into the preallocated `dest`.

Statically-typed entry point: `T` and `N` come from `dest` instead of the file, so —
unlike `ds[name]`, whose type is only known at runtime — the call chain is resolvable
at compile time and survives `juliac --trim`.
"""
function Base.read!(ds::CDFDataset, name::String, dest::AbstractArray{T, N}) where {T, N}
vdr = find_vdr(ds, name)
Expand Down
10 changes: 10 additions & 0 deletions src/records/vdr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,16 @@ function record_sizes(vdr::rVDR, ::Val{M}) where {M}
return ntuple(i -> sizes[i], Val(M))
end

# Number of per-record dimensions of a variable, as a plain `Int`.
# For a `VDR` it is the stored `num_dims` field.
num_record_dims(vdr::VDR) = Int(vdr.num_dims)

# For an `rVDR` it is the number of non-zero `Int32` words among the first
# `gdr.r_num_dims` 4-byte slots starting at `vdr.pos`.
# NOTE(review): those words are presumably the per-dimension variance flags — confirm
# against `record_sizes(::rVDR, ...)`.
function num_record_dims(vdr::rVDR)
    slots = 1:Int(vdr.gdr.r_num_dims)
    return count(slots) do i
        read_be(vdr.buffer, vdr.pos + (i - 1) * 4, Int32) != 0
    end
end


function Base.size(vdr::AbstractVDR)
records = vdr.max_rec + 1
dims = (record_sizes(vdr)..., records)
Expand Down
49 changes: 20 additions & 29 deletions src/staticstring.jl
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
# https://github.com/mkitti/StaticStrings.jl
# https://github.com/JuliaPy/PythonCall.jl/blob/main/src/Utils/Utils.jl
using Base: between

"""
    StaticString{N, T} <: AbstractString

String whose code units are stored inline as an `NTuple{N, T}`, so the storage width `N`
and the code-unit type `T` are both part of the type.
"""
struct StaticString{N, T} <: AbstractString
# Raw code units; always exactly `N` of them.
codeunits::NTuple{N, T}
# Sole inner constructor: accepts only a tuple whose length and element type already
# match the requested `N` and `T`.
StaticString{N, T}(codeunits::NTuple{N, T}) where {N, T} = new{N, T}(codeunits)
end

function Base.iterate(x::StaticString{N, UInt8}, i::Int = 1) where {N}
i > N && return
i > ncodeunits(x) && return
cs = x.codeunits
c = @inbounds cs[i]
if all(iszero, (cs[j] for j in i:N))
return
elseif (c & 0x80) == 0x00
if (c & 0x80) == 0x00
return (reinterpret(Char, UInt32(c) << 24), i + 1)
elseif (c & 0x40) == 0x00
nothing
Expand Down Expand Up @@ -56,12 +53,25 @@ function Base.iterate(x::StaticString{N, UInt8}, i::Int = 1) where {N}
throw(StringIndexError(x, i))
end

function Base.String(x::StaticString{N, T}) where {N, T}
b = Base.StringVector(N)
return String(b .= x.codeunits)
# Copy the first `ncodeunits(x)` stored code units into a fresh byte buffer and hand that
# buffer over to `String`.
function Base.String(x::StaticString)
    len = ncodeunits(x)
    buf = Base.StringVector(len)
    units = x.codeunits
    @inbounds for i in eachindex(buf)
        buf[i] = units[i]
    end
    return String(buf)
end

@inline Base.ncodeunits(::StaticString{N}) where {N} = N
# CDF CHAR values are fixed-width and null-padded. The logical string stops where the
# trailing run of zero code units begins, which keeps length/collect/String consistent
# with iteration. Returns 0 when every stored code unit is zero (or `N == 0`).
@inline function Base.ncodeunits(s::StaticString{N}) where {N}
    units = s.codeunits
    # Scan from the back for the last non-zero code unit.
    for idx in N:-1:1
        iszero(@inbounds units[idx]) || return idx
    end
    return 0
end
# Code-unit type of the string, read straight from the `T` type parameter.
Base.codeunit(::StaticString{N, T}) where {N, T} = T
# `i`-th stored code unit. `@propagate_inbounds` forwards a caller's `@inbounds` to the
# tuple indexing; the index is checked against the stored tuple, not against `ncodeunits`.
Base.@propagate_inbounds Base.codeunit(s::StaticString, i::Int) = s.codeunits[i]

Expand All @@ -71,23 +81,4 @@ function StaticString(cu::Base.CodeUnits{T}) where {T}
end
StaticString(s::AbstractString) = StaticString(codeunits(s))

Base.isvalid(s::StaticString, i::Int) = checkbounds(Bool, s, i) && thisind(s, i) == i
Base.thisind(s::StaticString, i::Int) = _thisind_str(s, i)

@inline function _thisind_str(s, i::Int)
i == 0 && return 0
n = ncodeunits(s)
i == n + 1 && return i
@boundscheck between(i, 1, n) || throw(BoundsError(s, i))
@inbounds b = codeunit(s, i)
(b & 0xc0 == 0x80) & (i - 1 > 0) || return i
@inbounds b = codeunit(s, i - 1)
between(b, 0b11000000, 0b11110111) && return i - 1
(b & 0xc0 == 0x80) & (i - 2 > 0) || return i
@inbounds b = codeunit(s, i - 2)
between(b, 0b11100000, 0b11110111) && return i - 2
(b & 0xc0 == 0x80) & (i - 3 > 0) || return i
@inbounds b = codeunit(s, i - 3)
between(b, 0b11110000, 0b11110111) && return i - 3
return i
end
# An index is valid when it is in bounds and is the start of the character containing it.
# NOTE(review): `Base._thisind_str` is an unexported, underscore-prefixed Base helper —
# presumably not covered by Julia's public-API stability guarantees; confirm it exists on
# every Julia version this package supports.
Base.isvalid(s::StaticString, i::Int) = checkbounds(Bool, s, i) && Base._thisind_str(s, i) == i
3 changes: 0 additions & 3 deletions test/debug.jl

This file was deleted.

4 changes: 3 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ include("epochs_test.jl")
include("comprehensive_test.jl")
include("cdf2_test.jl")
include("CommonDataModelExt_test.jl")
include("staticstring.jl")
@testset "StaticString" begin
include("staticstring.jl")
end

@testset "Aqua" begin
using Aqua
Expand Down
22 changes: 20 additions & 2 deletions test/staticstring.jl
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
using CommonDataFormat: StaticString
using Test

@testset "StaticString" begin
@testset "Basic operations" begin
s = "Hello, World!"
ss = StaticString(s)

StaticString(codeunits(s))
@test StaticString(codeunits(s)) == s
@test ss == s
@test String(ss) == s
@test !isempty(ss)
Expand All @@ -15,3 +15,21 @@ using Test

@test codeunit(ss) == UInt8
end

@testset "Null padding and UTF-8" begin
# null padding: iterate/length/collect/String must agree
pad = StaticString{8, UInt8}((UInt8('a'), UInt8('b'), zeros(UInt8, 6)...))
@test ncodeunits(pad) == 2
@test length(pad) == 2
@test collect(pad) == ['a', 'b']
@test String(pad) == "ab"
@test pad == "ab"
@test isempty(StaticString{4, UInt8}(ntuple(_ -> 0x00, 4)))

# multi-byte UTF-8 indexing
s = StaticString("héllo")
@test collect(s) == collect("héllo")
@test thisind(s, 3) == 2
@test isvalid(s, 2) && !isvalid(s, 3)
@test length(s) == 5 && ncodeunits(s) == 6
end
Loading