Interpolate Over Large HDF5 Arrays
This example demonstrates how to interpolate over large HDF5 arrays using DiskArrays.jl. This is useful when the field data is too large to fit in memory. The implementation is based on this Discourse post.
julia
using HDF5, DiskArrays
import DiskArrays: eachchunk, haschunks, readblock!, writeblock!, GridChunks, Chunked,
Unchunked
using InterpolationsImplement HDF5DiskArray
First, we define a wrapper around HDF5 Dataset that implements the DiskArrays interface.
julia
struct HDF5DiskArray{T, N, CS} <: AbstractDiskArray{T, N}
ds::HDF5.Dataset
cs::CS
end
Base.size(x::HDF5DiskArray) = size(x.ds)
haschunks(x::HDF5DiskArray{<:Any, <:Any, Nothing}) = Unchunked()
haschunks(x::HDF5DiskArray) = Chunked()
eachchunk(x::HDF5DiskArray{<:Any, <:Any, <:GridChunks}) = x.cs
readblock!(x::HDF5DiskArray, aout, r::AbstractUnitRange...) = aout .= x.ds[r...]
writeblock!(x::HDF5DiskArray, v, r::AbstractUnitRange...) = x.ds[r...] = v
function HDF5DiskArray(ds::HDF5.Dataset)
chunks = try
HDF5.get_chunk(ds)
catch
nothing
end
cs = isnothing(chunks) ? nothing : GridChunks(size(ds), chunks)
HDF5DiskArray{eltype(ds), ndims(ds), typeof(cs)}(ds, cs)
endMain.HDF5DiskArrayCreate Example Data
We create a dummy HDF5 file with some field data.
julia
filename = "example_field.h5"
h5open(filename, "w") do fid
g = create_group(fid, "mygroup")
# Create a dataset with chunking enabled
dset = create_dataset(g, "myvector", Float32, (10, 10, 10), chunk = (5, 5, 5))
values = [Float32(i + j + k) for i in 1:10, j in 1:10, k in 1:10]
write(dset, values)
endInterpolation Function
We define a function itp to query the field at a single location x.
Open the file and wrap the dataset
@example
h5open(filename, "r") do fid
da = HDF5DiskArray(fid["mygroup/myvector"])
cached = DiskArrays.cache(da)
itp = extrapolate(
interpolate(cached, BSpline(Linear(Periodic(OnCell())))), Periodic(OnCell()))Evaluate at a point
julia
loc_int = (5.0, 5.0, 5.0)
println("Value at $loc_int: ", itp(loc_int...))
loc_float = (5.5, 5.5, 5.5)
println("Value at $loc_float: ", itp(loc_float...))
loc_out = (-0.5, 1.0, 1.0)
val_periodic = itp(loc_out...)
println("Value at $loc_out (Periodic): ", val_periodic)
# Check correctness of Periodic
# Note that we assume cell center values. -0.5 wraps to 9.5 (since period is 10).
# Value at 9.5, 1.0, 1.0. Data is i+j+k.
println("Expected Periodic: ", 9.5 + 1 + 1)
endCleanup
Remove the temporary file.
julia
rm(filename)