Skip to content

Interpolate Over Large HDF5 Arrays

This example demonstrates how to interpolate over large HDF5 arrays using DiskArrays.jl. This is useful when the field data is too large to fit in memory. The implementation is based on this Discourse post.

julia
using HDF5, DiskArrays, StructArrays, StaticArrays
import DiskArrays: eachchunk, haschunks, readblock!, writeblock!, GridChunks, Chunked,
    Unchunked
using Interpolations

Implement HDF5DiskArray

First, we define a wrapper around HDF5 Dataset that implements the DiskArrays interface.

julia
struct HDF5DiskArray{T, N, CS} <: AbstractDiskArray{T, N}
    ds::HDF5.Dataset
    cs::CS
end

Base.size(x::HDF5DiskArray) = size(x.ds)
haschunks(x::HDF5DiskArray{<:Any, <:Any, Nothing}) = Unchunked()
haschunks(x::HDF5DiskArray) = Chunked()
eachchunk(x::HDF5DiskArray{<:Any, <:Any, <:GridChunks}) = x.cs
readblock!(x::HDF5DiskArray, aout, r::AbstractUnitRange...) = aout .= x.ds[r...]
writeblock!(x::HDF5DiskArray, v, r::AbstractUnitRange...) = x.ds[r...] = v

function HDF5DiskArray(ds::HDF5.Dataset)
    chunks = try
        HDF5.get_chunk(ds)
    catch
        nothing
    end
    cs = isnothing(chunks) ? nothing : GridChunks(size(ds), chunks)
    return HDF5DiskArray{eltype(ds), ndims(ds), typeof(cs)}(ds, cs)
end
Main.HDF5DiskArray

Create Example Data

We create a dummy HDF5 file with some field data.

julia
filename = "example_field.h5"

h5open(filename, "w") do fid
    g = create_group(fid, "mygroup")
    # Create a dataset with chunking enabled
    dset_a = create_dataset(g, "A", Float32, (10, 10, 10), chunk = (5, 5, 5))
    values_a = [Float32(i + j + k) for i in 1:10, j in 1:10, k in 1:10]
    write(dset_a, values_a)
    println("Wrote dataset A")

    # Adding Vector Field B (Bx, By, Bz)

    # Generate data for the vector components
    values_bx = [Float32(sin(i) * j) for i in 1:10, j in 1:10, k in 1:10]
    values_by = [Float32(cos(j) * k) for i in 1:10, j in 1:10, k in 1:10]
    values_bz = [Float32(i * k / 10) for i in 1:10, j in 1:10, k in 1:10]

    # Create datasets with the same chunking properties
    dset_bx = create_dataset(g, "Bx", Float32, (10, 10, 10), chunk = (5, 5, 5))
    dset_by = create_dataset(g, "By", Float32, (10, 10, 10), chunk = (5, 5, 5))
    dset_bz = create_dataset(g, "Bz", Float32, (10, 10, 10), chunk = (5, 5, 5))

    # Write the data
    write(dset_bx, values_bx)
    write(dset_by, values_by)
    write(dset_bz, values_bz)

    println("Wrote datasets Bx, By, Bz")
end

println("Successfully created $filename")
Wrote dataset A
Wrote datasets Bx, By, Bz
Successfully created example_field.h5

Interpolation Function

We define a function itp to query the field at a single location x.

julia
# Open the file and wrap the dataset
h5open(filename, "r") do fid
    da = HDF5DiskArray(fid["mygroup/A"]) |> DiskArrays.cache

    itp = extrapolate(
        interpolate(da, BSpline(Linear(Periodic(OnCell())))), Periodic(OnCell())
    )

    # Evaluate at a point
    loc_int = (5.0, 5.0, 5.0)
    println("Value at $loc_int: ", itp(loc_int...))

    loc_float = (5.5, 5.5, 5.5)
    println("Value at $loc_float: ", itp(loc_float...))

    loc_out = (-0.5, 1.0, 1.0)
    val_periodic = itp(loc_out...)
    println("Value at $loc_out (Periodic): ", val_periodic)
    # Check correctness of Periodic
    # Note that we assume cell center values. -0.5 wraps to 9.5 (since period is 10).
    # Value at 9.5, 1.0, 1.0. Data is i+j+k.
    println("Expected Periodic: ", 9.5 + 1 + 1)

    # vector field
    Bx = HDF5DiskArray(fid["mygroup/Bx"]) |> DiskArrays.cache
    By = HDF5DiskArray(fid["mygroup/By"]) |> DiskArrays.cache
    Bz = HDF5DiskArray(fid["mygroup/Bz"]) |> DiskArrays.cache
    B = StructArray{SVector{3, eltype(Bx)}}((Bx, By, Bz))
    itp = extrapolate(
        interpolate(B, BSpline(Linear(Periodic(OnCell())))), Periodic(OnCell())
    )
    println("Value at $loc_out (Periodic): ", itp(loc_out...))
end
Value at (5.0, 5.0, 5.0): 15.0
Value at (5.5, 5.5, 5.5): 16.5
Value at (-0.5, 1.0, 1.0) (Periodic): 11.5
Expected Periodic: 11.5
Value at (-0.5, 1.0, 1.0) (Periodic): [-0.06595131754875183, 0.5403022766113281, 0.949999988079071]

Cleanup

Remove the temporary file.

julia
rm(filename)