Interpolate Over Large HDF5 Arrays

This example demonstrates how to interpolate over large HDF5 arrays using DiskArrays.jl. This is useful when the field data is too large to fit in memory. The implementation is based on this Discourse post.

julia

using HDF5, DiskArrays, StructArrays, StaticArrays
import DiskArrays: eachchunk, haschunks, readblock!, writeblock!, GridChunks, Chunked,
                   Unchunked
using Interpolations

Implement HDF5DiskArray

First, we define a wrapper around HDF5 Dataset that implements the DiskArrays interface.

julia

struct HDF5DiskArray{T, N, CS} <: AbstractDiskArray{T, N}
   ds::HDF5.Dataset
   cs::CS
end

Base.size(x::HDF5DiskArray) = size(x.ds)
haschunks(x::HDF5DiskArray{<:Any, <:Any, Nothing}) = Unchunked()
haschunks(x::HDF5DiskArray) = Chunked()
eachchunk(x::HDF5DiskArray{<:Any, <:Any, <:GridChunks}) = x.cs
readblock!(x::HDF5DiskArray, aout, r::AbstractUnitRange...) = aout .= x.ds[r...]
writeblock!(x::HDF5DiskArray, v, r::AbstractUnitRange...) = x.ds[r...] = v

function HDF5DiskArray(ds::HDF5.Dataset)
   chunks = try
      HDF5.get_chunk(ds)
   catch
      nothing
   end
   cs = isnothing(chunks) ? nothing : GridChunks(size(ds), chunks)
   HDF5DiskArray{eltype(ds), ndims(ds), typeof(cs)}(ds, cs)
end

Main.HDF5DiskArray

Create Example Data

We create a dummy HDF5 file with some field data.

julia

filename = "example_field.h5"

h5open(filename, "w") do fid
   g = create_group(fid, "mygroup")
   # Create a dataset with chunking enabled
   dset_a = create_dataset(g, "A", Float32, (10, 10, 10), chunk = (5, 5, 5))
   values_a = [Float32(i + j + k) for i in 1:10, j in 1:10, k in 1:10]
   write(dset_a, values_a)
   println("Wrote dataset A")

   # Adding Vector Field B (Bx, By, Bz)

   # Generate data for the vector components
   values_bx = [Float32(sin(i) * j) for i in 1:10, j in 1:10, k in 1:10]
   values_by = [Float32(cos(j) * k) for i in 1:10, j in 1:10, k in 1:10]
   values_bz = [Float32(i * k / 10) for i in 1:10, j in 1:10, k in 1:10]

   # Create datasets with the same chunking properties
   dset_bx = create_dataset(g, "Bx", Float32, (10, 10, 10), chunk = (5, 5, 5))
   dset_by = create_dataset(g, "By", Float32, (10, 10, 10), chunk = (5, 5, 5))
   dset_bz = create_dataset(g, "Bz", Float32, (10, 10, 10), chunk = (5, 5, 5))

   # Write the data
   write(dset_bx, values_bx)
   write(dset_by, values_by)
   write(dset_bz, values_bz)

   println("Wrote datasets Bx, By, Bz")
end

println("Successfully created $filename")

Wrote dataset A
Wrote datasets Bx, By, Bz
Successfully created example_field.h5

Interpolation Function

We define a function itp to query the field at a single location x.

julia

# Open the file and wrap the dataset
h5open(filename, "r") do fid
   da = HDF5DiskArray(fid["mygroup/A"]) |> DiskArrays.cache

   itp = extrapolate(
      interpolate(da, BSpline(Linear(Periodic(OnCell())))), Periodic(OnCell()))

   # Evaluate at a point
   loc_int = (5.0, 5.0, 5.0)
   println("Value at $loc_int: ", itp(loc_int...))

   loc_float = (5.5, 5.5, 5.5)
   println("Value at $loc_float: ", itp(loc_float...))

   loc_out = (-0.5, 1.0, 1.0)
   val_periodic = itp(loc_out...)
   println("Value at $loc_out (Periodic): ", val_periodic)
   # Check correctness of Periodic
   # Note that we assume cell center values. -0.5 wraps to 9.5 (since period is 10).
   # Value at 9.5, 1.0, 1.0. Data is i+j+k.
   println("Expected Periodic: ", 9.5 + 1 + 1)

   # vector field
   Bx = HDF5DiskArray(fid["mygroup/Bx"]) |> DiskArrays.cache
   By = HDF5DiskArray(fid["mygroup/By"]) |> DiskArrays.cache
   Bz = HDF5DiskArray(fid["mygroup/Bz"]) |> DiskArrays.cache
   B = StructArray{SVector{3, eltype(Bx)}}((Bx, By, Bz))
   itp = extrapolate(
      interpolate(B, BSpline(Linear(Periodic(OnCell())))), Periodic(OnCell()))
   println("Value at $loc_out (Periodic): ", itp(loc_out...))
end

Value at (5.0, 5.0, 5.0): 15.0
Value at (5.5, 5.5, 5.5): 16.5
Value at (-0.5, 1.0, 1.0) (Periodic): 11.5
Expected Periodic: 11.5
Value at (-0.5, 1.0, 1.0) (Periodic): [-0.06595131754875183, 0.5403022766113281, 0.949999988079071]

Cleanup

Remove the temporary file.

julia

rm(filename)

Interpolate Over Large HDF5 Arrays ​

Implement HDF5DiskArray ​

Create Example Data ​

Interpolation Function ​

Cleanup ​

Interpolate Over Large HDF5 Arrays

Implement HDF5DiskArray

Create Example Data

Interpolation Function

Cleanup