diff --git a/src/Models/WaveGrowthModels1D.jl b/src/Models/WaveGrowthModels1D.jl
index b9d1c35..ed3e076 100644
--- a/src/Models/WaveGrowthModels1D.jl
+++ b/src/Models/WaveGrowthModels1D.jl
@@ -50,7 +50,8 @@ function WaveGrowth1D(; grid::CartesianGridMesh1D,
     currents=nothing,
     periodic_boundary=true,
     boundary_type="same",
-    CBsets=nothing)
+    CBsets=nothing,
+    spline_order::Int=1)
 
     return WaveGrowth2D(;
         grid=grid,
@@ -65,7 +66,8 @@ function WaveGrowth1D(; grid::CartesianGridMesh1D,
         currents=currents,
         periodic_boundary=periodic_boundary,
         boundary_type=boundary_type,
-        CBsets=CBsets)
+        CBsets=CBsets,
+        spline_order=spline_order)
 end
 
 # end of module
diff --git a/src/Models/WaveGrowthModels2D.jl b/src/Models/WaveGrowthModels2D.jl
index c30bae0..f7908ba 100644
--- a/src/Models/WaveGrowthModels2D.jl
+++ b/src/Models/WaveGrowthModels2D.jl
@@ -88,6 +88,8 @@ mutable struct WaveGrowth2D{Grid<:AbstractGrid,
 
     MovieState::Mstat     # state of of the model. Only used for producing movieframes
 
+    spline_order::Base.Int  # B-spline order for particle->grid deposition (1=CIC, 2=TSC, 3=cubic)
+
 end
 
 function Base.getproperty(ow::WaveGrowth2D, s::Symbol)
@@ -214,12 +216,22 @@ function WaveGrowth2D(; grid::GG,
     periodic_boundary=true,
     boundary_type="same", # or "minimal", "same", default is same, only used if periodic_boundary is false
     CBsets=nothing,
+    spline_order::Int=1,  # B-spline deposition order: 1=CIC (default), 2=TSC, 3=cubic
     movie=false) where {PP<:Union{ParticleDefaults2D,String},GG<:AbstractGrid}
 
     if !isnothing(winds) && !(haskey(winds, :u) && haskey(winds, :v))
         error("`winds` must provide fields `u` and `v`")
     end
 
+    if !(spline_order in (1, 2, 3))
+        error("spline_order must be 1, 2, or 3 (got $spline_order)")
+    end
+    # Higher-order deposition is not yet supported on tripolar grids (multi-point seam remap is a
+    # separate task); guard rather than silently deposit at the wrong order.
+    if spline_order > 1 && typeof(grid) <: MeshGrids && occursin("Tripolar", string(nameof(typeof(grid.stats.Ny))))
+        error("spline_order > 1 is not yet supported on tripolar grids; use spline_order=1.")
+    end
+
     # initialize state {SharedArray} given grid and layers
     # Number of state variables 
     Nstate = 3
@@ -356,7 +368,8 @@ function WaveGrowth2D(; grid::GG,
         ocean_points, boundary_points,
         winds,
         currents,
-        Mstat)
+        Mstat,
+        spline_order)
 end
 
 
diff --git a/src/Operators/TimeSteppers.jl b/src/Operators/TimeSteppers.jl
index 2d23b54..335b8fe 100644
--- a/src/Operators/TimeSteppers.jl
+++ b/src/Operators/TimeSteppers.jl
@@ -202,7 +202,11 @@ function time_step!(model::Abstract2DModel, Δt::Float64; forcing=nothing, callb
         @show model.ParticleCollection[:, 6].on
     end 
 
-    time_step!_advance(model, Δt, forcing_xy, FailedCollection)
+    # Resolve the B-spline deposition order Int -> Val{P} ONCE here (function barrier). Below this
+    # point everything specializes on Val{P} and compiles once per order (cached across steps);
+    # the only per-step cost is this single dispatch. See issues #59/#60.
+    spline_val = Val(hasproperty(model, :spline_order) ? model.spline_order : 1)
+    time_step!_advance(model, Δt, forcing_xy, FailedCollection, spline_val)
     # @threads for a_particle in model.ParticleCollection[model.ocean_points]
     #     #@info a_particle.position_ij
     #     mapping_2D.advance!(    a_particle, model.State, FailedCollection,
@@ -252,7 +256,7 @@ function time_step!(model::Abstract2DModel, Δt::Float64; forcing=nothing, callb
 
 end
 
-function time_step!_advance(model::Abstract2DModel, Δt::Float64, forcing_xy, FailedCollection::Vector{AbstractMarkedParticleInstance})
+function time_step!_advance(model::Abstract2DModel, Δt::Float64, forcing_xy, FailedCollection::Vector{AbstractMarkedParticleInstance}, spline::Val{P}=Val(1)) where {P}
 
     @threads for a_particle in model.ParticleCollection[model.ocean_points]
         #@info a_particle.position_ij
@@ -269,7 +273,8 @@ function time_step!_advance(model::Abstract2DModel, Δt::Float64, forcing_xy, Fa
                 model.ODEsettings.log_energy_maximum,
                 model.ODEsettings.wind_min_squared,
                 model.periodic_boundary,
-                model.ODEdefaults)
+                model.ODEdefaults,
+                spline)
 
         # if (a_particle.position_ij[2] == 6) & (particle_on != a_particle.on)            
         #     @info "after advance! outside: $(a_particle.position_ij) particle on change :$(particle_on) to $(a_particle.on)"
@@ -327,7 +332,9 @@ function movie_time_step!(model::Abstract2DModel, Δt; forcing=nothing, callback
     current_forcing === nothing && error("2D movie_time_step! requires forcing data (pass `forcing=...` to Simulation or set `model.winds`)")
     forcing_xy = time_slice_forcing(current_forcing, model.grid, model.clock.time)
 
-    time_step!_advance(model, Δt, forcing_xy, FailedCollection)
+    # resolve B-spline deposition order Int -> Val{P} once (function barrier); see time_step!
+    spline_val = Val(hasproperty(model, :spline_order) ? model.spline_order : 1)
+    time_step!_advance(model, Δt, forcing_xy, FailedCollection, spline_val)
 
     model.MovieState = copy(model.State)
 
diff --git a/src/Operators/mapping_2D.jl b/src/Operators/mapping_2D.jl
index 0990b17..68ac5f3 100644
--- a/src/Operators/mapping_2D.jl
+++ b/src/Operators/mapping_2D.jl
@@ -52,8 +52,10 @@ S       Shared array where particles are stored
 G       (TwoDGrid) Grid that defines the nodepositions
 """
 
-function ParticleToNode!(PI::AbstractParticleInstance, S::StateTypeL1, G::TwoDGrid, periodic_boundary::Bool)
-
+function ParticleToNode!(PI::AbstractParticleInstance, S::StateTypeL1, G::TwoDGrid, periodic_boundary::Bool, spline::Val=Val(1))
+        # NOTE: TwoDGrid is deprecated (issue #43); higher-order B-spline deposition is only
+        # supported on the live MeshGrids/CartesianGrid path. `spline` is accepted for dispatch
+        # compatibility but ignored here — this path always deposits at order 1 (CIC).
         #u[4], u[5] are the x and y positions of the particle
         #index_positions, weights = PIC.compute_weights_and_index(G, PI.ODEIntegrator.u[4], PI.ODEIntegrator.u[5])
         weights_and_index = PIC.compute_weights_and_index_mininal(G, PI.ODEIntegrator.u[4], PI.ODEIntegrator.u[5])
@@ -68,10 +70,10 @@ function ParticleToNode!(PI::AbstractParticleInstance, S::StateTypeL1, G::TwoDGr
         nothing
 end
 
-function ParticleToNode!(PI::AbstractParticleInstance, S::StateTypeL1, G::MeshGrids, periodic_boundary::Bool)
-        
+function ParticleToNode!(PI::AbstractParticleInstance, S::StateTypeL1, G::MeshGrids, periodic_boundary::Bool, spline::Val{P}=Val(1)) where {P}
+
         #u[4], u[5] are the x and y positions of the particle. For the CartesianGrid2D these are cooridnates relative to the particle node
-        weights_and_index = PIC.compute_weights_and_index_mininal(PI.position_ij, PI.ODEIntegrator.u[4], PI.ODEIntegrator.u[5])
+        weights_and_index = PIC.compute_weights_and_index_mininal(PI.position_ij, PI.ODEIntegrator.u[4], PI.ODEIntegrator.u[5], spline)
         # @info PI.position_ij, weights_and_index
 
         #ui[1:2] .= PI.position_xy
@@ -185,9 +187,10 @@ function advance!(PI::AbstractParticleInstance,
                         DT::Float64, 
                         log_energy_maximum::Float64,
                         wind_min_squared::Float64,
-                        periodic_boundary::Bool, 
+                        periodic_boundary::Bool,
                         default_particle::PP,
-                        ) where {PP<:Union{ParticleDefaults,Nothing},FF<:Union{ForcingCollection,ForcingData,NamedTuple{(:u, :v)},Tuple{Float64,Float64}}}
+                        spline::Val{P}=Val(1),
+                        ) where {PP<:Union{ParticleDefaults,Nothing},FF<:Union{ForcingCollection,ForcingData,NamedTuple{(:u, :v)},Tuple{Float64,Float64}},P}
         #@show PI.position_ij
 
         #if ~PI.boundary # if point is not a 
@@ -280,8 +283,8 @@ function advance!(PI::AbstractParticleInstance,
         end
 
         #if PI.ODEIntegrator.u[1] > -13.0 #ODEs.log_energy_minimum # the minimum enerçy is distributed to 4 neighbouring particles
-        if PI.on 
-                ParticleToNode!(PI, S, Grid, periodic_boundary)
+        if PI.on
+                ParticleToNode!(PI, S, Grid, periodic_boundary, spline)
         end
 
         return PI
diff --git a/src/ParticleInCell.jl b/src/ParticleInCell.jl
index cb87061..06667a3 100644
--- a/src/ParticleInCell.jl
+++ b/src/ParticleInCell.jl
@@ -88,9 +88,65 @@ function get_relative_i_and_w(zp_normed::Float64)
 end
 
 
+# ---------------------- B-spline deposition kernels ------------------
+# Generalize the linear (CIC, order 1) weighting above to arbitrary B-spline order P = 1, 2, 3,
+# selectable at model setup, to reduce grid-imprint anisotropy (see issues #59, #60).
+#
+# Centering convention (standard PIC, required for partition of unity Σw = 1):
+#   - odd order  (P = 1, 3): stencil centered on floor(z)
+#   - even order (P = 2):    stencil centered on round(z) (nearest node)
+# Stencil width is N = P + 1. Weights sum to 1 at every order, so the additive deposit conserves
+# total charge/energy exactly regardless of P.
+#
+# Two entry points mirror the linear functions above and keep P = 1 byte-identical:
+#   - get_absolute_i_and_w(z, ::Val{P})           absolute indices, δ = z - center (no rounding)
+#   - get_absolute_i_and_w(z, i_node, ::Val{P})   indices relative to particle node, δ rounded to 6 digits
+
+@inline _bspline_center(z::Float64, ::Val{1}) = floor(z)
+@inline _bspline_center(z::Float64, ::Val{2}) = round(z)
+@inline _bspline_center(z::Float64, ::Val{3}) = floor(z)
+
+@inline _bspline_offsets(::Val{1}) = SVector{2,Int64}(0, 1)
+@inline _bspline_offsets(::Val{2}) = SVector{3,Int64}(-1, 0, 1)
+@inline _bspline_offsets(::Val{3}) = SVector{4,Int64}(-1, 0, 1, 2)
+
+@inline _bspline_weights(δ::Float64, ::Val{1}) = SVector{2,Float64}(1.0 - δ, δ)
+@inline _bspline_weights(δ::Float64, ::Val{2}) =
+    SVector{3,Float64}(0.5 * (0.5 - δ)^2, 0.75 - δ^2, 0.5 * (0.5 + δ)^2)
+@inline function _bspline_weights(δ::Float64, ::Val{3})
+    ω = 1.0 / 6.0
+    return SVector{4,Float64}(ω * (1 - δ)^3, ω * (3δ^3 - 6δ^2 + 4), ω * (-3δ^3 + 3δ^2 + 3δ + 1), ω * δ^3)
+end
+
+"""
+get_absolute_i_and_w(zp_normed::Float64, spline::Val{P})
+B-spline generalization of the absolute-index variant. For P = 1 returns the same indices and
+weights as `get_absolute_i_and_w(zp_normed)`.
+"""
+@inline function get_absolute_i_and_w(zp_normed::Float64, spline::Val{P}) where {P}
+    center = _bspline_center(zp_normed, spline)
+    δ = zp_normed - center
+    i0 = Int(center) + 1
+    return i0 .+ _bspline_offsets(spline), _bspline_weights(δ, spline)
+end
+
+"""
+get_absolute_i_and_w(zp_normed::Float64, i_node::Int64, spline::Val{P})
+B-spline generalization of the particle-node-relative variant (used by MeshGrids/CartesianGrid).
+For P = 1 returns the same indices and weights as `get_absolute_i_and_w(zp_normed, i_node)`,
+including the `round(..., digits=6)` on the fractional offset.
+"""
+@inline function get_absolute_i_and_w(zp_normed::Float64, i_node::Int64, spline::Val{P}) where {P}
+    center = _bspline_center(zp_normed, spline)
+    δ = round(zp_normed - center, digits=6)
+    i0 = Int(center) + i_node
+    return i0 .+ _bspline_offsets(spline), _bspline_weights(δ, spline)
+end
+
+
 
 """
-norm_distance(xp::T, xmin::T, dx::T) 
+norm_distance(xp::T, xmin::T, dx::T)
 returns normalized distance
 """
 function norm_distance(xp::T, xmin::T, dx::T) where {T<:AbstractFloat}
@@ -156,6 +212,18 @@ function compute_weights_and_index_mininal(ij::II, xp::Float64, yp::Float64) whe
     return wni(xi, xw, yi, yw)
 end
 
+"""
+compute_weights_and_index_mininal(ij, xp, yp, spline::Val{P})
+B-spline (order P) variant of the particle-node-relative 2D wrapper. Returns a `wni{P+1}`.
+`spline = Val(1)` is bit-identical to the 3-argument method above.
+"""
+function compute_weights_and_index_mininal(ij::II, xp::Float64, yp::Float64, spline::Val{P}) where {II<:Union{Tuple{Int,Int},CartesianIndex},P}
+    xi, xw = get_absolute_i_and_w(xp, ij[1], spline)
+    yi, yw = get_absolute_i_and_w(yp, ij[2], spline)
+
+    return wni(xi, xw, yi, yw)
+end
+
 """
 compute_weights_and_index(g_pars::OneDGrid, xp::Float64 )
 returns indexes and weights for in 2D for single x point
@@ -322,7 +390,7 @@ function push_to_grid!(grid::StateTypeL1,
                             Nx::Int, Ny::Int,
                             periodic::Bool=true) where {CC<:Union{Vector{Float64},SVector{3,Float64},MVector{3,AbstractFloat}},
                                                             II<:Union{Tuple{Int,Int},SVector{2,Int64}},
-                                                            WW<:Union{Tuple{Float64,Float64},SVector{2,Float16}}}
+                                                            WW<:Tuple{Float64,Float64}}
     if periodic
         grid[ wrap_index!(index_pos[1], Nx) , wrap_index!(index_pos[2], Ny), : ] += weights[1] * weights[2] * charge
     else
@@ -345,7 +413,7 @@ function push_to_grid!(grid::StateTypeL1,
                             Nx::AbstractBoundary, 
                             Ny::AbstractBoundary) where {CC<:Union{Vector{Float64},SVector{3,Float64},MVector{3,AbstractFloat}},
                                                             II<:Union{Tuple{Int,Int},SVector{2,Int64}},
-                                                            WW<:Union{Tuple{Float64,Float64},SVector{2,Float16}}}
+                                                            WW<:Tuple{Float64,Float64}}
 
     # conditions where nothing should be returned
     if  (Nx isa N_NonPeriodic) & ~test_domain(index_pos[1], Nx.N) | # non-periodic in x and y-position is out of domain
@@ -501,10 +569,18 @@ function construct_loop(wni::FieldVector{4,SVector})
 #     return zip(idx, wtx)
 # end
 
-function construct_loop(wni::FieldVector{4,SVector})
-    idx = SVector{4,Tuple{Int,Int}}(                    (wni.xi[1], wni.yi[1]), (wni.xi[2], wni.yi[1]), (wni.xi[1], wni.yi[2]), (wni.xi[2], wni.yi[2]))
-    wtx = SVector{4,Tuple{AbstractFloat,AbstractFloat}}((wni.xw[1], wni.yw[1]), (wni.xw[2], wni.yw[1]), (wni.xw[1], wni.yw[2]), (wni.xw[2], wni.yw[2]))
-    return zip(idx, wtx)
+"""
+construct_loop(w::wni{N})
+Builds the full N×N separable stencil (N = spline order + 1) as a zip of (index, weight) pairs.
+Flatten is column-major (x fastest); for N = 2 this reproduces the original CIC ordering
+(x1,y1),(x2,y1),(x1,y2),(x2,y2) exactly. Weights are kept as the (xw, yw) 2-tuple so the
+single-point push_to_grid! arithmetic `weights[1]*weights[2]*charge` is unchanged.
+"""
+function construct_loop(w::wni{N}) where {N}
+    M = N * N
+    idx = ntuple(k -> (w.xi[(k - 1) % N + 1], w.yi[(k - 1) ÷ N + 1]), Val(M))
+    wtx = ntuple(k -> (w.xw[(k - 1) % N + 1], w.yw[(k - 1) ÷ N + 1]), Val(M))
+    return zip(SVector{M,Tuple{Int,Int}}(idx), SVector{M,Tuple{Float64,Float64}}(wtx))
 end
 
 
@@ -513,11 +589,11 @@ wrapper over FieldVector weight&index (wni),
 """
 function push_to_grid!(grid::StateTypeL1,
     charge::CC,
-    wni::FieldVector,
+    weights_and_index::wni,
     Nx::Int, Ny::Int,
     periodic::Bool=true) where CC <: Union{Vector{Float64}, SVector{3, Float64}}
     #@info "this is version D"
-    for (i, w) in construct_loop(wni)
+    for (i, w) in construct_loop(weights_and_index)
         push_to_grid!(grid, charge, i, w, Nx, Ny, periodic)
     end
 end
@@ -529,10 +605,10 @@ wrapper over FieldVector weight&index (wni),
 """
 function push_to_grid!(grid::StateTypeL1,
     charge::CC,
-    wni::FieldVector,
+    weights_and_index::wni,
     Nx::AbstractBoundary, Ny::AbstractBoundary) where {CC<:Union{Vector{Float64},SVector{3,Float64}}}
     #@info "this is version D"
-    for (i, w) in construct_loop(wni)
+    for (i, w) in construct_loop(weights_and_index)
         push_to_grid!(grid, charge, i, w, Nx, Ny)
     end
 end
diff --git a/src/custom_structures.jl b/src/custom_structures.jl
index 5208b46..56b25c2 100644
--- a/src/custom_structures.jl
+++ b/src/custom_structures.jl
@@ -60,14 +60,23 @@ Base.copy(s::ParticleInstance1D) = ParticleInstance1D(s.position_ij, s.position_
 Base.copy(s::ParticleInstance2D) = ParticleInstance2D(s.position_ij, s.position_xy, s.ODEIntegrator, s.boundary, s.on)
 
 # Regridding types:
-"""Weights & Index (wni) FieldVector """
-struct wni{TI<:SVector,TF<:SVector} <: FieldVector{4,SVector}
+"""
+Weights & Index (wni) for a separable N-point-per-axis deposition stencil (N = spline order + 1).
+`xi`/`yi` are the N integer node indices per axis, `xw`/`yw` the N B-spline weights per axis.
+N is encoded in the type so `construct_loop` unrolls the N^2 stencil per order.
+"""
+struct wni{N,TI<:SVector{N,Int64},TF<:SVector{N,Float64}}
         xi::TI
         xw::TF
         yi::TI
         yw::TF
 end
 
+# Explicit outer constructor so N is inferred from the stencil width (avoids relying on
+# static-parameter solving of N through the SVector{N,...} field constraints).
+wni(xi::SVector{N,Int64}, xw::SVector{N,Float64}, yi::SVector{N,Int64}, yw::SVector{N,Float64}) where {N} =
+        wni{N,SVector{N,Int64},SVector{N,Float64}}(xi, xw, yi, yw)
+
 # Define Boundary Types:
 
 struct N_Periodic{T} <: AbstractBoundary
diff --git a/test/Project.toml b/test/Project.toml
index 129c2a5..229ad57 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -2,5 +2,6 @@
 IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
 Oceananigans = "9e8cae18-63c1-5223-a75c-80ca9d6e9a09"
 SharedArrays = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
+StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/test/benchmark/bench_bspline_deposition.jl b/test/benchmark/bench_bspline_deposition.jl
new file mode 100644
index 0000000..92bbb0b
--- /dev/null
+++ b/test/benchmark/bench_bspline_deposition.jl
@@ -0,0 +1,96 @@
+"""
+Benchmark the particle->grid B-spline deposition across orders P = 1, 2, 3 (issues #59, #60).
+
+Run from the repo root:
+    julia --project=. test/benchmark/bench_bspline_deposition.jl
+
+Measures per-deposit wall time and allocations through the production path
+(`compute_weights_and_index_mininal` + the `wni` `push_to_grid!`) and writes a scaling plot to
+`plots/bspline_deposition_scaling.png`.
+
+Expectations:
+- cost scales roughly with the number of stencil points (P+1)^2 = 4 / 9 / 16 for P = 1 / 2 / 3,
+- allocations scale *linearly with the number of stencil points* and stay constant per point
+  across orders. That per-point cost is the pre-existing `grid[i,j,:] += w*charge` array-slice
+  assignment (unchanged by this work); the linear (not super-linear) scaling and the absence of
+  any extra per-order overhead is what confirms the Val{P} deposit specializes correctly with no
+  dynamic dispatch on the order.
+
+Not included in runtests.jl.
+"""
+
+using BenchmarkTools
+using SharedArrays
+using StaticArrays
+using Plots
+using Plots.PlotMeasures: mm
+import PiCLES.ParticleInCell as PIC
+using PiCLES.custom_structures: N_Periodic
+
+gr()
+
+const Nx, Ny = 64, 64
+const Bx, By = N_Periodic(Nx), N_Periodic(Ny)
+const CHARGE = SVector{3,Float64}(2.5, 0.3, -0.7)
+const IJ = (32, 32)
+const XP, YP = 0.37, 0.62
+const ORDERS = (1, 2, 3)
+
+# one deposit through the full production path (weights + push), order P
+function deposit!(S, spline::Val)
+    w = PIC.compute_weights_and_index_mininal(IJ, XP, YP, spline)
+    PIC.push_to_grid!(S, CHARGE, w, Bx, By)
+    return nothing
+end
+
+function main()
+    S = SharedArray{Float64,3}((Nx, Ny, 3))
+    pts = Int[]; tmin = Float64[]; tmed = Float64[]; allocs = Int[]
+
+    for P in ORDERS
+        spline = Val(P)
+        S .= 0.0
+        deposit!(S, spline)                       # warm up / compile
+        b = @benchmark deposit!($S, $spline) samples = 10000 evals = 100
+        a = @allocated deposit!(S, spline)
+        push!(pts, (P + 1)^2)
+        push!(tmin, minimum(b).time)
+        push!(tmed, median(b).time)
+        push!(allocs, a)
+        println("P=$P  (stencil $(P+1)^2 = $((P+1)^2) pts):  ",
+            "min ", minimum(b).time, " ns,  median ", median(b).time, " ns,  ",
+            "allocs/deposit = ", a)
+    end
+
+    # ---- scaling plot: time and allocations vs number of stencil points -------
+    # ideal references anchored at the P=1 point: cost proportional to stencil points.
+    ideal_t = tmed[1] .* pts ./ pts[1]
+    ideal_a = allocs[1] .* pts ./ pts[1]
+    labels = ["P=$P" for P in ORDERS]
+
+    p1 = plot(pts, tmed; marker=:circle, ms=6, lw=2, label="median",
+        xlabel="stencil points (P+1)²", ylabel="time per deposit [ns]",
+        title="deposit time", legend=:topleft,
+        left_margin=12mm, bottom_margin=10mm, top_margin=5mm, right_margin=5mm)
+    plot!(p1, pts, tmin; marker=:diamond, ms=5, lw=1, ls=:dash, label="min")
+    plot!(p1, pts, ideal_t; ls=:dot, lw=2, color=:gray, label="∝ stencil pts (ideal)")
+    annotate!(p1, [(pts[i], tmed[i], text("  " * labels[i], 8, :left, :bottom)) for i in eachindex(pts)])
+
+    p2 = plot(pts, allocs; marker=:circle, ms=6, lw=2, label="measured",
+        xlabel="stencil points (P+1)²", ylabel="allocations per deposit [bytes]",
+        title="deposit allocations", legend=:topleft,
+        left_margin=14mm, bottom_margin=10mm, top_margin=5mm, right_margin=5mm)
+    plot!(p2, pts, ideal_a; ls=:dot, lw=2, color=:gray, label="∝ stencil pts (ideal)")
+    annotate!(p2, [(pts[i], allocs[i], text("  " * labels[i], 8, :left, :bottom)) for i in eachindex(pts)])
+
+    fig = plot(p1, p2; layout=(1, 2), size=(1100, 480),
+        plot_title="B-spline PIC deposition scaling (P=1,2,3)")
+
+    outdir = joinpath(@__DIR__, "..", "..", "plots")
+    mkpath(outdir)
+    outpath = joinpath(outdir, "bspline_deposition_scaling.png")
+    savefig(fig, outpath)
+    println("wrote ", abspath(outpath))
+end
+
+main()
diff --git a/test/runtests.jl b/test/runtests.jl
index 9d4161c..c16be5f 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -7,8 +7,9 @@ using Test
     include("unit/test_single_particle_2d_local_winds_smoke.jl")
     include("unit/test_single_particle_1d_alias_smoke.jl")
     include("unit/test_grids_homogeneous_forcing_smoke.jl")
+    include("unit/test_pic_bspline_deposition.jl")
     include("unit/test_pic_1d_charge_conservation.jl")
     include("unit/test_pic_2d_propagation_energy_conservation.jl")
-    # include("unit/test_1d_wave_growth_quantitative_periodic.jl")
+    include("unit/test_1d_wave_growth_quantitative_periodic.jl")
     include("unit/test_1d_wave_growth_quantitative_single_particle.jl")
 end
diff --git a/test/unit/test_1d_wave_growth_quantitative_periodic.jl b/test/unit/test_1d_wave_growth_quantitative_periodic.jl
index 774c9ae..6006f4b 100644
--- a/test/unit/test_1d_wave_growth_quantitative_periodic.jl
+++ b/test/unit/test_1d_wave_growth_quantitative_periodic.jl
@@ -81,8 +81,15 @@ end
         (u10 = 15.0, DT = 10minutes, Nx = 101),
     ]
 
-    @testset "Periodic case sweep" begin
-        for (i, case) in enumerate(case_list)
+    # B-spline deposition order sweep. The full case list runs at P=1 (CIC, the established
+    # coverage); higher orders P=2,3 are validated on a representative case to keep runtime
+    # reasonable while confirming the wave-growth physics is unchanged by the deposition order
+    # (issues #59, #60). Index 2 is the u10=15, DT=10min, Nx=101 case.
+    cases_for_order = Dict(1 => collect(eachindex(case_list)), 2 => [2], 3 => [2])
+
+    @testset "Periodic case sweep, spline_order=$(P)" for P in (1, 2, 3)
+        for i in cases_for_order[P]
+            case = case_list[i]
             u10, DT, Nx = case
             t_target_sec = T_TILDE_TARGET * u10 / 9.81
             stop_time = t_target_sec + DT
@@ -108,7 +115,8 @@ end
                 ODEinit_type="mininmal",
                 minimal_particle=FetchRelations.MinimalParticle(u10, 0, DT),
                 periodic_boundary=true,
-                boundary_type="same")
+                boundary_type="same",
+                spline_order=P)
 
             sim = Simulation(model, Δt=DT, stop_time=stop_time, verbose=false)
             initialize_simulation!(sim)
diff --git a/test/unit/test_pic_1d_charge_conservation.jl b/test/unit/test_pic_1d_charge_conservation.jl
index e035418..e03512b 100644
--- a/test/unit/test_pic_1d_charge_conservation.jl
+++ b/test/unit/test_pic_1d_charge_conservation.jl
@@ -1,51 +1,58 @@
 """
-Quantitative 1D PIC conservation checks.
-
-This test mirrors the non-divergent manual cases from
-test/manual/T01_test_PIC_1D.jl and verifies total charge remains within 2%
-of the initial value over an integration that is 4x longer than in the manual
-diagnostic script.
+Quantitative 1D PIC charge-conservation checks, swept over B-spline deposition order P=1,2,3.
+
+This mirrors the non-divergent manual cases from test/manual/T01_test_PIC_1D.jl, but deposits
+through the **production** particle->grid path used by the live engine
+(`ParticleInCell.compute_weights_and_index_mininal` + the `wni` `push_to_grid!`) on a thin
+periodic 2D mesh, rather than the legacy standalone 1D `push_to_grid!` (which no simulation uses;
+see issues #59, #60 and #38). Charge is laid out along x at a single y-row; higher orders may
+spread a little in y, but partition of unity keeps the total exactly conserved, so the same ±2%
+window holds for every P. Readback collapses the y-dimension to recover the per-x-node charge.
 """
 
 using Test
 using SharedArrays
+using StaticArrays
 
 using PiCLES.Grids.CartesianGrid: CartesianGridMesh1D, gridnotes_1d
 import PiCLES.ParticleInCell
+using PiCLES.custom_structures: N_Periodic
+
+const NY_DEPOSIT = 5      # thin-y mesh wide enough that the cubic (P=3) y-stencil never wraps
+const JROW = 3            # center row for the 1D charge line
 
 function grid1d_axes(grid1d)
     stats = grid1d.stats
     return (Nx=stats.Nx.N, xmin=stats.xmin, xmax=stats.xmax, dx=stats.dx, x=gridnotes_1d(grid1d))
 end
 
-function compute_weights_and_index_1d(grid1d, xp::Vector{Float64})
-    axes = grid1d_axes(grid1d)
-    index_list, weight_list = [], []
-    for xi in xp
-        xp_normed = (xi - axes.xmin) / axes.dx
-        idx, wtx = ParticleInCell.get_absolute_i_and_w(xp_normed)
-        push!(index_list, idx)
-        push!(weight_list, wtx)
-    end
-    return index_list, weight_list
-end
-
-function integrate_charge_history(grid1d, charges_1d, xp; N::Int, cg::Float64)
-    axes = grid1d_axes(grid1d)
+function integrate_charge_history(grid1d, charges_1d, xp; N::Int, cg::Float64, P::Int)
+    ax = grid1d_axes(grid1d)
+    Nx = ax.Nx
     q = copy(charges_1d)
     x = copy(xp)
 
-    state = SharedMatrix{Float64}(axes.Nx, 1)
+    Bx = N_Periodic(Nx)
+    By = N_Periodic(NY_DEPOSIT)
+    spline = Val(P)
+    S = SharedArray{Float64,3}((Nx, NY_DEPOSIT, 3))
+
     charge_hist = Vector{Float64}(undef, N + 1)
     charge_hist[1] = Float64(sum(q))
 
     for ti in 1:N
-        state .= 0
-        index_positions, weights = compute_weights_and_index_1d(grid1d, x)
-        ParticleInCell.push_to_grid!(state, q, index_positions, weights, axes.Nx, true)
-
-        q = dropdims(Array{Float64}(state), dims=2)
-        x = axes.x .+ cg .* q
+        S .= 0.0
+        for i in 1:Nx
+            # absolute normalized x position, offset relative to the parcel's home node i (1-based)
+            znorm = (x[i] - ax.xmin) / ax.dx
+            zrel = znorm - (i - 1)
+            w = ParticleInCell.compute_weights_and_index_mininal((i, JROW), zrel, 0.0, spline)
+            ParticleInCell.push_to_grid!(S, SVector{3,Float64}(q[i], 0.0, 0.0), w, Bx, By)
+        end
+
+        # recover per-x-node charge by collapsing the (small) y-spread, then self-advect
+        q = vec(sum(Array(S)[:, :, 1], dims=2))
+        x = ax.x .+ cg .* q
 
         charge_hist[ti + 1] = Float64(sum(q))
     end
@@ -82,10 +89,12 @@ end
         (name="sin left, nonlinear advection", N=200, cg=-0.3, init=init_sin_left_case),
     ]
 
-    for case in cases
-        xp, charges_1d = case.init(axes)
-        charge_hist = integrate_charge_history(grid1d, charges_1d, xp; N=case.N, cg=case.cg)
-        drift = max_relative_charge_drift(charge_hist)
-        @test drift <= 0.02
+    @testset "spline_order=$(P)" for P in (1, 2, 3)
+        for case in cases
+            xp, charges_1d = case.init(axes)
+            charge_hist = integrate_charge_history(grid1d, charges_1d, xp; N=case.N, cg=case.cg, P=P)
+            drift = max_relative_charge_drift(charge_hist)
+            @test drift <= 0.02
+        end
     end
 end
diff --git a/test/unit/test_pic_2d_propagation_energy_conservation.jl b/test/unit/test_pic_2d_propagation_energy_conservation.jl
index 7cc0b28..fc6df8f 100644
--- a/test/unit/test_pic_2d_propagation_energy_conservation.jl
+++ b/test/unit/test_pic_2d_propagation_energy_conservation.jl
@@ -21,11 +21,11 @@ using Oceananigans.Units
 function seed_blob!(sim; cgx, cgy, patch_i=5:15, patch_j=5:15)
     for PI in sim.model.ParticleCollection[patch_i, patch_j]
         reset_PI_u!(PI, ui=FetchRelations.get_initial_windsea(cgx, cgy, 2hour, particle_state=true))
-        ParticleToNode!(PI, sim.model.State, sim.model.grid, sim.model.periodic_boundary)
+        ParticleToNode!(PI, sim.model.State, sim.model.grid, sim.model.periodic_boundary, Val(sim.model.spline_order))
     end
 end
 
-function energy_series_for_case(; cgx, cgy, steps=100)
+function energy_series_for_case(; cgx, cgy, steps=100, spline_order=1)
     U10, V10 = 0.0, 0.0
     DT = Float64(20minutes)
 
@@ -69,6 +69,7 @@ function energy_series_for_case(; cgx, cgy, steps=100)
         ODEinit_type=ParticleDefaults(particle_min),
         periodic_boundary=true,
         boundary_type="same",
+        spline_order=spline_order,
         movie=false,
     )
 
@@ -93,17 +94,64 @@ end
         (name="lower-left", cgx=-10.0, cgy=-12.0),
     ]
 
-    for case in cases
-        e = energy_series_for_case(; cgx=case.cgx, cgy=case.cgy, steps=100)
-        @test length(e) == 100
+    # Sweep B-spline deposition order. Partition of unity makes the deposit conservative at every
+    # order, so the same ±2% window must hold for P = 1, 2, 3 (issues #59, #60).
+    @testset "spline_order=$(P)" for P in (1, 2, 3)
+        for case in cases
+            e = energy_series_for_case(; cgx=case.cgx, cgy=case.cgy, steps=100, spline_order=P)
+            @test length(e) == 100
 
-        e_ref = e[3]
-        e_final = e[end]
+            e_ref = e[3]
+            e_final = e[end]
 
-        @test isfinite(e_ref) && isfinite(e_final)
-        @test e_ref != 0.0
+            @test isfinite(e_ref) && isfinite(e_final)
+            @test e_ref != 0.0
 
-        rel_change = abs(e_final - e_ref) / abs(e_ref)
-        @test rel_change <= 0.02
+            rel_change = abs(e_final - e_ref) / abs(e_ref)
+            @test rel_change <= 0.02
+        end
     end
 end
+
+# Regression guard: movie_time_step! (the entry point used by the comparison harness and movie
+# output) is a separate code path from time_step! and must also honor spline_order. A bug where it
+# bypassed the Val barrier silently deposited at P=1 regardless of the model's spline_order.
+function movie_state_for_order(P; cgx=10.0, cgy=12.0, steps=8)
+    DT = Float64(20minutes)
+    ODEpars, Const_ID, _ = ODEParameters(r_g=0.85)
+    u(x, y, t) = 0.0 * x + 0.0 * y + 0.0 * t
+    v(x, y, t) = 0.0 * x + 0.0 * y + 0.0 * t
+    forcing = PiCLES.custom_structures.ForcingCollection(u_wind=u, v_wind=v)
+    grid = TwoDCartesianGridMesh(400e3, 41, 300e3, 31; periodic_boundary=(true, true))
+    particle_system = particle_equations(γ=Const_ID.γ, q=Const_ID.q,
+        propagation=true, input=false, dissipation=false, peak_shift=false, direction=false)
+    WindSeamin = FetchRelations.MinimalWindsea(0.0, 0.0, DT)
+    ODE_settings = ODESettings(Parameters=ODEpars, log_energy_minimum=log(WindSeamin["E"]),
+        log_energy_maximum=log(27), saving_step=6000, timestep=DT, total_time=12days,
+        dt=1e-3, dtmin=1e-4, dtmax=DT)
+    particle_min = FetchRelations.MinimalParticle(2, 0, DT)
+    model = WaveGrowthModels2D.WaveGrowth2D(grid=grid, ODEsys=particle_system, ODEsets=ODE_settings,
+        ODEinit_type=ParticleDefaults(particle_min), periodic_boundary=true, boundary_type="same",
+        spline_order=P, movie=true)
+    sim = Simulation(model; forcing=forcing, Δt=DT, stop_time=16hours)
+    initialize_simulation!(sim)
+    for PI in sim.model.ParticleCollection[5:15, 5:15]
+        reset_PI_u!(PI, ui=FetchRelations.get_initial_windsea(cgx, cgy, 2hour, particle_state=true))
+        ParticleToNode!(PI, sim.model.State, sim.model.grid, sim.model.periodic_boundary, Val(P))
+    end
+    local ms = copy(sim.model.State)
+    for _ in 1:steps
+        TimeSteppers.movie_time_step!(sim.model, sim.Δt; forcing=sim.forcing)
+        ms = copy(sim.model.MovieState)
+    end
+    return ms
+end
+
+@testset "movie_time_step! honors spline_order" begin
+    m1 = movie_state_for_order(1)
+    m3 = movie_state_for_order(3)
+    # conservative deposit at every order -> total energy matches across orders
+    @test sum(m1[:, :, 1]) ≈ sum(m3[:, :, 1]) rtol = 0.05
+    # but the fields must differ -> proves the movie path actually applies the spline order
+    @test !isapprox(m1[:, :, 1], m3[:, :, 1]; rtol=1e-6)
+end
diff --git a/test/unit/test_pic_bspline_deposition.jl b/test/unit/test_pic_bspline_deposition.jl
new file mode 100644
index 0000000..b3bf730
--- /dev/null
+++ b/test/unit/test_pic_bspline_deposition.jl
@@ -0,0 +1,108 @@
+"""
+Unit tests for the generalized B-spline particle->grid deposition (issues #59, #60).
+
+Covers:
+1. Partition of unity (Σw = 1) for orders P = 1, 2, 3, both the absolute and the
+   particle-node-relative weight kernels -> guarantees exact charge/energy conservation.
+2. P = 1 regression: the Val(1) kernels are bit-identical to the original linear
+   `get_absolute_i_and_w`, and a single-particle deposit at Val(1) reproduces the original
+   4-node CIC stencil exactly.
+3. Higher orders deposit onto the expected wider, conservative stencil.
+"""
+
+using Test
+using StaticArrays
+using SharedArrays
+
+import PiCLES.ParticleInCell as PIC
+using PiCLES.custom_structures: N_Periodic
+
+@testset "B-spline deposition kernels" begin
+
+    zs = collect(range(-7.3, 9.7, length=137))
+
+    @testset "partition of unity (absolute + relative), P=$(P)" for P in (1, 2, 3)
+        for z in zs
+            _, w = PIC.get_absolute_i_and_w(z, Val(P))
+            @test length(w) == P + 1
+            @test sum(w) ≈ 1.0 atol = 1e-12
+
+            for i_node in (1, 7, 23)
+                ir, wr = PIC.get_absolute_i_and_w(z, i_node, Val(P))
+                @test length(wr) == P + 1
+                @test sum(wr) ≈ 1.0 atol = 1e-12
+                # relative indices are the absolute ones shifted by the node position
+                ia, _ = PIC.get_absolute_i_and_w(z, Val(P))
+                @test ir == ia .+ (i_node - 1)
+            end
+        end
+    end
+
+    @testset "P=1 bit-identical to legacy linear kernel" begin
+        for z in zs
+            i1, w1 = PIC.get_absolute_i_and_w(z, Val(1))
+            i0, w0 = PIC.get_absolute_i_and_w(z)
+            @test i1 == i0
+            @test w1 == w0
+
+            for i_node in (1, 7, 23)
+                ir1, wr1 = PIC.get_absolute_i_and_w(z, i_node, Val(1))
+                ir0, wr0 = PIC.get_absolute_i_and_w(z, i_node)
+                @test ir1 == ir0
+                @test wr1 == wr0
+            end
+        end
+    end
+
+    @testset "non-negative weights" for P in (1, 2, 3)
+        for z in zs
+            _, w = PIC.get_absolute_i_and_w(z, Val(P))
+            @test all(w .>= 0.0)
+        end
+    end
+end
+
+@testset "B-spline single-particle deposit" begin
+    # Small periodic grid; deposit one particle well inside so even the cubic stencil stays
+    # interior. charge = (energy, mom_x, mom_y).
+    Nx, Ny = 16, 16
+    Bx, By = N_Periodic(Nx), N_Periodic(Ny)
+    charge = SVector{3,Float64}(2.5, 0.3, -0.7)
+    ij = (8, 8)
+    xp, yp = 0.37, 0.62   # fractional offsets relative to the particle node
+
+    @testset "total deposited charge conserved, P=$(P)" for P in (1, 2, 3)
+        S = SharedArray{Float64,3}((Nx, Ny, 3))
+        S .= 0.0
+        wni = PIC.compute_weights_and_index_mininal(ij, xp, yp, Val(P))
+        PIC.push_to_grid!(S, charge, wni, Bx, By)
+        for c in 1:3
+            @test sum(S[:, :, c]) ≈ charge[c] atol = 1e-12
+        end
+        # number of touched nodes is (P+1)^2 at most
+        @test count(!iszero, S[:, :, 1]) <= (P + 1)^2
+    end
+
+    @testset "P=1 reproduces the 4-node CIC stencil exactly" begin
+        S = SharedArray{Float64,3}((Nx, Ny, 3))
+        S .= 0.0
+        wni = PIC.compute_weights_and_index_mininal(ij, xp, yp, Val(1))
+        PIC.push_to_grid!(S, charge, wni, Bx, By)
+
+        # legacy CIC: node ij is floor; weights (1-δ, δ) per axis with δ = round(frac, 6)
+        dx = round(xp - floor(xp), digits=6)
+        dy = round(yp - floor(yp), digits=6)
+        i0 = ij[1] + Int(floor(xp))
+        j0 = ij[2] + Int(floor(yp))
+        expected = Dict(
+            (i0, j0)         => (1 - dx) * (1 - dy),
+            (i0 + 1, j0)     => dx * (1 - dy),
+            (i0, j0 + 1)     => (1 - dx) * dy,
+            (i0 + 1, j0 + 1) => dx * dy,
+        )
+        for ((i, j), wexp) in expected
+            @test S[i, j, 1] ≈ wexp * charge[1] atol = 1e-12
+        end
+        @test count(!iszero, S[:, :, 1]) == 4
+    end
+end