Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .github/build-ci/manifests/gcc-cuda.spack.yaml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
spack:
specs:
- issm @git.{{ ref }} +wrappers ~ad +cuda
packages:
# Workaround due to https://github.com/spack/spack-packages/pull/3644
re2c:
require:
- '@3.1'

gcc:
require:
- '@{{ gcc_compiler_version }}'
all:
require:
- '%access_gcc'
- 'target={{ target }}'
concretizer:
unify: false
view: false
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,6 @@ jobs:
spack-manifest-path: ${{ matrix.file }}
allow-ssh-into-spack-install: false # If true, PR author must ssh into instance to complete job
spack-manifest-data-path: .github/build-ci/data/standard.json
access-spack-packages-ref: justinh2002/cuda_issm_gpu
# Default args (including explicit spack/spack-packages/spack-config versions)
# are specified in https://github.com/ACCESS-NRI/build-ci/tree/v3/.github/workflows#inputs
20 changes: 19 additions & 1 deletion externalpackages/petsc/install-3.22-gadi-gpu.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,21 @@ if [ ! -d "${CUDA_DIR}" ]; then
exit 1
fi

# Build PETSc against the SAME MPI that ISSM uses (system OpenMPI), rather than
# letting PETSc download its own MPICH. A mismatched MPI causes issm.exe to be
# linked against two MPI runtimes at once, which breaks any np>1 (multi-rank)
# run: each rank initialises as its own standalone MPI_COMM_WORLD rank-0.
# The OpenMPI compiler wrappers must be on PATH (module load openmpi/4.1.3).
for w in mpicc mpicxx mpif90; do
if ! command -v ${w} &>/dev/null; then
echo "ERROR: ${w} not found. Load OpenMPI first: module load openmpi/4.1.3"
exit 1
fi
done
if ! mpicc --showme:version 2>/dev/null | grep -qi "open mpi"; then
echo "WARNING: mpicc does not appear to be OpenMPI; check your modules."
fi

# Environment
if [ -z ${LDFLAGS+x} ]; then
LDFLAGS=""
Expand Down Expand Up @@ -51,14 +66,17 @@ cd ${PETSC_DIR}
--prefix="${PREFIX}" \
--PETSC_DIR="${PETSC_DIR}" \
--LDFLAGS="${LDFLAGS}" \
--CFLAGS="-g -O2" --CXXFLAGS="-g -O2" --FFLAGS="-g -O2" \
--with-debugging=0 \
--with-valgrind=0 \
--with-x=0 \
--with-ssl=0 \
--with-pic=1 \
--with-cc=mpicc \
--with-cxx=mpicxx \
--with-fc=mpif90 \
--download-fblaslapack=1 \
--download-metis=1 \
--download-mpich=1 \
--download-mumps=1 \
--download-parmetis=1 \
--download-scalapack=1 \
Expand Down
14 changes: 14 additions & 0 deletions m4/issm_options.m4
Original file line number Diff line number Diff line change
Expand Up @@ -1394,6 +1394,20 @@ AC_DEFUN([ISSM_OPTIONS],[
AC_DEFINE([_HAVE_PETSC_], [1], [with PETSc in ISSM src])
AC_SUBST([PETSCINCL])
AC_SUBST([PETSCLIB])

dnl Detect CUDA support in the configured PETSc install {{{
AC_MSG_CHECKING(whether PETSc was built with CUDA support)
PETSC_HAS_CUDA=no
if test -f "${PETSC_ROOT}/include/petscconf.h"; then
if grep -q "PETSC_HAVE_CUDA" "${PETSC_ROOT}/include/petscconf.h"; then
PETSC_HAS_CUDA=yes
fi
fi
AC_MSG_RESULT([${PETSC_HAS_CUDA}])
if test "x${PETSC_HAS_CUDA}" == "xyes"; then
AC_DEFINE([_HAVE_PETSC_CUDA_], [1], [PETSc built with CUDA support])
fi
dnl }}}
fi
dnl }}}
dnl MPI{{{
Expand Down
1 change: 1 addition & 0 deletions src/c/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ issm_sources += \
./solutionsequences/solutionsequence_la_theta.cpp \
./solutionsequences/solutionsequence_linear.cpp \
./solutionsequences/solutionsequence_nonlinear.cpp \
./solutionsequences/AndersonAccelerator.cpp \
./solutionsequences/solutionsequence_newton.cpp \
./solutionsequences/solutionsequence_fct.cpp \
./solutionsequences/solutionsequence_schurcg.cpp \
Expand Down
1 change: 1 addition & 0 deletions src/c/shared/Enum/EnumDefinitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,7 @@ enum definitions{
StepEnum,
StepsEnum,
StressbalanceAbstolEnum,
StressbalanceAndersonDepthEnum,
StressbalanceFSreconditioningEnum,
StressbalanceIsHydrologyLayerEnum,
StressbalanceIsnewtonEnum,
Expand Down
1 change: 1 addition & 0 deletions src/c/shared/Enum/EnumToStringx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,7 @@ const char* EnumToStringx(int en){
case StepEnum : return "Step";
case StepsEnum : return "Steps";
case StressbalanceAbstolEnum : return "StressbalanceAbstol";
case StressbalanceAndersonDepthEnum : return "StressbalanceAndersonDepth";
case StressbalanceFSreconditioningEnum : return "StressbalanceFSreconditioning";
case StressbalanceIsHydrologyLayerEnum : return "StressbalanceIsHydrologyLayer";
case StressbalanceIsnewtonEnum : return "StressbalanceIsnewton";
Expand Down
1 change: 1 addition & 0 deletions src/c/shared/Enum/StringToEnumx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,7 @@ int StringToEnumx(const char* name,bool notfounderror){
else if (strcmp(name,"Step")==0) return StepEnum;
else if (strcmp(name,"Steps")==0) return StepsEnum;
else if (strcmp(name,"StressbalanceAbstol")==0) return StressbalanceAbstolEnum;
else if (strcmp(name,"StressbalanceAndersonDepth")==0) return StressbalanceAndersonDepthEnum;
else if (strcmp(name,"StressbalanceFSreconditioning")==0) return StressbalanceFSreconditioningEnum;
else if (strcmp(name,"StressbalanceIsHydrologyLayer")==0) return StressbalanceIsHydrologyLayerEnum;
else if (strcmp(name,"StressbalanceIsnewton")==0) return StressbalanceIsnewtonEnum;
Expand Down
202 changes: 202 additions & 0 deletions src/c/solutionsequences/AndersonAccelerator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
/*!\file: AndersonAccelerator.cpp
* \brief: Anderson acceleration for Picard fixed-point iterations.
*
* See AndersonAccelerator.h for theory and usage.
*/

#include "AndersonAccelerator.h"

/* ──────────────────────────────────────────────────────────────────────────
* Construction / destruction
* ────────────────────────────────────────────────────────────────────────── */

AndersonAccelerator::AndersonAccelerator(int m) : depth(m), iter(0) {
/*reserve to avoid repeated allocation for typical depths <= 10*/
if(m > 0){
f_hist.reserve(m+1);
G_hist.reserve(m+1);
}
}

AndersonAccelerator::~AndersonAccelerator(){
for(int i=0; i<(int)f_hist.size(); i++) delete f_hist[i];
for(int i=0; i<(int)G_hist.size(); i++) delete G_hist[i];
}

/* ──────────────────────────────────────────────────────────────────────────
* SmallSolve
*
* Gaussian elimination with partial pivoting for a dense mk x mk system.
* On entry: A[i*mk+j], b[i].
* On exit: theta[i] holds the solution.
* Returns false and leaves theta zeroed if A is numerically singular.
* ────────────────────────────────────────────────────────────────────────── */
bool AndersonAccelerator::SmallSolve(IssmDouble* A, IssmDouble* b,
IssmDouble* theta, int mk){

const IssmDouble tol = 1.0e-14;

/*copy b into theta; we will overwrite it as we go*/
for(int i=0; i<mk; i++) theta[i] = b[i];

/*forward elimination*/
for(int col=0; col<mk; col++){

/*partial pivot: find row with largest absolute value in this column*/
int pivot = col;
IssmDouble maxval = fabs(A[col*mk+col]);
for(int row=col+1; row<mk; row++){
IssmDouble val = fabs(A[row*mk+col]);
if(val > maxval){ maxval = val; pivot = row; }
}

if(maxval < tol){
/*singular or near-singular — fall back to Picard (theta = 0)*/
for(int i=0; i<mk; i++) theta[i] = 0.0;
return false;
}

/*swap rows col and pivot in A and theta*/
if(pivot != col){
for(int j=0; j<mk; j++){
IssmDouble tmp = A[col*mk+j];
A[col*mk+j] = A[pivot*mk+j];
A[pivot*mk+j] = tmp;
}
IssmDouble tmp = theta[col];
theta[col] = theta[pivot];
theta[pivot] = tmp;
}

/*eliminate column below the pivot*/
IssmDouble inv_diag = 1.0 / A[col*mk+col];
for(int row=col+1; row<mk; row++){
IssmDouble factor = A[row*mk+col] * inv_diag;
for(int j=col; j<mk; j++)
A[row*mk+j] -= factor * A[col*mk+j];
theta[row] -= factor * theta[col];
}
}

/*back substitution*/
for(int row=mk-1; row>=0; row--){
for(int j=row+1; j<mk; j++)
theta[row] -= A[row*mk+j] * theta[j];
theta[row] /= A[row*mk+row];
}

return true;
}

/* ──────────────────────────────────────────────────────────────────────────
* Apply
*
* Core of Anderson acceleration. Called once per nonlinear iteration
* after the linear solve has produced a new Picard update in *puf.
*
* Algorithm (Walker & Ni 2011, "Type 1"):
*
* f_k = G(u_k) - u_k Picard residual
* mk = min(iter, depth) effective window
*
* if mk == 0: u_{k+1} = G(u_k) plain Picard
* else:
* dF[:,j] = f_hist[j+1] - f_hist[j] mk difference columns
* dG[:,j] = G_hist[j+1] - G_hist[j]
* theta = argmin_t || f_k - dF t || (mk x mk normal equations)
* u_{k+1} = G(u_k) - dG theta accelerated update
* ────────────────────────────────────────────────────────────────────────── */
void AndersonAccelerator::Apply(Vector<IssmDouble>** puf,
Vector<IssmDouble>* old_uf){

if(depth == 0) return; /* disabled: leave *puf untouched */

Vector<IssmDouble>* G_k = *puf; /* Picard update G(u_k) from linear solve */

/* ── 1. Compute Picard residual f_k = G_k - u_k ───────────────────── */
Vector<IssmDouble>* f_k = G_k->Duplicate();
G_k->Copy(f_k);
f_k->AXPY(old_uf, -1.0); /* f_k = G_k - old_uf */

/* ── 2. Store G_k and f_k in history before any modification ─────────── */
Vector<IssmDouble>* G_store = G_k->Duplicate();
G_k->Copy(G_store);

f_hist.push_back(f_k);
G_hist.push_back(G_store);

/* trim history to at most depth+1 entries (depth+1 gives depth differences)*/
while((int)f_hist.size() > depth+1){
delete f_hist.front(); f_hist.erase(f_hist.begin());
delete G_hist.front(); G_hist.erase(G_hist.begin());
}

/* ── 3. Determine effective window size ──────────────────────────────── */
int mk = (int)f_hist.size() - 1; /* number of difference columns */

if(mk == 0){
/* first iteration or depth==1 first step: keep plain Picard update */
iter++;
return;
}

/* ── 4. Build difference vectors dF[j] and dG[j] ───────────────────── */
/* dF[j] = f_hist[j+1] - f_hist[j] (j = 0 ... mk-1)
* dG[j] = G_hist[j+1] - G_hist[j] */
std::vector<Vector<IssmDouble>*> dF(mk, NULL);
std::vector<Vector<IssmDouble>*> dG(mk, NULL);

for(int j=0; j<mk; j++){
dF[j] = f_hist[j+1]->Duplicate();
f_hist[j+1]->Copy(dF[j]);
dF[j]->AXPY(f_hist[j], -1.0); /* dF[j] = f_hist[j+1] - f_hist[j] */

dG[j] = G_hist[j+1]->Duplicate();
G_hist[j+1]->Copy(dG[j]);
dG[j]->AXPY(G_hist[j], -1.0); /* dG[j] = G_hist[j+1] - G_hist[j] */
}

/* ── 5. Form normal equations A theta = b where
* A[i][j] = dF[i] . dF[j]
* b[i] = dF[i] . f_k */
IssmDouble* A = new IssmDouble[mk*mk];
IssmDouble* b_rhs = new IssmDouble[mk];
IssmDouble* theta = new IssmDouble[mk]();

Vector<IssmDouble>* f_k_cur = f_hist.back(); /* = f_k we just pushed */

for(int i=0; i<mk; i++){
b_rhs[i] = dF[i]->Dot(f_k_cur);
for(int j=0; j<=i; j++){
A[i*mk+j] = A[j*mk+i] = dF[i]->Dot(dF[j]);
}
}

/* ── 6. Solve the mk x mk system ────────────────────────────────────── */
bool ok = SmallSolve(A, b_rhs, theta, mk);

/* ── 7. Anderson update: G_k -= sum_j theta[j] * dG[j] ─────────────
* If SmallSolve failed (singular) theta is zeroed so G_k is unchanged,
* effectively falling back to Picard for this iteration. */
if(ok){
for(int j=0; j<mk; j++){
G_k->AXPY(dG[j], -theta[j]); /* G_k -= theta[j] * dG[j] */
}
if(VerboseConvergence())
_printf0_(" Anderson(" << depth << "): mk=" << mk << " applied\n");
}
else{
if(VerboseConvergence())
_printf0_(" Anderson(" << depth << "): mk=" << mk
<< " singular, falling back to Picard\n");
}

/* ── 8. Clean up ─────────────────────────────────────────────────────── */
for(int j=0; j<mk; j++){ delete dF[j]; delete dG[j]; }
delete[] A;
delete[] b_rhs;
delete[] theta;

/* *puf still points to G_k which has been modified in-place */
iter++;
}
56 changes: 56 additions & 0 deletions src/c/solutionsequences/AndersonAccelerator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*!\file: AndersonAccelerator.h
* \brief: Anderson acceleration (depth-m mixing) for Picard fixed-point iterations.
*
* Theory: Walker & Ni (2011), SIAM J. Numer. Anal. 49(4), 1715-1735.
*
* Given the Picard map G : u -> u_new = K(u)^{-1} f(u),
* standard Picard sets u_{k+1} = G(u_k).
* Anderson(m) instead solves a small m x m least-squares problem each
* iteration to find the best linear combination of the last m Picard updates,
* replacing the plain update at negligible extra cost.
*
* Usage:
* AndersonAccelerator aa(m); // m=0 -> pure Picard (no-op)
* for(;;){
* ... linear solve -> uf ...
* aa.Apply(&uf, old_uf); // uf replaced with accelerated update
* ... UpdateInputFromSolution(uf) ...
* }
*/

#ifndef _ANDERSONACCELERATOR_H_
#define _ANDERSONACCELERATOR_H_

#include <vector>
#include "../toolkits/toolkits.h"
#include "../shared/shared.h"

class AndersonAccelerator {

public:

int depth; /* window size m; 0 = disabled (pure Picard) */
int iter; /* iteration counter (reset on construction) */

/* history of Picard residuals f_k = G(u_k) - u_k */
std::vector<Vector<IssmDouble>*> f_hist;
/* history of raw Picard updates G(u_k) */
std::vector<Vector<IssmDouble>*> G_hist;

AndersonAccelerator(int m);
~AndersonAccelerator();

/*! Replace *puf (= G(u_k) from linear solve) with the Anderson-accelerated
* update, given old_uf = u_k (solution vector from the previous iteration).
* On return *puf holds u_{k+1}. */
void Apply(Vector<IssmDouble>** puf, Vector<IssmDouble>* old_uf);

private:

/*! Solve the small mk x mk system A theta = b in-place via
* Gaussian elimination with partial pivoting. A and b are
* overwritten. Returns false if the system is singular. */
bool SmallSolve(IssmDouble* A, IssmDouble* b, IssmDouble* theta, int mk);
};

#endif /* _ANDERSONACCELERATOR_H_ */
Loading
Loading