From 51a5af4031365af07b3e26d4a502f72f926023db Mon Sep 17 00:00:00 2001
From: hm556 <hm556@exeter.ac.uk>
Date: Tue, 2 Sep 2025 13:05:51 +0100
Subject: [PATCH 01/28] Add ability to read velocity vector from mat.in

---
 src/heatflow/mod_constructions.f90 |  2 ++
 src/heatflow/mod_inputs.f90        | 23 +++++++++++++++++++++--
 src/heatflow/mod_material.f90      |  6 +++++-
 src/heatflow/mod_setup.f90         |  6 ++++--
 4 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/src/heatflow/mod_constructions.f90 b/src/heatflow/mod_constructions.f90
index b233f7c..384ce09 100644
--- a/src/heatflow/mod_constructions.f90
+++ b/src/heatflow/mod_constructions.f90
@@ -26,6 +26,7 @@ module constructions
      integer(int12) :: imaterial_type !what type of material it is in the mat.in
      real(real12) :: volume !volume of the block
      real(real12), dimension(3) :: Length !length of the block in 3 dimensions
+     real(real12), dimension(3) :: vel !center of the block in 3 dimensions
      integer(int12) :: iheater !whether the block is a heater
       real(real12) :: kappa, rho, heat_capacity, tau, em !physical properties of the material
   end type heatblock
@@ -39,6 +40,7 @@ module constructions
      real(real12) :: rho !density of the material
      real(real12) :: sound_speed   !speed of sound in the material
      real(real12) :: tau !relaxation time
+     real(real12), dimension(3) :: vel !velocity of the material
      real(real12) :: em !emisstivity
      logical :: source !whether the material is a source of heat ??
   end type material
diff --git a/src/heatflow/mod_inputs.f90 b/src/heatflow/mod_inputs.f90
index 4d29245..e8b3025 100644
--- a/src/heatflow/mod_inputs.f90
+++ b/src/heatflow/mod_inputs.f90
@@ -657,7 +657,7 @@ subroutine read_mat(unit)
     type(material), dimension(100) :: dum_mat
     character(1024) :: buffer
     integer :: reason, j
-    integer, dimension(8) :: readvarmat
+    integer, dimension(9) :: readvarmat
     integer :: i, index
 
     i=0
@@ -703,7 +703,8 @@ subroutine read_mat(unit)
        CALL assignD(buffer,"rho"          ,dum_mat(i)%rho          ,readvarmat(5))! assign rho
        CALL assignD(buffer,"sound_speed"  ,dum_mat(i)%sound_speed  ,readvarmat(6))! assign sound_speed
        CALL assignD(buffer,"tau"          ,dum_mat(i)%tau          ,readvarmat(7))! assign tau
-       CALL assignD(buffer,"em"            ,dum_mat(i)%em            ,readvarmat(8))! assign e
+       CALL assignD(buffer,"em"           ,dum_mat(i)%em           ,readvarmat(8))! assign e
+       CALL assignV(buffer,"vel"          ,dum_mat(i)%vel          ,readvarmat(9)) ! assign velocity
     end do read
     
     ! Check for duplicate indices
@@ -827,5 +828,23 @@ function val(buffer)
   end function val
 !!!#################################################################################################
 
+!!!#################################################################################################
+!!! assign velocity
+!!!#################################################################################################
+  subroutine assignV(buffer, keyword, variable, found)
+    implicit none
+    integer::found
+    character(1024)::buffer1,buffer2
+    character(*)::buffer,keyword
+    real(real12), dimension(3)::variable
+    buffer1=buffer(:scan(buffer,"=")-1)
+    if(scan("=",buffer).ne.0) buffer2=val(buffer)
+    if(trim(adjustl(buffer1)).eq.trim(adjustl(keyword))&
+         .and.trim(adjustl(buffer2)).ne.'') then
+       found=found+1
+       read(buffer2,*) variable(1), variable(2), variable(3)
+    end if
+  end subroutine assignV
+
 end module inputs
 
diff --git a/src/heatflow/mod_material.f90 b/src/heatflow/mod_material.f90
index 6ca7192..9dfcc8c 100644
--- a/src/heatflow/mod_material.f90
+++ b/src/heatflow/mod_material.f90
@@ -51,12 +51,13 @@ module materials
 !!!  - em, the emissivity of the material.
 !!!#########################################################################
 
-subroutine material(imaterial_type,kappa,kappa3D,h_conv,heat_capacity,rho,sound_speed,tau, em)
+subroutine material(imaterial_type,kappa,kappa3D,h_conv,heat_capacity,rho,sound_speed,tau, em, vel)
   
    integer(int12), intent(in) ::imaterial_type
    integer(int12) :: i, tmp
    real(real12), intent(inout) :: kappa3D, kappa, h_conv, heat_capacity, sound_speed, rho, tau
    real(real12), intent(inout) :: em
+   real(real12), dimension(3), intent(inout) :: vel
    logical :: found
 
 !!!=============================================
@@ -96,6 +97,9 @@ subroutine material(imaterial_type,kappa,kappa3D,h_conv,heat_capacity,rho,sound_
             rho           = input_materials(i)%rho
             sound_speed   = input_materials(i)%sound_speed
             tau           = input_materials(i)%tau
+            vel(1)        = input_materials(i)%vel(1)
+            vel(2)        = input_materials(i)%vel(2)
+            vel(3)        = input_materials(i)%vel(3)
             em   = input_materials(i)%em
 
             exit mat_loop
diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90
index 797fa35..b03906f 100644
--- a/src/heatflow/mod_setup.f90
+++ b/src/heatflow/mod_setup.f90
@@ -34,7 +34,7 @@ module setup
    subroutine set_global_variables()
       integer(int12) :: ix,iy,iz,index
       real(real12) :: kappa,kappa3D,h_conv,heat_capacity,rho,sound_speed,tau, em
-
+      real(real12), dimension(3) :: vel
       allocate(Temp_cur(nx, ny, nz))
       allocate(Temp_p(NA))
       allocate(Temp_pp(NA))
@@ -55,13 +55,15 @@ subroutine set_global_variables()
             do ix = 1, nx
             index = index + 1
             CALL material(grid(ix,iy,iz)%imaterial_type,&
-                 kappa,kappa3D,h_conv,heat_capacity,rho,sound_speed,tau, em)
+                 kappa,kappa3D,h_conv,heat_capacity,rho,sound_speed,tau, em, vel)
             grid(ix,iy,iz)%kappa = kappa
             grid(ix,iy,iz)%rho = rho
             grid(ix,iy,iz)%heat_capacity = heat_capacity
             grid(ix,iy,iz)%tau = tau*inverse_time*inverse_time
             grid(ix,iy,iz)%em = em
+            grid(ix,iy,iz)%vel(:) = vel(:)
             lin_rhoc(index) = rho*heat_capacity
+            print*, grid(ix,iy,iz)%vel(:)
             if (Check_Stability) CALL stability(kappa, rho, heat_capacity, ix, iy, iz)
             end do               
          end do

From 3bfe37a626be810799a55ce2f666a90615d11dd4 Mon Sep 17 00:00:00 2001
From: hm556 <hm556@exeter.ac.uk>
Date: Tue, 2 Sep 2025 14:25:49 +0100
Subject: [PATCH 02/28] Add convection to hmatrix and boundary vector

---
 src/heatflow/mod_boundary.f90 |  56 ++++++++++++++--
 src/heatflow/mod_hmatrix.f90  | 121 +++++++++++++++++++++++++++++++++-
 src/heatflow/mod_setup.f90    |   1 -
 3 files changed, 170 insertions(+), 8 deletions(-)

diff --git a/src/heatflow/mod_boundary.f90 b/src/heatflow/mod_boundary.f90
index d94a0bf..88ad7ba 100644
--- a/src/heatflow/mod_boundary.f90
+++ b/src/heatflow/mod_boundary.f90
@@ -84,7 +84,7 @@ subroutine boundary(B)
                             kappaHarm = (2*kappa*kappaBoundx1/(kappa+kappaBoundx1)) / &
                             (grid(ix, iy, iz)%Length(1)**2)
                             if (kappa .ne. kappaBoundx1) kappaHarm = kappaHarm*BR
-                            B(I) = B(I) + (kappaHarm) * T_Bathx1
+                            B(I) = B(I) + (kappaHarm) * T_Bathx1 !+ boundray_term_vel(1_int12,iy,iz,T_Bathx1)
                         end if
                     end if
                     if (ix .eq. nx) then
@@ -94,7 +94,7 @@ subroutine boundary(B)
                             kappaHarm = (2*kappa*kappaBoundNx/(kappa+kappaBoundNx)) / &
                             (grid(ix, iy, iz)%Length(1)**2)
                             if (kappa .ne. kappaBoundNx) kappaHarm = kappaHarm*BR
-                            B(I) = B(I) + (kappaHarm) * T_Bathx2
+                            B(I) = B(I) + (kappaHarm) * T_Bathx2 !+ boundray_term_vel(nx,iy,iz,T_Bathx2)
                         end if
                     end if
                 end if
@@ -107,7 +107,7 @@ subroutine boundary(B)
                             kappaHarm = (2*kappa*kappaBoundy1/(kappa+kappaBoundy1)) / &
                             (grid(ix, iy, iz)%Length(2)**2)
                             if (kappa .ne. kappaBoundy1) kappaHarm = kappaHarm*BR
-                            B(I) = B(I) + (kappaHarm) * T_Bathy1
+                            B(I) = B(I) + (kappaHarm) * T_Bathy1 !+ boundray_term_vel(ix,1_int12,iz,T_Bathy1)
                         end if
                     end if
                     if (iy .eq. ny) then
@@ -117,7 +117,7 @@ subroutine boundary(B)
                             kappaHarm = (2*kappa*kappaBoundNy/(kappa+kappaBoundNy)) / &
                             (grid(ix, iy, iz)%Length(2)**2)
                             if (kappa .ne. kappaBoundNy) kappaHarm = kappaHarm*BR
-                            B(I) = B(I) + (kappaHarm) * T_Bathy2
+                            B(I) = B(I) + (kappaHarm) * T_Bathy2 !+ boundray_term_vel(ix,ny,iz,T_Bathy2)
                         end if
                     end if
                 end if
@@ -130,7 +130,7 @@ subroutine boundary(B)
                             kappaHarm = (2*kappa*kappaBoundz1/(kappa+kappaBoundz1)) / &
                             (grid(ix, iy, iz)%Length(3)**2)
                             if (kappa .ne. kappaBoundz1) kappaHarm = kappaHarm*BR
-                            B(I) = B(I) + (kappaHarm) * T_Bathz1
+                            B(I) = B(I) + (kappaHarm) * T_Bathz1 !+ boundray_term_vel(ix,iy,1_int12,T_Bathz1)
                         end if
                     end if
                     if (iz .eq. nz) then
@@ -140,7 +140,7 @@ subroutine boundary(B)
                             kappaHarm = (2*kappa*kappaBoundNz/(kappa+kappaBoundNz)) / &
                             (grid(ix, iy, iz)%Length(3)**2)
                             if (kappa .ne. kappaBoundNz) kappaHarm = kappaHarm*BR
-                            B(I) = B(I) + (kappaHarm) * T_Bathz2
+                            B(I) = B(I) + (kappaHarm) * T_Bathz2 !+ boundray_term_vel(ix,iy,nz,T_Bathz2)
                         end if
                     end if
                 end if
@@ -180,5 +180,49 @@ function constantboundarytempgrad(I) result(temp)
     ! so we need to add T_BathCG to the temperature at the boundary 
   end function constantboundarytempgrad
   !!!###############################################################################################
+
+     !!!########################################################################
+   !!! This subroutine calculates the value of the convective term of the H matrix...
+   !!! ...at the boundary.
+   !!!########################################################################
+  function boundray_term_vel(x_b, y_b, z_b, TB) result(vel_conv)
+    integer(int12), intent(in) :: x_b, y_b, z_b
+    real(real12) :: vel_conv
+    real(real12) :: rho, CV, TB
+    real(real12), dimension(3) :: vel_in, vel_out 
+
+    !------------------------------------------------------------
+    ! The boundary term is calculated of the boundary grid point.
+    !------------------------------------------------------------
+
+    
+    rho = grid(x_b,y_b,z_b)%rho
+    CV = grid(x_b,y_b,z_b)%heat_capacity
+    vel_in = grid(x_b,y_b,z_b)%vel
+    vel_out = grid(x_b,y_b,z_b)%vel
+
+    if (x_b .eq. 1_int12) then
+        vel_conv = vel_in(1)*rho*CV*(1.0_real12/(2.0_real12*grid(x_b,y_b,z_b)%Length(1)))
+        vel_conv = -vel_conv
+    else if (x_b .eq. nx) then
+        vel_conv = vel_in(1)*rho*CV*(1.0_real12/(2.0_real12*grid(x_b,y_b,z_b)%Length(1)))
+    
+    else if (y_b .eq. 1_int12) then
+        vel_conv = vel_in(2)*rho*CV*(1.0_real12/(2.0_real12*grid(x_b,y_b,z_b)%Length(2)))
+        vel_conv = -vel_conv
+    else if (y_b .eq. ny) then
+        vel_conv = vel_in(2)*rho*CV*(1.0_real12/(2.0_real12*grid(x_b,y_b,z_b)%Length(2)))
+    else if (z_b .eq. 1_int12) then
+        vel_conv = vel_in(3)*rho*CV*(1.0_real12/(2.0_real12*grid(x_b,y_b,z_b)%Length(3)))
+        vel_conv = -vel_conv
+    else if (z_b .eq. nz) then
+        vel_conv = vel_in(3)*rho*CV*(1.0_real12/(2.0_real12*grid(x_b,y_b,z_b)%Length(3)))
+    else
+        vel_conv = 0.0_real12
+    end if
+    vel_conv = vel_conv*TB
+    !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    end function boundray_term_vel
+    !!!########################################################################
   
 end module boundary_vector
diff --git a/src/heatflow/mod_hmatrix.f90 b/src/heatflow/mod_hmatrix.f90
index 9e0b219..e1d268d 100644
--- a/src/heatflow/mod_hmatrix.f90
+++ b/src/heatflow/mod_hmatrix.f90
@@ -148,7 +148,8 @@ function hmatrixfunc(i, j) result(H)
       if (x .eq. 1) then
         H=0.0_real12
       else
-         H = A  ! X left neighbor (left cell interaction)
+         H = A ! X left neighbor (left cell interaction)
+         H = H + calculate_convective_conductivity(xm, y, z, x, y, z)
       end if
     end if 
 
@@ -157,6 +158,7 @@ function hmatrixfunc(i, j) result(H)
         H=0.0_real12
       else
         H = B  ! X right neighbor (right cell interaction)
+        H = H + calculate_convective_conductivity(xp, y, z, x, y, z)
       end if
     end if
 
@@ -165,6 +167,7 @@ function hmatrixfunc(i, j) result(H)
         H=0.0_real12
       else
         H = D  ! Y down neighbor (down cell interaction)
+        H = H + calculate_convective_conductivity(x, ym, z, x, y, z)
       end if 
     end if 
     if ((i-j) .eq. -nx) then
@@ -172,6 +175,7 @@ function hmatrixfunc(i, j) result(H)
         H=0.0_real12
       else
         H = E  ! Y up neighbor (up cell interaction)
+        H = H + calculate_convective_conductivity(x, yp, z, x, y, z)
       end if 
     end if 
 
@@ -181,6 +185,7 @@ function hmatrixfunc(i, j) result(H)
      else
         !write(*,*) 'F   this is forward (in) z',F
          H = F  ! Z in neighbor (forward cell interaction) !!!Frank had this as G during testing
+         H = H + calculate_convective_conductivity(x, y, zm, x, y, z)
       end if
     end if 
 
@@ -190,6 +195,7 @@ function hmatrixfunc(i, j) result(H)
      else  
         !write(*,*) 'G   this is backward (out) z?',G
         H = G  ! Z out neighbor (backward cell interaction) !!!Frank Had this as F during testing
+        H = H + calculate_convective_conductivity(x, y, zp, x, y, z)
       end if
    end if
    
@@ -391,5 +397,118 @@ subroutine boundry_diag_term(x_b, y_b, z_b, x, y, z, kappa_ab)
   end subroutine boundry_diag_term
   !!!########################################################################
   
+  !!!########################################################################
+  !!! subroutine to calculate the convective conductivity between two points
+  !!!########################################################################
+   function calculate_convective_conductivity(x_in, y_in, z_in, x_out, y_out, z_out) result(vel_conv)
+      integer(int12), intent(in) :: x_in, y_in, z_in, x_out, y_out, z_out
+      real(real12) :: vel_conv
+      real(real12) :: rho, CV
+      real(real12), dimension(3) :: vel_in, vel_out 
+   
+      vel_conv = 0.0_real12
+      
+      ! if not an edge element
+      if ((x_in .ge. 1) .and. (x_in .le. nx) .and. (y_in .ge. 1) .and. &
+          (y_in .le. ny) .and. (z_in .ge. 1) .and. (z_in .le. nz)) then
+
+         rho = grid(x_out,y_out,z_out)%rho
+         CV = grid(x_out,y_out,z_out)%heat_capacity
+         vel_in = grid(x_in,y_in,z_in)%vel
+         vel_out = grid(x_out,y_out,z_out)%vel
+   
+         if (x_in .ne. x_out) then
+            if (vel_in(1) .ne. vel_out(1)) then
+               vel_conv = 0.0_real12
+            else
+               vel_conv = vel_in(1)*rho*CV*(1.0_real12/(2.0_real12*grid(x_out,y_out,z_out)%Length(1)))
+               if (x_in .lt. x_out) then
+                  vel_conv = -vel_conv
+               end if
+            end if
+   
+         else if (y_in .ne. y_out) then
+            if (vel_in(2) .ne. vel_out(2)) then
+               vel_conv = 0.0_real12
+            else
+               vel_conv = vel_in(2)*rho*CV*(1.0_real12/(2.0_real12*grid(x_out,y_out,z_out)%Length(2)))
+               if (y_in .lt. y_out) then
+                  vel_conv = -vel_conv
+               end if
+            end if
+   
+         else if (z_in .ne. z_out) then
+            if (vel_in(3) .ne. vel_out(3)) then
+               vel_conv = 0.0_real12
+            else
+               vel_conv = vel_in(3)*rho*CV*(1.0_real12/(2.0_real12*grid(x_out,y_out,z_out)%Length(3)))
+               if (z_in .lt. z_out) then
+                  vel_conv = -vel_conv
+               end if
+            end if
+         end if
+   
+      else
+         vel_conv = 0.0_real12
+      end if
+      vel_conv = vel_conv
+   end function calculate_convective_conductivity
+
+   !!!########################################################################
+
+   !!!########################################################################
+   !!! This subroutine calculates the value of the convective term of the H matrix...
+   !!! ...at the boundary.
+   !!!########################################################################
+   subroutine boundry_diag_term_vel(x_b, y_b, z_b, x, y, z, vel_conv)
+      integer(int12), intent(in) :: x_b, y_b, z_b, x, y, z
+      real(real12), intent(out) :: vel_conv
+      real(real12) :: rho, CV
+      real(real12), dimension(3) :: vel_in, vel_out 
+  
+      !------------------------------------------------------------
+      ! The boundary term is calculated of the boundary grid point.
+      !------------------------------------------------------------
+  
+      
+      rho = grid(x,y,z)%rho
+      CV = grid(x,y,z)%heat_capacity
+      vel_in = grid(x_b,y_b,z_b)%vel
+      vel_out = grid(x,y,z)%vel
+
+      if (x_b .eq. 1) then
+         if (vel_in(1) .ne. vel_out(1)) then
+            vel_conv = 0.0_real12
+         else
+            vel_conv = vel_in(1)*rho*CV*(1.0_real12/(2.0_real12*grid(x,y,z)%Length(1)))
+            if (x_b .lt. x) then
+               vel_conv = -vel_conv
+            end if
+         end if
+
+      else if (y_b .ne. y) then
+         if (vel_in(2) .ne. vel_out(2)) then
+            vel_conv = 0.0_real12
+         else
+            vel_conv = vel_in(2)*rho*CV*(1.0_real12/(2.0_real12*grid(x,y,z)%Length(2)))
+            if (y_b .lt. y) then
+               vel_conv = -vel_conv
+            end if
+         end if
+
+      else if (z_b .ne. z) then
+         if (vel_in(3) .ne. vel_out(3)) then
+            vel_conv = 0.0_real12
+         else
+            vel_conv = vel_in(3)*rho*CV*(1.0_real12/(2.0_real12*grid(x,y,z)%Length(3)))
+            if (z_b .lt. z) then
+               vel_conv = -vel_conv
+            end if
+         end if
+      end if
+      !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    end subroutine boundry_diag_term_vel
+      !!!########################################################################
+
 
 end module hmatrixmod
diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90
index b03906f..accbaaf 100644
--- a/src/heatflow/mod_setup.f90
+++ b/src/heatflow/mod_setup.f90
@@ -63,7 +63,6 @@ subroutine set_global_variables()
             grid(ix,iy,iz)%em = em
             grid(ix,iy,iz)%vel(:) = vel(:)
             lin_rhoc(index) = rho*heat_capacity
-            print*, grid(ix,iy,iz)%vel(:)
             if (Check_Stability) CALL stability(kappa, rho, heat_capacity, ix, iy, iz)
             end do               
          end do

From 09face91e46ae9bdd5baf84d1ec684a501bee505 Mon Sep 17 00:00:00 2001
From: hm556 <hm556@exeter.ac.uk>
Date: Tue, 2 Sep 2025 14:44:16 +0100
Subject: [PATCH 03/28] Make fix bug, needs knew solver to work, current
 implementation of solver only works for symmetric Hmatrix

---
 src/heatflow/mod_boundary.f90 | 12 ++++++------
 src/heatflow/mod_setup.f90    |  1 +
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/heatflow/mod_boundary.f90 b/src/heatflow/mod_boundary.f90
index 88ad7ba..cb8ab30 100644
--- a/src/heatflow/mod_boundary.f90
+++ b/src/heatflow/mod_boundary.f90
@@ -84,7 +84,7 @@ subroutine boundary(B)
                             kappaHarm = (2*kappa*kappaBoundx1/(kappa+kappaBoundx1)) / &
                             (grid(ix, iy, iz)%Length(1)**2)
                             if (kappa .ne. kappaBoundx1) kappaHarm = kappaHarm*BR
-                            B(I) = B(I) + (kappaHarm) * T_Bathx1 !+ boundray_term_vel(1_int12,iy,iz,T_Bathx1)
+                            B(I) = B(I) + (kappaHarm) * T_Bathx1 + boundray_term_vel(1_int12,iy,iz,T_Bathx1)
                         end if
                     end if
                     if (ix .eq. nx) then
@@ -94,7 +94,7 @@ subroutine boundary(B)
                             kappaHarm = (2*kappa*kappaBoundNx/(kappa+kappaBoundNx)) / &
                             (grid(ix, iy, iz)%Length(1)**2)
                             if (kappa .ne. kappaBoundNx) kappaHarm = kappaHarm*BR
-                            B(I) = B(I) + (kappaHarm) * T_Bathx2 !+ boundray_term_vel(nx,iy,iz,T_Bathx2)
+                            B(I) = B(I) + (kappaHarm) * T_Bathx2 + boundray_term_vel(nx,iy,iz,T_Bathx2)
                         end if
                     end if
                 end if
@@ -107,7 +107,7 @@ subroutine boundary(B)
                             kappaHarm = (2*kappa*kappaBoundy1/(kappa+kappaBoundy1)) / &
                             (grid(ix, iy, iz)%Length(2)**2)
                             if (kappa .ne. kappaBoundy1) kappaHarm = kappaHarm*BR
-                            B(I) = B(I) + (kappaHarm) * T_Bathy1 !+ boundray_term_vel(ix,1_int12,iz,T_Bathy1)
+                            B(I) = B(I) + (kappaHarm) * T_Bathy1 + boundray_term_vel(ix,1_int12,iz,T_Bathy1)
                         end if
                     end if
                     if (iy .eq. ny) then
@@ -117,7 +117,7 @@ subroutine boundary(B)
                             kappaHarm = (2*kappa*kappaBoundNy/(kappa+kappaBoundNy)) / &
                             (grid(ix, iy, iz)%Length(2)**2)
                             if (kappa .ne. kappaBoundNy) kappaHarm = kappaHarm*BR
-                            B(I) = B(I) + (kappaHarm) * T_Bathy2 !+ boundray_term_vel(ix,ny,iz,T_Bathy2)
+                            B(I) = B(I) + (kappaHarm) * T_Bathy2 + boundray_term_vel(ix,ny,iz,T_Bathy2)
                         end if
                     end if
                 end if
@@ -130,7 +130,7 @@ subroutine boundary(B)
                             kappaHarm = (2*kappa*kappaBoundz1/(kappa+kappaBoundz1)) / &
                             (grid(ix, iy, iz)%Length(3)**2)
                             if (kappa .ne. kappaBoundz1) kappaHarm = kappaHarm*BR
-                            B(I) = B(I) + (kappaHarm) * T_Bathz1 !+ boundray_term_vel(ix,iy,1_int12,T_Bathz1)
+                            B(I) = B(I) + (kappaHarm) * T_Bathz1 + boundray_term_vel(ix,iy,1_int12,T_Bathz1)
                         end if
                     end if
                     if (iz .eq. nz) then
@@ -140,7 +140,7 @@ subroutine boundary(B)
                             kappaHarm = (2*kappa*kappaBoundNz/(kappa+kappaBoundNz)) / &
                             (grid(ix, iy, iz)%Length(3)**2)
                             if (kappa .ne. kappaBoundNz) kappaHarm = kappaHarm*BR
-                            B(I) = B(I) + (kappaHarm) * T_Bathz2 !+ boundray_term_vel(ix,iy,nz,T_Bathz2)
+                            B(I) = B(I) + (kappaHarm) * T_Bathz2 + boundray_term_vel(ix,iy,nz,T_Bathz2)
                         end if
                     end if
                 end if
diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90
index accbaaf..11dc0bf 100644
--- a/src/heatflow/mod_setup.f90
+++ b/src/heatflow/mod_setup.f90
@@ -141,6 +141,7 @@ subroutine sparse_Hmatrix()
              ra%irow(count) = i ! The row of the H matrix
              ra%jcol(count) = j ! The column of the H matrix
              count = count + 1 ! The number of non-zero elements in the H matrix
+             H0=hmatrixfunc(j,i) ! The value of the H matrix
              ra%val(count) = H0 ! The value of the H matrix
              ra%irow(count) = j ! The row of the H matrix
              ra%jcol(count) = i ! The column of the H matrix

From 247f862b80448d0775ba0a33cbab42928d1b44ee Mon Sep 17 00:00:00 2001
From: Your Name <hm556@exeter.ac.uk>
Date: Tue, 2 Sep 2025 22:38:08 +0100
Subject: [PATCH 04/28] Add non symmatrix sparse matrix solver from the intel
 library mkl

---
 Makefile                           |  80 ++---
 src/heatflow/mkl_pardiso.f90       | 452 +++++++++++++++++++++++++++++
 src/heatflow/mod_constructions.f90 |   4 +-
 src/heatflow/mod_evolve.f90        |  21 +-
 src/heatflow/mod_global.f90        |   5 +-
 src/heatflow/mod_setup.f90         |   5 +
 src/heatflow/mod_sparse_solver.f90 | 334 +++++++++++++++++++++
 src/heatflow/mod_tempdep.f90       |  58 ++--
 8 files changed, 885 insertions(+), 74 deletions(-)
 create mode 100644 src/heatflow/mkl_pardiso.f90
 create mode 100644 src/heatflow/mod_sparse_solver.f90

diff --git a/Makefile b/Makefile
index 57e89be..af38c3a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,15 +1,8 @@
 ####################################################################
 #   11 Jun 2024                                                    #
 ####################################################################
-#
 SHELL = /bin/sh
-#
-#  The machine (platform) identifier to append to the library names
-#
 PLAT = _linux
-#
-#
-
 
 ##########################################
 # CODE DIRECTORIES AND FILES
@@ -20,38 +13,46 @@ BIN_DIR := ./bin
 SRC_DIR := ./src
 BUILD_DIR = ./obj
 
-SRCS := /heatflow/mod_constants.f90 \
-		/heatflow/mod_constructions.f90 \
-		/heatflow/mod_SPtype.f90 \
-		/heatflow/mod_global.f90 \
-		/heatflow/mod_Sparse.f90 \
-		/heatflow/mod_inputs.f90 \
-		/heatflow/mod_material.f90 \
-		/heatflow/mod_hmatrix.f90 \
-		/heatflow/mod_init_evolve.f90 \
-		/heatflow/mod_setup.f90 \
-		/heatflow/mod_boundary.f90 \
-		/heatflow/mod_heating.f90 \
-		/heatflow/mod_cattaneo.f90 \
-		/heatflow/mod_tempdep.f90 \
-		/heatflow/mod_evolve.f90 \
-        /heatflow/mod_output.f90 \
+SRCS := heatflow/mod_constants.f90 \
+		heatflow/mod_constructions.f90 \
+		heatflow/mod_SPtype.f90 \
+		heatflow/mod_global.f90 \
+		heatflow/mod_Sparse.f90 \
+		heatflow/mod_inputs.f90 \
+		heatflow/mod_material.f90 \
+		heatflow/mod_hmatrix.f90 \
+		heatflow/mod_init_evolve.f90 \
+		heatflow/mkl_pardiso.f90 \
+		heatflow/mod_sparse_solver.f90 \
+		heatflow/mod_setup.f90 \
+		heatflow/mod_boundary.f90 \
+		heatflow/mod_heating.f90 \
+		heatflow/mod_cattaneo.f90 \
+		heatflow/mod_tempdep.f90 \
+		heatflow/mod_evolve.f90 \
+        heatflow/mod_output.f90 \
         heatflow.f90
-OBJS := $(addprefix $(SRC_DIR)/,$(SRCS))
 
+OBJS := $(addprefix $(BUILD_DIR)/,$(notdir $(SRCS:.f90=.o)))
+
+# MKL configuration
+MKLROOT ?= /opt/intel/oneapi/mkl/latest
+MKL_LIB_DIR = $(MKLROOT)/lib/intel64
+MKL_INCLUDE_DIR = $(MKLROOT)/include
+MKL_FLAGS = -L$(MKL_LIB_DIR) -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl
 
-FFLAGS = -O3 
-MODULEFLAGS = -J
+FFLAGS = -O3 -I$(MKL_INCLUDE_DIR)
+MODULEFLAGS = -J$(BUILD_DIR)
 FC = gfortran
 
 ##########################################
-# LIBRARY SECTION
+# TARGETS
 ##########################################
-MKLROOT?="/usr/local/intel/parallel_studio_xe_2017/compilers_and_libraries_2017/linux/mkl/lib/intel64_lin"
-
-
 NAME = ThermalFlow.x
 programs = $(BIN_DIR)/$(NAME)
+
+.PHONY: all debug clean OMP
+
 all: $(programs)
 
 $(BIN_DIR):
@@ -60,11 +61,22 @@ $(BIN_DIR):
 $(BUILD_DIR):
 	mkdir -p $@
 
-$(programs) : $(OBJS) | $(BIN_DIR) $(BUILD_DIR)
-	$(FC) -O3 -fopenmp $(MODULEFLAGS) $(BUILD_DIR) $(OBJS) -o $@
+# Pattern rule for compiling Fortran files
+$(BUILD_DIR)/%.o: $(SRC_DIR)/heatflow/%.f90 | $(BUILD_DIR)
+	$(FC) $(FFLAGS) $(MODULEFLAGS) -c $< -o $@
+
+$(BUILD_DIR)/%.o: $(SRC_DIR)/%.f90 | $(BUILD_DIR)
+	$(FC) $(FFLAGS) $(MODULEFLAGS) -c $< -o $@
 
-debug :  $(OBJS)
-	$(FC) -O0 -Wall -g -ffpe-trap=invalid,zero,overflow,underflow -fbacktrace -fcheck=all -fbounds-check  $(MODULEFLAGS) $(BUILD_DIR) $(OBJS) -o $(programs)
+$(programs): $(OBJS) | $(BIN_DIR)
+	$(FC) -O3 -fopenmp $(OBJS) -o $@ $(MKL_FLAGS)
+
+debug: FFLAGS = -O0 -Wall -g -ffpe-trap=invalid,zero,overflow,underflow -fbacktrace -fcheck=all -fbounds-check -I$(MKL_INCLUDE_DIR)
+debug: $(OBJS) | $(BIN_DIR)
+	$(FC) $(FFLAGS) $(OBJS) -o $(programs) $(MKL_FLAGS)
 
 OMP: $(programs)
 	./util/DShell/omp_exec.sh
+
+clean:
+	rm -f $(BUILD_DIR)/*.o $(BUILD_DIR)/*.mod $(programs)
\ No newline at end of file
diff --git a/src/heatflow/mkl_pardiso.f90 b/src/heatflow/mkl_pardiso.f90
new file mode 100644
index 0000000..0ff7527
--- /dev/null
+++ b/src/heatflow/mkl_pardiso.f90
@@ -0,0 +1,452 @@
+!*******************************************************************************
+!   Copyright(C) 2004-2013 Intel Corporation. All Rights Reserved.
+!
+!   The source code, information  and  material ("Material") contained herein is
+!   owned  by Intel Corporation or its suppliers or licensors, and title to such
+!   Material remains  with Intel Corporation  or its suppliers or licensors. The
+!   Material  contains proprietary information  of  Intel or  its  suppliers and
+!   licensors. The  Material is protected by worldwide copyright laws and treaty
+!   provisions. No  part  of  the  Material  may  be  used,  copied, reproduced,
+!   modified, published, uploaded, posted, transmitted, distributed or disclosed
+!   in any way  without Intel's  prior  express written  permission. No  license
+!   under  any patent, copyright  or  other intellectual property rights  in the
+!   Material  is  granted  to  or  conferred  upon  you,  either  expressly,  by
+!   implication, inducement,  estoppel or  otherwise.  Any  license  under  such
+!   intellectual  property  rights must  be express  and  approved  by  Intel in
+!   writing.
+!
+!   *Third Party trademarks are the property of their respective owners.
+!
+!   Unless otherwise  agreed  by Intel  in writing, you may not remove  or alter
+!   this  notice or  any other notice embedded  in Materials by Intel or Intel's
+!   suppliers or licensors in any way.
+!
+!*******************************************************************************
+!   Content : MKL PARDISO Fortran-90 header file
+!
+!           Contains PARDISO routine definition.
+!           For CDECL use only.
+!
+!*******************************************************************************
+!DEC$ IF .NOT. DEFINED( __MKL_PARDISO_F90 )
+
+!DEC$ DEFINE __MKL_PARDISO_F90
+
+      MODULE MKL_PARDISO_PRIVATE
+
+        TYPE MKL_PARDISO_HANDLE; INTEGER(KIND=8) DUMMY; END TYPE
+
+        INTEGER, PARAMETER :: PARDISO_OOC_FILE_NAME = 1
+
+      END MODULE MKL_PARDISO_PRIVATE
+
+      MODULE MKL_PARDISO
+        USE MKL_PARDISO_PRIVATE
+
+!
+! Subroutine prototype for PARDISO
+!
+
+      INTERFACE PARDISO
+        SUBROUTINE PARDISO_S( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
+          USE MKL_PARDISO_PRIVATE
+          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
+          INTEGER,          INTENT(IN)    :: MAXFCT
+          INTEGER,          INTENT(IN)    :: MNUM
+          INTEGER,          INTENT(IN)    :: MTYPE
+          INTEGER,          INTENT(IN)    :: PHASE
+          INTEGER,          INTENT(IN)    :: N
+          INTEGER,          INTENT(IN)    :: IA(*)
+          INTEGER,          INTENT(IN)    :: JA(*)
+          INTEGER,          INTENT(INOUT) :: PERM(*)
+          INTEGER,          INTENT(IN)    :: NRHS
+          INTEGER,          INTENT(INOUT) :: IPARM(*)
+          INTEGER,          INTENT(IN)    :: MSGLVL
+          INTEGER,          INTENT(OUT)   :: ERROR
+
+          REAL(KIND=4),     INTENT(IN)    :: A(*)
+          REAL(KIND=4),     INTENT(INOUT) :: B(*)
+          REAL(KIND=4),     INTENT(OUT)   :: X(*)
+        END SUBROUTINE PARDISO_S
+
+        SUBROUTINE PARDISO_D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
+          USE MKL_PARDISO_PRIVATE
+          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
+          INTEGER,          INTENT(IN)    :: MAXFCT
+          INTEGER,          INTENT(IN)    :: MNUM
+          INTEGER,          INTENT(IN)    :: MTYPE
+          INTEGER,          INTENT(IN)    :: PHASE
+          INTEGER,          INTENT(IN)    :: N
+          INTEGER,          INTENT(IN)    :: IA(*)
+          INTEGER,          INTENT(IN)    :: JA(*)
+          INTEGER,          INTENT(INOUT) :: PERM(*)
+          INTEGER,          INTENT(IN)    :: NRHS
+          INTEGER,          INTENT(INOUT) :: IPARM(*)
+          INTEGER,          INTENT(IN)    :: MSGLVL
+          INTEGER,          INTENT(OUT)   :: ERROR
+
+          REAL(KIND=8),     INTENT(IN)    :: A(*)
+          REAL(KIND=8),     INTENT(INOUT) :: B(*)
+          REAL(KIND=8),     INTENT(OUT)   :: X(*)
+        END SUBROUTINE PARDISO_D
+
+        SUBROUTINE PARDISO_SC( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
+          USE MKL_PARDISO_PRIVATE
+          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
+          INTEGER,          INTENT(IN)    :: MAXFCT
+          INTEGER,          INTENT(IN)    :: MNUM
+          INTEGER,          INTENT(IN)    :: MTYPE
+          INTEGER,          INTENT(IN)    :: PHASE
+          INTEGER,          INTENT(IN)    :: N
+          INTEGER,          INTENT(IN)    :: IA(*)
+          INTEGER,          INTENT(IN)    :: JA(*)
+          INTEGER,          INTENT(INOUT) :: PERM(*)
+          INTEGER,          INTENT(IN)    :: NRHS
+          INTEGER,          INTENT(INOUT) :: IPARM(*)
+          INTEGER,          INTENT(IN)    :: MSGLVL
+          INTEGER,          INTENT(OUT)   :: ERROR
+
+          COMPLEX(KIND=4),  INTENT(IN)    :: A(*)
+          COMPLEX(KIND=4),  INTENT(INOUT) :: B(*)
+          COMPLEX(KIND=4),  INTENT(OUT)   :: X(*)
+        END SUBROUTINE PARDISO_SC
+
+        SUBROUTINE PARDISO_DC( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
+          USE MKL_PARDISO_PRIVATE
+          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
+          INTEGER,          INTENT(IN)    :: MAXFCT
+          INTEGER,          INTENT(IN)    :: MNUM
+          INTEGER,          INTENT(IN)    :: MTYPE
+          INTEGER,          INTENT(IN)    :: PHASE
+          INTEGER,          INTENT(IN)    :: N
+          INTEGER,          INTENT(IN)    :: IA(*)
+          INTEGER,          INTENT(IN)    :: JA(*)
+          INTEGER,          INTENT(INOUT) :: PERM(*)
+          INTEGER,          INTENT(IN)    :: NRHS
+          INTEGER,          INTENT(INOUT) :: IPARM(*)
+          INTEGER,          INTENT(IN)    :: MSGLVL
+          INTEGER,          INTENT(OUT)   :: ERROR
+          COMPLEX(KIND=8),  INTENT(IN)    :: A(*)
+          COMPLEX(KIND=8),  INTENT(INOUT) :: B(*)
+          COMPLEX(KIND=8),  INTENT(OUT)   :: X(*)
+        END SUBROUTINE PARDISO_DC
+
+
+        SUBROUTINE PARDISO_S_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
+          USE MKL_PARDISO_PRIVATE
+          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
+          INTEGER,          INTENT(IN)    :: MAXFCT
+          INTEGER,          INTENT(IN)    :: MNUM
+          INTEGER,          INTENT(IN)    :: MTYPE
+          INTEGER,          INTENT(IN)    :: PHASE
+          INTEGER,          INTENT(IN)    :: N
+          INTEGER,          INTENT(IN)    :: IA(*)
+          INTEGER,          INTENT(IN)    :: JA(*)
+          INTEGER,          INTENT(INOUT) :: PERM(*)
+          INTEGER,          INTENT(IN)    :: NRHS
+          INTEGER,          INTENT(INOUT) :: IPARM(*)
+          INTEGER,          INTENT(IN)    :: MSGLVL
+          INTEGER,          INTENT(OUT)   :: ERROR
+          REAL(KIND=4),     INTENT(IN)    :: A(*)
+          REAL(KIND=4),     INTENT(INOUT) :: B(N,*)
+          REAL(KIND=4),     INTENT(OUT)   :: X(N,*)
+        END SUBROUTINE PARDISO_S_2D
+
+        SUBROUTINE PARDISO_D_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
+          USE MKL_PARDISO_PRIVATE
+          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
+          INTEGER,          INTENT(IN)    :: MAXFCT
+          INTEGER,          INTENT(IN)    :: MNUM
+          INTEGER,          INTENT(IN)    :: MTYPE
+          INTEGER,          INTENT(IN)    :: PHASE
+          INTEGER,          INTENT(IN)    :: N
+          INTEGER,          INTENT(IN)    :: IA(*)
+          INTEGER,          INTENT(IN)    :: JA(*)
+          INTEGER,          INTENT(INOUT) :: PERM(*)
+          INTEGER,          INTENT(IN)    :: NRHS
+          INTEGER,          INTENT(INOUT) :: IPARM(*)
+          INTEGER,          INTENT(IN)    :: MSGLVL
+          INTEGER,          INTENT(OUT)   :: ERROR
+          REAL(KIND=8),     INTENT(IN)    :: A(*)
+          REAL(KIND=8),     INTENT(INOUT) :: B(N,*)
+          REAL(KIND=8),     INTENT(OUT)   :: X(N,*)
+        END SUBROUTINE PARDISO_D_2D
+
+        SUBROUTINE PARDISO_SC_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
+          USE MKL_PARDISO_PRIVATE
+          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
+          INTEGER,          INTENT(IN)    :: MAXFCT
+          INTEGER,          INTENT(IN)    :: MNUM
+          INTEGER,          INTENT(IN)    :: MTYPE
+          INTEGER,          INTENT(IN)    :: PHASE
+          INTEGER,          INTENT(IN)    :: N
+          INTEGER,          INTENT(IN)    :: IA(*)
+          INTEGER,          INTENT(IN)    :: JA(*)
+          INTEGER,          INTENT(INOUT) :: PERM(*)
+          INTEGER,          INTENT(IN)    :: NRHS
+          INTEGER,          INTENT(INOUT) :: IPARM(*)
+          INTEGER,          INTENT(IN)    :: MSGLVL
+          INTEGER,          INTENT(OUT)   :: ERROR
+          COMPLEX(KIND=4),  INTENT(IN)    :: A(*)
+          COMPLEX(KIND=4),  INTENT(INOUT) :: B(N,*)
+          COMPLEX(KIND=4),  INTENT(OUT)   :: X(N,*)
+        END SUBROUTINE PARDISO_SC_2D
+
+        SUBROUTINE PARDISO_DC_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
+          USE MKL_PARDISO_PRIVATE
+          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
+          INTEGER,          INTENT(IN)    :: MAXFCT
+          INTEGER,          INTENT(IN)    :: MNUM
+          INTEGER,          INTENT(IN)    :: MTYPE
+          INTEGER,          INTENT(IN)    :: PHASE
+          INTEGER,          INTENT(IN)    :: N
+          INTEGER,          INTENT(IN)    :: IA(*)
+          INTEGER,          INTENT(IN)    :: JA(*)
+          INTEGER,          INTENT(INOUT) :: PERM(*)
+          INTEGER,          INTENT(IN)    :: NRHS
+          INTEGER,          INTENT(INOUT) :: IPARM(*)
+          INTEGER,          INTENT(IN)    :: MSGLVL
+          INTEGER,          INTENT(OUT)   :: ERROR
+          COMPLEX(KIND=8),  INTENT(IN)    :: A(*)
+          COMPLEX(KIND=8),  INTENT(INOUT) :: B(N,*)
+          COMPLEX(KIND=8),  INTENT(OUT)   :: X(N,*)
+        END SUBROUTINE PARDISO_DC_2D
+      END INTERFACE
+!
+! Subroutine prototype for PARDISO_64
+!
+! Note: The pardiso_64 interface is not supported on IA-32 architecture.
+!       If called on IA-32, error = -12 is returned.
+!
+      INTERFACE PARDISO_64
+        SUBROUTINE PARDISO_S_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
+          USE MKL_PARDISO_PRIVATE
+          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
+          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
+          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
+          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
+          INTEGER(KIND=8),          INTENT(IN)    :: N
+          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
+          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
+          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
+          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
+          REAL(KIND=4),             INTENT(IN)    :: A(*)
+          REAL(KIND=4),             INTENT(INOUT) :: B(*)
+          REAL(KIND=4),             INTENT(OUT)   :: X(*)
+        END SUBROUTINE PARDISO_S_64
+
+        SUBROUTINE PARDISO_D_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
+          USE MKL_PARDISO_PRIVATE
+          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
+          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
+          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
+          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
+          INTEGER(KIND=8),          INTENT(IN)    :: N
+          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
+          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
+          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
+          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
+          REAL(KIND=8),             INTENT(IN)    :: A(*)
+          REAL(KIND=8),             INTENT(INOUT) :: B(*)
+          REAL(KIND=8),             INTENT(OUT)   :: X(*)
+        END SUBROUTINE PARDISO_D_64
+
+        SUBROUTINE PARDISO_SC_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
+          USE MKL_PARDISO_PRIVATE
+          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
+          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
+          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
+          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
+          INTEGER(KIND=8),          INTENT(IN)    :: N
+          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
+          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
+          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
+          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
+          COMPLEX(KIND=4),          INTENT(IN)    :: A(*)
+          COMPLEX(KIND=4),          INTENT(INOUT) :: B(*)
+          COMPLEX(KIND=4),          INTENT(OUT)   :: X(*)
+        END SUBROUTINE PARDISO_SC_64
+
+        SUBROUTINE PARDISO_DC_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
+          USE MKL_PARDISO_PRIVATE
+          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
+          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
+          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
+          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
+          INTEGER(KIND=8),          INTENT(IN)    :: N
+          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
+          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
+          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
+          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
+          COMPLEX(KIND=8),          INTENT(IN)    :: A(*)
+          COMPLEX(KIND=8),          INTENT(INOUT) :: B(*)
+          COMPLEX(KIND=8),          INTENT(OUT)   :: X(*)
+        END SUBROUTINE PARDISO_DC_64
+
+        SUBROUTINE PARDISO_S_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
+          USE MKL_PARDISO_PRIVATE
+          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
+          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
+          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
+          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
+          INTEGER(KIND=8),          INTENT(IN)    :: N
+          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
+          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
+          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
+          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
+          REAL(KIND=4),             INTENT(IN)    :: A(*)
+          REAL(KIND=4),             INTENT(INOUT) :: B(N,*)
+          REAL(KIND=4),             INTENT(OUT)   :: X(N,*)
+        END SUBROUTINE PARDISO_S_64_2D
+
+        SUBROUTINE PARDISO_D_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
+          USE MKL_PARDISO_PRIVATE
+          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
+          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
+          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
+          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
+          INTEGER(KIND=8),          INTENT(IN)    :: N
+          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
+          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
+          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
+          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
+          REAL(KIND=8),             INTENT(IN)    :: A(*)
+          REAL(KIND=8),             INTENT(INOUT) :: B(N,*)
+          REAL(KIND=8),             INTENT(OUT)   :: X(N,*)
+        END SUBROUTINE PARDISO_D_64_2D
+
+        SUBROUTINE PARDISO_SC_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
+          USE MKL_PARDISO_PRIVATE
+          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
+          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
+          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
+          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
+          INTEGER(KIND=8),          INTENT(IN)    :: N
+          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
+          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
+          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
+          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
+          COMPLEX(KIND=4),          INTENT(IN)    :: A(*)
+          COMPLEX(KIND=4),          INTENT(INOUT) :: B(N,*)
+          COMPLEX(KIND=4),          INTENT(OUT)   :: X(N,*)
+        END SUBROUTINE PARDISO_SC_64_2D
+
+        SUBROUTINE PARDISO_DC_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
+          USE MKL_PARDISO_PRIVATE
+          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
+          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
+          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
+          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
+          INTEGER(KIND=8),          INTENT(IN)    :: N
+          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
+          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
+          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
+          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
+          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
+          COMPLEX(KIND=8),          INTENT(IN)    :: A(*)
+          COMPLEX(KIND=8),          INTENT(INOUT) :: B(N,*)
+          COMPLEX(KIND=8),          INTENT(OUT)   :: X(N,*)
+        END SUBROUTINE PARDISO_DC_64_2D
+
+      END INTERFACE
+
+      INTERFACE
+
+       SUBROUTINE PARDISOINIT(PT, MTYPE, IPARM)
+          USE MKL_PARDISO_PRIVATE
+          TYPE(MKL_PARDISO_HANDLE), INTENT(OUT) :: PT(*)
+          INTEGER,          INTENT(IN)    :: MTYPE
+          INTEGER,          INTENT(OUT) :: IPARM(*)
+       END SUBROUTINE PARDISOINIT
+
+      END INTERFACE
+
+      INTERFACE PARDISO_GET
+
+       FUNCTION PARDISO_GETENV(PT, OptName, StrVal)
+          USE MKL_PARDISO_PRIVATE
+          INTEGER PARDISO_GETENV
+          TYPE(MKL_PARDISO_HANDLE), INTENT(IN)  :: PT(*)
+          INTEGER,                  INTENT(IN)  :: OptName
+          CHARACTER(*),             INTENT(OUT)  :: StrVal
+       END FUNCTION PARDISO_GETENV
+
+      END INTERFACE
+
+      INTERFACE PARDISO_SET
+
+       FUNCTION PARDISO_SETENV(PT, OptName, StrVal)
+          USE MKL_PARDISO_PRIVATE
+          INTEGER PARDISO_SETENV
+          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
+          INTEGER,                  INTENT(IN)    :: OptName
+          INTEGER,                  INTENT(IN)    :: StrVal(*)
+       END FUNCTION PARDISO_SETENV
+
+      END INTERFACE
+
+      INTERFACE PARDISO_PIV
+      
+      FUNCTION MKL_PARDISO_PIVOT( AII, BII, EPS)
+         REAL(KIND=8)   :: AII, BII, EPS
+         INTEGER   MKL_PARDISO_PIVOT
+         END
+      END INTERFACE PARDISO_PIV
+
+      INTERFACE PARDISO_GETDIAG
+
+         SUBROUTINE PARDISO_GETDIAG_D(PT, DIAG_FACT, DIAG_A, MNUM, EPS)
+          USE MKL_PARDISO_PRIVATE
+          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
+          REAL(KIND=8), INTENT(INOUT)             :: DIAG_FACT, DIAG_A
+          INTEGER, INTENT(IN)            :: MNUM
+          INTEGER, INTENT(INOUT)         :: EPS
+         END
+
+         SUBROUTINE PARDISO_GETDIAG_Z(PT, DIAG_FACT, DIAG_A, MNUM, EPS)
+          USE MKL_PARDISO_PRIVATE
+          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
+          COMPLEX(KIND=8), INTENT(INOUT)          :: DIAG_FACT, DIAG_A
+          INTEGER, INTENT(IN)            :: MNUM
+          INTEGER, INTENT(INOUT)         :: EPS
+         END
+
+      END INTERFACE PARDISO_GETDIAG
+
+
+
+
+      END MODULE MKL_PARDISO
+
+!DEC$ ENDIF
diff --git a/src/heatflow/mod_constructions.f90 b/src/heatflow/mod_constructions.f90
index 384ce09..b0de444 100644
--- a/src/heatflow/mod_constructions.f90
+++ b/src/heatflow/mod_constructions.f90
@@ -26,9 +26,9 @@ module constructions
      integer(int12) :: imaterial_type !what type of material it is in the mat.in
      real(real12) :: volume !volume of the block
      real(real12), dimension(3) :: Length !length of the block in 3 dimensions
-     real(real12), dimension(3) :: vel !center of the block in 3 dimensions
+     real(real12), dimension(3) :: vel !vel of the block in 3 dimensions
      integer(int12) :: iheater !whether the block is a heater
-      real(real12) :: kappa, rho, heat_capacity, tau, em !physical properties of the material
+     real(real12) :: kappa, rho, heat_capacity, tau, em !physical properties of the material
   end type heatblock
 !!Defines the material to be used in the simulation
   type material
diff --git a/src/heatflow/mod_evolve.f90 b/src/heatflow/mod_evolve.f90
index 1209b25..74f3951 100644
--- a/src/heatflow/mod_evolve.f90
+++ b/src/heatflow/mod_evolve.f90
@@ -26,10 +26,12 @@ module evolution
   use sptype, only: I4B
   use solver, only: linbcg
   use globe_data, only: Temp_p, Temp_pp, inverse_time, heat, lin_rhoc
+  use globe_data, only: acsr, ja, ia
   use heating, only: heater
   use boundary_vector, only: boundary
   use cattaneo, only: S_catS
-  use tempdep, only: ChangeProp
+!   use tempdep, only: ChangeProp 
+  use sparse_solver, only: bicgstab
   implicit none
 
   private
@@ -45,8 +47,9 @@ module evolution
   subroutine simulate(itime)
     integer(int12), intent(in) :: itime
     real(real12), dimension(NA) :: S, x, Q, Qdens, S_CAT, B
-    integer(int12) :: ncg, itol, itmax !, iss
-    integer(I4B) :: iter
+    real(real12), dimension(:), allocatable :: x0
+    integer:: ncg, itol, itmax !, iss
+    integer :: iter
     real(real12) :: e, err, tol
     
     !----------------------
@@ -147,9 +150,11 @@ subroutine simulate(itime)
     err=E
 
 
-    CALL linbcg(S,x,itol=int(itol,I4B),tol=tol, itmax=int(itmax,I4B), iter=iter, &
-         err=E)
+   CALL bicgstab(acsr, ia, ja, S, itmax, x, x0, iter)
+   !  CALL linbcg(S,x,itol=int(itol,I4B),tol=tol, itmax=int(itmax,I4B), iter=iter, &
+   !       err=E)
          
+   x=x0
     if (any(isnan(x(:)))) then
        write(0,*) "fatal error: NAN in x tempurature vector"
        write(0,*) 'time step ', itime, "      T   ", sum(Temp_p)/size(Temp_p), E ,iter
@@ -169,9 +174,9 @@ subroutine simulate(itime)
     Temp_pp = Temp_p
     Temp_p = x
 
-    if (TempDepProp .eq. 1) then
-      CALL ChangeProp()
-    end if
+   !  if (TempDepProp .eq. 1) then
+   !    CALL ChangeProp()
+   !  end if
    !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
   end subroutine simulate
diff --git a/src/heatflow/mod_global.f90 b/src/heatflow/mod_global.f90
index 5207578..94aeb26 100644
--- a/src/heatflow/mod_global.f90
+++ b/src/heatflow/mod_global.f90
@@ -4,7 +4,7 @@
 !!!#################################################################################################
 module globe_data
   use sptype, only: sprs2_dp, diag_sprs_dp
-  use constants, only: real12
+  use constants, only: real12, int12
   implicit none
   real(real12), dimension(:,:,:), allocatable :: Temp_cur ! Current temperature
   real(real12), dimension(:), allocatable :: Temp_p, Temp_pp ! Previous and previous previous temperature
@@ -13,6 +13,9 @@ module globe_data
   real(real12), dimension(:), allocatable :: lin_rhoc ! 1D array for HeatCapacity*Rho
   TYPE(sprs2_dp) :: ra !Techniqually rH
   TYPE(diag_sprs_dp) :: da !Techniqually dH
+  real(real12), dimension(:), allocatable :: acsr
+  integer (kind=8), dimension(:), allocatable :: ja
+  integer (kind=8), dimension(:), allocatable :: ia
    
 end module globe_data
  
diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90
index 11dc0bf..d0011ab 100644
--- a/src/heatflow/mod_setup.f90
+++ b/src/heatflow/mod_setup.f90
@@ -21,8 +21,10 @@ module setup
   use inputs, only: Periodicz ! 
   use hmatrixmod, only: hmatrixfunc
   use globe_data, only:  ra, Temp_cur, Temp_p, Temp_pp,inverse_time, heat, lin_rhoc, Q_P
+  use globe_data, only: acsr, ja, ia
   use solver, only: SRSin
   use materials, only: material
+  use sparse_solver, only: coo2csr
   implicit none
   
    contains
@@ -77,6 +79,9 @@ subroutine set_global_variables()
          CALL build_Hmatrix()
       else
          CALL sparse_Hmatrix()
+         ! Allocate the arrays to hold the H matrix in CSR format
+         allocate(acsr(ra%len), ja(ra%len), ia(ra%n+1))
+         CALL coo2csr(ra%n, ra%len, ra%val, ra%irow, ra%jcol, acsr, ja, ia)               
       end if
       !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/src/heatflow/mod_sparse_solver.f90 b/src/heatflow/mod_sparse_solver.f90
new file mode 100644
index 0000000..95a925d
--- /dev/null
+++ b/src/heatflow/mod_sparse_solver.f90
@@ -0,0 +1,334 @@
+module sparse_solver
+
+  use mkl_pardiso
+  implicit none
+
+  private
+
+  public :: coo2csr, &
+            bicgstab, &
+            solve_pardiso
+  
+contains
+
+  subroutine coo2csr( nrow, &
+                      nnz, &
+                      a, &
+                      ir, &
+                      jc, &
+                      acsr, &
+                      ja, &
+                      ia )
+    
+    !--------------------------------------------------------------------------!
+    !! coocsr converts coo to csr.
+    !
+    !  discussion:
+    !
+    !    this routine converts a matrix that is stored in coo coordinate format
+    !    a, ir, jc into a csr row general sparse acsr, ja, ia format.
+    !
+    !  parameters:
+    !
+    !    input, integer nrow, the row dimension of the matrix.
+    !
+    !    input, integer nnz, the number of nonzero elements in the matrix.
+    !
+    ! a,
+    ! ir,
+    ! jc    = matrix in coordinate format. a(k), ir(k), jc(k) store the nnz
+    !         nonzero elements of the matrix with a(k) = actual real value of
+    !         the elements, ir(k) = its row number and jc(k) = its column
+    !        number. the order of the elements is arbitrary.
+    !
+    ! on return:
+    !
+    ! ir       is destroyed
+    !
+    !    output, real acsr(nnz), ja(nnz), ia(nrow+1), the matrix in csr
+    !    compressed sparse row format.
+    !--------------------------------------------------------------------------!
+
+    ! Arguments.
+    integer (kind=8), intent(in) :: nrow
+    integer (kind=8), intent(in) :: nnz
+    real(8), dimension(nnz), intent(in) :: a
+    integer (kind=8), dimension(nnz), intent(in) :: ir
+    integer (kind=8), dimension(nnz), intent(in) :: jc   
+    real(8), dimension(nnz), intent(out) :: acsr
+    integer (kind=8), dimension(nnz), intent(out) :: ja
+    integer (kind=8), dimension(nrow+1), intent(out) :: ia
+
+    ! Local variables.
+    integer (kind=8) :: i, iad, j, k, k0
+    real(kind=8) :: x
+    
+    ia(1:nrow+1) = 0
+    
+    ! determine the row lengths.
+    
+    do k = 1, nnz
+       ia(ir(k)) = ia(ir(k)) + 1
+    end do
+    
+    ! the starting position of each row.
+    
+    k = 1
+
+    do j = 1, nrow + 1
+
+       k0 = ia(j)
+       ia(j) = k
+       k = k + k0
+
+    end do
+    
+    ! go through the structure once more. fill in output matrix.
+    
+    do k = 1, nnz
+
+       i = ir(k)
+       j = jc(k)
+       x = a(k)
+       iad = ia(i)
+       acsr(iad) = x
+       ja(iad) = j
+       ia(i) = iad + 1
+
+    end do
+    
+    ! shift back ia.
+    
+    do j = nrow, 1, -1
+       ia(j+1) = ia(j)
+    end do
+
+    ia(1) = 1
+    
+    return
+    
+  end subroutine coo2csr
+  
+  !-------------------------------------------------------------------
+  !	BiConjugate Gradient (Stabilised) Method
+  !-------------------------------------------------------------------
+  
+  subroutine bicgstab( acsr, &
+                       ia, &
+                       ja, &
+                       b, &
+                       maxiter, &
+                       initGuess, &
+                       x, &
+                       iter )
+
+    ! Arguments
+    real(8), dimension(:), intent(in) :: acsr
+    integer (kind=8), dimension(:), intent(in) :: ia
+    integer (kind=8), dimension(:), intent(in) :: ja
+    real(8), dimension(:), intent(in) :: b
+    integer, intent(in) :: maxiter
+    real(8), dimension(:), intent(in) :: initGuess
+    real(8), dimension(:), allocatable, intent(out) :: x
+    integer, intent(out) :: iter
+
+    ! Local variables
+    integer :: i, j, k, n
+    real(8), parameter :: cc = 1.0e-9
+    real(8) :: alpha,beta,delta0,delta,delta_old,omega
+    real(8), dimension(:), allocatable :: r, p, s, rst, temp1, temp2
+    
+    n = size(b,1)
+    
+    allocate(x(n))
+    allocate(r(n))
+    allocate(p(n))
+    allocate(s(n))
+    allocate(rst(n))
+    allocate(temp1(n))
+    allocate(temp2(n))
+    
+    call mkl_dcsrgemv("N",n,acsr,ia,ja,x,temp1)
+    
+    r = b - temp1
+  
+    call random_number(rst)
+    
+    p = r
+    delta = dot_product(rst,r)
+    
+    write(*,'(a,1x,f15.3)') "Starting delta: ", delta
+    
+    delta0 = delta
+    
+    do i = 1, maxiter
+       
+       if ( norm2(r) /= norm2(r) ) then
+          write(*,'(a)') "Error in solver: residual NaN"
+          exit
+       end if
+
+       if(mod(i,1000).eq.0) then
+          write(*,'(a,1x,i6)') 'Iteration number: ',i
+          write(*,'(a,1x,f15.3)') "Residual ratio: ", norm2(r)/cc
+       end if
+       
+       call mkl_dcsrgemv("N",n,acsr,ia,ja,p,temp1)	! temp1=A*p
+       
+       alpha = delta/dot_product(rst,temp1)
+       s = r - alpha*temp1
+       
+       call mkl_dcsrgemv("N",n,acsr,ia,ja,s,temp2)	! temp2=A*s
+       
+       omega = dot_product(s,temp2)/dot_product(temp2,temp2)
+       x = x + alpha*p + omega*s
+       r = s - omega*temp2
+       delta_old = delta
+       delta = dot_product(rst,r)
+       beta = (delta/delta_old)*(alpha/omega)
+       p = r + beta*(p - omega*temp1)
+       
+       if(norm2(r) .lt. cc) then
+          iter = i
+          return
+       end if
+       
+       if(i.eq.maxiter) then
+          write(*,'(a)') "Maximum iterations reached."
+          write(*,'(a)') "Convergence not achieved."
+          write(*,'(a,1x,f15.3)') "Norm of residual: ", norm2(r)
+          write(*,'(a,1x,f15.3)') "Convergence criterion: ", cc
+          if((norm2(r)/cc) .lt. 2.d0) then
+             write(*,'(a)') "The residual is within a small",&
+                  "range of the convergence criterion."
+             write(*,'(a)') "Perhaps increasing iteration ",	&
+                  "count may help."
+          end if
+       end if
+       
+    end do
+    
+  end subroutine bicgstab
+  
+  !-------------------------------------------------------------------
+  !	END BiConjugate Gradient (Stabilised) Method
+  !-------------------------------------------------------------------
+  
+  !-------------------------------------------------------------------
+  !	PARDISO Direct Solver
+  !-------------------------------------------------------------------
+  
+  subroutine solve_pardiso( acsr, &
+                            b, &
+                            ia, &
+                            ja, &
+                            x )
+    
+    use mkl_pardiso
+
+    ! Arguments
+    real(8), dimension(:), intent(in) :: acsr
+    real(8), dimension(:), intent(inout) :: b
+    integer,dimension(:), intent(in) :: ia
+    integer,dimension(:), intent(in) :: ja
+    real(8), dimension(:), allocatable, intent(out) :: x
+
+    ! Local variables
+    type(mkl_pardiso_handle), dimension(:), allocatable  :: pt
+    integer :: i,maxfct,mnum,mtype,phase,n,nrhs,error,msglvl,nnz,error1
+    integer, dimension(:), allocatable :: iparm  
+    integer,dimension(1) :: idum
+    real(8),dimension(1) :: ddum
+    
+    n = size(b,1)
+    nnz = size(acsr,1)
+    nrhs = 1
+    maxfct = 1
+    mnum = 1
+    
+    if (.not.(allocated(x))) allocate(x(n))
+    
+    allocate(iparm(64))		!set up pardiso control parameter
+    
+    do i=1,64
+       iparm(i) = 0
+    end do
+    
+    iparm(1) = 1 ! no solver default
+    iparm(2) = 2 ! fill-in reordering from metis
+    iparm(4) = 0 ! no iterative-direct algorithm
+    iparm(5) = 0 ! no user fill-in reducing permutation
+    iparm(6) = 0 ! =0 solution on the first n compoments of x
+    iparm(8) = 2 ! numbers of iterative refinement steps
+    iparm(10) = 13 ! perturbe the pivot elements with 1e-13
+    iparm(11) = 1 ! use nonsymmetric permutation and scaling mps
+    iparm(13) = 0 ! maximum weighted matching algorithm is
+    !switched-off (default for symmetric).
+    ! try iparm(13) = 1 in case of inaccuracy
+    iparm(14) = 0 ! output: number of perturbed pivots
+    iparm(18) = -1 ! output: number of nonzeros in the factor lu
+    iparm(19) = -1 ! output: mflops for lu factorization
+    iparm(20) = 0 ! output: numbers of cg iterations
+    
+    error  = 0 ! initialize error flag
+    msglvl = 0 ! 0=no output, 1=print statistical information
+    mtype  = 11 ! real and unsymmetric matrix
+    
+    ! Initiliaze the internal solver memory pointer.
+    ! This is only necessary for the first call of the solver.
+    
+    allocate (pt(64))
+    do i=1,64
+       pt(i)%dummy =  0
+    end do
+    
+    phase = 11 ! Only reordering and symbolic factorization
+    
+    call pardiso (pt,maxfct,mnum,mtype,phase,n,acsr,ia,ja, &
+         idum, nrhs, iparm, msglvl, ddum, ddum, error)
+    
+    if (error /= 0) then
+       write(*,*) 'the following error was detected: ', error
+       goto 1000
+    end if
+    
+    phase = 22 ! only factorization
+    call pardiso (pt,maxfct,mnum,mtype,phase,n,acsr,ia,ja, &
+         idum, nrhs, iparm, msglvl, ddum, ddum, error)
+    if (error /= 0) then
+       write(*,*) 'the following error was detected: ', error
+       goto 1000
+    endif
+    
+    ! back substitution and iterative refinement
+    iparm(8) = 2 ! max numbers of iterative refinement steps
+    phase = 33 ! only solving
+    call pardiso (pt,maxfct,mnum,mtype,phase,n,acsr,ia,ja, &
+         idum, nrhs, iparm, msglvl, b, x, error)
+    write(*,*) 'solve completed ... '
+    if (error /= 0) then
+       write(*,*) 'the following error was detected: ', error
+       goto 1000
+    endif
+    
+1000 continue
+    ! termination and release of memory
+    phase = -1 ! release internal memory
+    call pardiso (pt,maxfct,mnum,mtype,phase,n,ddum,idum,idum, 	&
+         idum, nrhs, iparm, msglvl, ddum, ddum, error1)
+    
+    if (error1 /= 0) then
+       write(*,*) 'the following release error was detected: ',	&
+            error1
+       stop 1
+    endif
+    
+    if ( error /= 0 ) stop 1
+    
+  end subroutine solve_pardiso
+  
+  !-------------------------------------------------------------------
+  !	END PARDISO Direct Solver
+  !-------------------------------------------------------------------
+  
+end module sparse_solver
diff --git a/src/heatflow/mod_tempdep.f90 b/src/heatflow/mod_tempdep.f90
index 475617c..b8b936a 100755
--- a/src/heatflow/mod_tempdep.f90
+++ b/src/heatflow/mod_tempdep.f90
@@ -27,7 +27,7 @@
 
 module TempDep
     use inputs, only: Grid, TempDepProp, Nz, Ny, Nx
-    use setup, only: sparse_Hmatrix
+    ! use setup, only: sparse_Hmatrix
     use globe_data, only:  Temp_p, lin_rhoc
     use constants, only: real12, int12
     
@@ -35,34 +35,34 @@ module TempDep
     
     contains
 
-    subroutine ChangeProp()
-        character(len=100) :: filename
-        integer(int12) :: ix,iy,iz, index
-        logical :: res
-        index = 1
-        !Loop over all the grid points
-        do iz = 1, Nz
-            do iy = 1, Ny
-                do ix = 1, Nx
-                    ! Construct the filename for the Material table asscoiated with the grid point
-                    filename = trim('./inputs/MatTable' // & 
-                         trim(adjustl(char(Grid(ix, iy, iz)%imaterial_type))))
-                    inquire(file=filename, exist=res)
-                    if (res) then
-                        ! Read the temperature dependent properties from the file
-                        ! CALL ReadTempDepTable(filename, ix, iy, iz, index)
-                    else
-                        ! File does not exist, continue to the next grid point
-                        continue
-                    end if
-                    ! Read the temperature dependent properties from the file
-                    index = index + 1
-                end do
-            end do
-        end do
-        ! Construct the sparse matrix
-        CALL sparse_Hmatrix()
-    end subroutine ChangeProp
+    ! subroutine ChangeProp()
+    !     character(len=100) :: filename
+    !     integer(int12) :: ix,iy,iz, index
+    !     logical :: res
+    !     index = 1
+    !     !Loop over all the grid points
+    !     do iz = 1, Nz
+    !         do iy = 1, Ny
+    !             do ix = 1, Nx
+    !                 ! Construct the filename for the Material table asscoiated with the grid point
+    !                 filename = trim('./inputs/MatTable' // & 
+    !                      trim(adjustl(char(Grid(ix, iy, iz)%imaterial_type))))
+    !                 inquire(file=filename, exist=res)
+    !                 if (res) then
+    !                     ! Read the temperature dependent properties from the file
+    !                     ! CALL ReadTempDepTable(filename, ix, iy, iz, index)
+    !                 else
+    !                     ! File does not exist, continue to the next grid point
+    !                     continue
+    !                 end if
+    !                 ! Read the temperature dependent properties from the file
+    !                 index = index + 1
+    !             end do
+    !         end do
+    !     end do
+    !     ! Construct the sparse matrix
+    !     CALL sparse_Hmatrix()
+    ! end subroutine ChangeProp
 
     ! subroutine ReadTempDepTable(filename, ix, iy, iz, index)
     !     character(len=*), intent(in) :: filename

From 2fe28bc7c551e8ab498f42d77923dbd0a45f1661 Mon Sep 17 00:00:00 2001
From: Harry Mclean <130392970+HarryMclean@users.noreply.github.com>
Date: Tue, 2 Sep 2025 22:45:01 +0100
Subject: [PATCH 05/28] Fix typo

---
 README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 0c78934..1dcc697 100644
--- a/README.md
+++ b/README.md
@@ -77,15 +77,15 @@ make install
 
 This will build and install the executable in the following directory:
 ```
-${HOME}/.local/HeatFLow/bin/HeatFlow
+${HOME}/.local/HeatFlow/bin/HeatFlow
 ```
 
 This executable can now be called to run the HeatFlow software package and simulate heat transport.
-If the `${HOME}/.local/HeatFLow/bin` is added to your `PATH` environment variable, then the program can be called as a terminal command.
+If the `${HOME}/.local/HeatFlow/bin` is added to your `PATH` environment variable, then the program can be called as a terminal command.
 This can be done with the following command (works on a per-terminal basis, if you want to update it for all, include this in your source shell file):
 
 ```
-export PATH="${PATH}:${HOME}/.local/HeatFLow/bin"
+export PATH="${PATH}:${HOME}/.local/HeatFlow/bin"
 ```
 
 To execute the program, use the following command:

From 0f1ddc0e1fc0c690338a02ac01e460a404370680 Mon Sep 17 00:00:00 2001
From: Your Name <hm556@exeter.ac.uk>
Date: Tue, 2 Sep 2025 23:11:02 +0100
Subject: [PATCH 06/28] Remove not used mat.in variables

---
 src/heatflow/mod_constructions.f90 |  6 +++---
 src/heatflow/mod_inputs.f90        | 15 ++++++---------
 src/heatflow/mod_material.f90      |  6 +++---
 3 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/src/heatflow/mod_constructions.f90 b/src/heatflow/mod_constructions.f90
index b0de444..6e8afc0 100644
--- a/src/heatflow/mod_constructions.f90
+++ b/src/heatflow/mod_constructions.f90
@@ -34,11 +34,11 @@ module constructions
   type material
      integer(int12) :: index !identifier for the material
      real(real12) :: heat_capacity !heat capacity of the material
-     real(real12) :: h_conv !convective heat transfer coefficient
+    !  real(real12) :: h_conv !convective heat transfer coefficient
      real(real12) :: kappa !thermal conductivity
-     real(real12) :: kappa3D !three-dimensional thermal conductivity
+    !  real(real12) :: kappa3D !three-dimensional thermal conductivity
      real(real12) :: rho !density of the material
-     real(real12) :: sound_speed   !speed of sound in the material
+    !  real(real12) :: sound_speed   !speed of sound in the material
      real(real12) :: tau !relaxation time
      real(real12), dimension(3) :: vel !velocity of the material
      real(real12) :: em !emisstivity
diff --git a/src/heatflow/mod_inputs.f90 b/src/heatflow/mod_inputs.f90
index e8b3025..553e925 100644
--- a/src/heatflow/mod_inputs.f90
+++ b/src/heatflow/mod_inputs.f90
@@ -657,7 +657,7 @@ subroutine read_mat(unit)
     type(material), dimension(100) :: dum_mat
     character(1024) :: buffer
     integer :: reason, j
-    integer, dimension(9) :: readvarmat
+    integer, dimension(6) :: readvarmat
     integer :: i, index
 
     i=0
@@ -697,14 +697,11 @@ subroutine read_mat(unit)
 
     
        CALL assignD(buffer,"heat_capacity",dum_mat(i)%heat_capacity,readvarmat(1))! assign heatCapacity
-       CALL assignD(buffer,"h_conv"       ,dum_mat(i)%h_conv       ,readvarmat(2))! assign h_conv
-       CALL assignD(buffer,"kappa"        ,dum_mat(i)%kappa        ,readvarmat(3))! assign kappa
-       CALL assignD(buffer,"kappa3D"      ,dum_mat(i)%kappa3D      ,readvarmat(4))! assign kappa3D
-       CALL assignD(buffer,"rho"          ,dum_mat(i)%rho          ,readvarmat(5))! assign rho
-       CALL assignD(buffer,"sound_speed"  ,dum_mat(i)%sound_speed  ,readvarmat(6))! assign sound_speed
-       CALL assignD(buffer,"tau"          ,dum_mat(i)%tau          ,readvarmat(7))! assign tau
-       CALL assignD(buffer,"em"           ,dum_mat(i)%em           ,readvarmat(8))! assign e
-       CALL assignV(buffer,"vel"          ,dum_mat(i)%vel          ,readvarmat(9)) ! assign velocity
+       CALL assignD(buffer,"kappa"        ,dum_mat(i)%kappa        ,readvarmat(2))! assign kappa
+       CALL assignD(buffer,"rho"          ,dum_mat(i)%rho          ,readvarmat(3))! assign rho
+       CALL assignD(buffer,"tau"          ,dum_mat(i)%tau          ,readvarmat(4))! assign tau
+       CALL assignD(buffer,"em"           ,dum_mat(i)%em           ,readvarmat(5))! assign e
+       CALL assignV(buffer,"vel"          ,dum_mat(i)%vel          ,readvarmat(6)) ! assign velocity
     end do read
     
     ! Check for duplicate indices
diff --git a/src/heatflow/mod_material.f90 b/src/heatflow/mod_material.f90
index 9dfcc8c..65b9fdf 100644
--- a/src/heatflow/mod_material.f90
+++ b/src/heatflow/mod_material.f90
@@ -91,11 +91,11 @@ subroutine material(imaterial_type,kappa,kappa3D,h_conv,heat_capacity,rho,sound_
          if (tmp .eq. imaterial_type) then
             found = .true.
             heat_capacity = input_materials(i)%heat_capacity
-            h_conv        = input_materials(i)%h_conv
+            ! h_conv        = input_materials(i)%h_conv
             kappa         = input_materials(i)%kappa
-            kappa3D       = input_materials(i)%kappa3D
+            ! kappa3D       = input_materials(i)%kappa3D
             rho           = input_materials(i)%rho
-            sound_speed   = input_materials(i)%sound_speed
+            ! sound_speed   = input_materials(i)%sound_speed
             tau           = input_materials(i)%tau
             vel(1)        = input_materials(i)%vel(1)
             vel(2)        = input_materials(i)%vel(2)

From 31cf45dec51bbcba482d2656735bdec01a9627c9 Mon Sep 17 00:00:00 2001
From: Your Name <hm556@exeter.ac.uk>
Date: Wed, 3 Sep 2025 09:19:43 +0100
Subject: [PATCH 07/28] Make with the parallel libraries

---
 Makefile | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index af38c3a..a4750e5 100644
--- a/Makefile
+++ b/Makefile
@@ -39,7 +39,8 @@ OBJS := $(addprefix $(BUILD_DIR)/,$(notdir $(SRCS:.f90=.o)))
 MKLROOT ?= /opt/intel/oneapi/mkl/latest
 MKL_LIB_DIR = $(MKLROOT)/lib/intel64
 MKL_INCLUDE_DIR = $(MKLROOT)/include
-MKL_FLAGS = -L$(MKL_LIB_DIR) -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl
+#MKL_FLAGS = -L$(MKL_LIB_DIR) -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl
+MKL_FLAGS = -L$(MKL_LIB_DIR) -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl
 
 FFLAGS = -O3 -I$(MKL_INCLUDE_DIR)
 MODULEFLAGS = -J$(BUILD_DIR)
@@ -79,4 +80,4 @@ OMP: $(programs)
 	./util/DShell/omp_exec.sh
 
 clean:
-	rm -f $(BUILD_DIR)/*.o $(BUILD_DIR)/*.mod $(programs)
\ No newline at end of file
+	rm -f $(BUILD_DIR)/*.o $(BUILD_DIR)/*.mod $(programs)

From 440e16840eb13c4a9bed2754809afe6b5131cad8 Mon Sep 17 00:00:00 2001
From: hm556 <hm556@exeter.ac.uk>
Date: Thu, 4 Sep 2025 11:56:54 +0100
Subject: [PATCH 08/28] Switch to pardiso solver

---
 src/heatflow/mod_evolve.f90        | 20 +++++++++++---------
 src/heatflow/mod_global.f90        |  4 ++--
 src/heatflow/mod_setup.f90         |  3 ++-
 src/heatflow/mod_sparse_solver.f90 | 17 ++++++++++-------
 4 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/src/heatflow/mod_evolve.f90 b/src/heatflow/mod_evolve.f90
index 74f3951..ada2569 100644
--- a/src/heatflow/mod_evolve.f90
+++ b/src/heatflow/mod_evolve.f90
@@ -31,7 +31,7 @@ module evolution
   use boundary_vector, only: boundary
   use cattaneo, only: S_catS
 !   use tempdep, only: ChangeProp 
-  use sparse_solver, only: bicgstab
+  use sparse_solver, only: bicgstab, solve_pardiso
   implicit none
 
   private
@@ -46,8 +46,8 @@ module evolution
 !!!#################################################################################################
   subroutine simulate(itime)
     integer(int12), intent(in) :: itime
-    real(real12), dimension(NA) :: S, x, Q, Qdens, S_CAT, B
-    real(real12), dimension(:), allocatable :: x0
+    real(real12), dimension(NA) :: S, Q, Qdens, S_CAT, B
+    real(real12), dimension(:), allocatable :: x
     integer:: ncg, itol, itmax !, iss
     integer :: iter
     real(real12) :: e, err, tol
@@ -140,21 +140,23 @@ subroutine simulate(itime)
    ! iter:  Output - gives the number of the final iteration.
    ! err:   Output - records the error of the final iteration.
    ! iss:   Input - sets the Sparse Storage type (1=SRS, 2=SDS).
-    x=Temp_p+(Temp_p-Temp_pp) 
-    if (any(x-Temp_p .lt. TINY)) x=x+TINY !avoid nan solver issue
+   !  x=Temp_p+(Temp_p-Temp_pp)
+   !  if (any(x-Temp_p .lt. TINY)) x=x+TINY !avoid nan solver issue
     itol=1
     tol=1.e-32_real12
     itmax=50000
     ncg = 0
-    iter=ncg
+    iter= 0
     err=E
 
 
-   CALL bicgstab(acsr, ia, ja, S, itmax, x, x0, iter)
+   !  CALL bicgstab(acsr, ia, ja, S, itmax, x, x0, iter)
+
+   CALL solve_pardiso(acsr, S, ia, ja, x)
    !  CALL linbcg(S,x,itol=int(itol,I4B),tol=tol, itmax=int(itmax,I4B), iter=iter, &
-   !       err=E)
+         ! err=E)
          
-   x=x0
+   ! 
     if (any(isnan(x(:)))) then
        write(0,*) "fatal error: NAN in x tempurature vector"
        write(0,*) 'time step ', itime, "      T   ", sum(Temp_p)/size(Temp_p), E ,iter
diff --git a/src/heatflow/mod_global.f90 b/src/heatflow/mod_global.f90
index 94aeb26..f31ce2e 100644
--- a/src/heatflow/mod_global.f90
+++ b/src/heatflow/mod_global.f90
@@ -14,8 +14,8 @@ module globe_data
   TYPE(sprs2_dp) :: ra !Techniqually rH
   TYPE(diag_sprs_dp) :: da !Techniqually dH
   real(real12), dimension(:), allocatable :: acsr
-  integer (kind=8), dimension(:), allocatable :: ja
-  integer (kind=8), dimension(:), allocatable :: ia
+  integer, dimension(:), allocatable :: ja
+  integer, dimension(:), allocatable :: ia
    
 end module globe_data
  
diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90
index d0011ab..40c53bd 100644
--- a/src/heatflow/mod_setup.f90
+++ b/src/heatflow/mod_setup.f90
@@ -81,7 +81,8 @@ subroutine set_global_variables()
          CALL sparse_Hmatrix()
          ! Allocate the arrays to hold the H matrix in CSR format
          allocate(acsr(ra%len), ja(ra%len), ia(ra%n+1))
-         CALL coo2csr(ra%n, ra%len, ra%val, ra%irow, ra%jcol, acsr, ja, ia)               
+         CALL coo2csr(ra%n, ra%len, ra%val, ra%irow, ra%jcol, acsr, ja, ia)
+         ! print*, ra%val               
       end if
       !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/src/heatflow/mod_sparse_solver.f90 b/src/heatflow/mod_sparse_solver.f90
index 95a925d..7242657 100644
--- a/src/heatflow/mod_sparse_solver.f90
+++ b/src/heatflow/mod_sparse_solver.f90
@@ -56,8 +56,8 @@ subroutine coo2csr( nrow, &
     integer (kind=8), dimension(nnz), intent(in) :: ir
     integer (kind=8), dimension(nnz), intent(in) :: jc   
     real(8), dimension(nnz), intent(out) :: acsr
-    integer (kind=8), dimension(nnz), intent(out) :: ja
-    integer (kind=8), dimension(nrow+1), intent(out) :: ia
+    integer, dimension(nnz), intent(out) :: ja
+    integer, dimension(nrow+1), intent(out) :: ia
 
     ! Local variables.
     integer (kind=8) :: i, iad, j, k, k0
@@ -151,16 +151,17 @@ subroutine bicgstab( acsr, &
     call mkl_dcsrgemv("N",n,acsr,ia,ja,x,temp1)
     
     r = b - temp1
-  
+
     call random_number(rst)
     
     p = r
+
     delta = dot_product(rst,r)
-    
+
     write(*,'(a,1x,f15.3)') "Starting delta: ", delta
     
     delta0 = delta
-    
+
     do i = 1, maxiter
        
        if ( norm2(r) /= norm2(r) ) then
@@ -187,7 +188,7 @@ subroutine bicgstab( acsr, &
        delta = dot_product(rst,r)
        beta = (delta/delta_old)*(alpha/omega)
        p = r + beta*(p - omega*temp1)
-       
+
        if(norm2(r) .lt. cc) then
           iter = i
           return
@@ -247,7 +248,9 @@ subroutine solve_pardiso( acsr, &
     mnum = 1
     
     if (.not.(allocated(x))) allocate(x(n))
-    
+
+
+
     allocate(iparm(64))		!set up pardiso control parameter
     
     do i=1,64

From 96c2ee3732b0711f5f3ff6b25ae9bac86fd3e4ae Mon Sep 17 00:00:00 2001
From: Your Name <hm556@exeter.ac.uk>
Date: Thu, 4 Sep 2025 12:36:18 +0100
Subject: [PATCH 09/28] Fix bug with output file

---
 src/heatflow/mod_global.f90 |  1 +
 src/heatflow/mod_output.f90 | 17 +++++++++++++----
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/heatflow/mod_global.f90 b/src/heatflow/mod_global.f90
index f31ce2e..0fb61d6 100644
--- a/src/heatflow/mod_global.f90
+++ b/src/heatflow/mod_global.f90
@@ -16,6 +16,7 @@ module globe_data
   real(real12), dimension(:), allocatable :: acsr
   integer, dimension(:), allocatable :: ja
   integer, dimension(:), allocatable :: ia
+  character(len=1024) :: logname ! Log file name
    
 end module globe_data
  
diff --git a/src/heatflow/mod_output.f90 b/src/heatflow/mod_output.f90
index 5a024b1..8482ef7 100644
--- a/src/heatflow/mod_output.f90
+++ b/src/heatflow/mod_output.f90
@@ -46,7 +46,7 @@ module output
   use inputs, only: nx,ny,nz, time_step, grid, NA, Check_Steady_State, ntime, WriteToTxt
   use inputs, only: Test_Run, freq, RunName, FullRestart, IVERB, write_every
   use inputs, only: start_ix, end_ix, start_iy, end_iy, start_iz, end_iz
-  use globe_data, only: Temp_p,Temp_pp, heat, heated_volume
+  use globe_data, only: Temp_p,Temp_pp, heat, heated_volume, logname
   implicit none
   
 contains
@@ -54,9 +54,11 @@ subroutine data_write(itime)
     implicit none
     integer(int12), intent(in) :: itime
     real(real12), dimension(nx,ny,nz) :: CT, Temp_cur
-    integer(int12) :: ix, iy, iz, indexA, logunit
-    character(len=1024) :: file_prefix, file_extension, outdir, logname
+    integer(int12) :: ix, iy, iz, indexA
+    character(len=1024) :: file_prefix, file_extension, outdir
+    integer :: logunit
     
+    logunit = 20
     file_prefix = 'Temperture_'
     outdir='./outputs/'
     file_extension = '.out'
@@ -90,9 +92,12 @@ subroutine data_write(itime)
           ! find most recent log file and open it
           !---------------------------------------
           CALL last_log(logname,outdir)
-          open(newunit=logunit,file=logname)
+          open(logunit,file=logname)
+         !  print*, logunit
+         !  print*, logname
           write(logunit,*) real((itime-1)*(time_step)), &
                (Temp_cur(start_ix:end_ix, start_iy:end_iy, start_iz:end_iz))
+          close(logunit)
           !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        end if
     end if
@@ -108,8 +113,12 @@ subroutine data_write(itime)
       if (WriteToTxt) then
          if (mod(itime, write_every) .eq. 0) then
             write(*, *) 'Writing Temperature difference to file'
+            ! print*, logunit
+            ! print*, logname
+            open(logunit,file=logname, status='old', position='append')
             write(logunit,*) real((itime-1)*(time_step)), &
                (Temp_cur(start_ix:end_ix, start_iy:end_iy, start_iz:end_iz))
+            close(logunit)
          end if
       endif
     end if

From 95f9009b0162e7b88df9e97aa781217a8afa5b70 Mon Sep 17 00:00:00 2001
From: Your Name <hm556@exeter.ac.uk>
Date: Thu, 11 Sep 2025 13:58:09 +0100
Subject: [PATCH 10/28] Fix typo

---
 src/heatflow/mod_heating.f90 | 33 +++++++++++++++++++++++----------
 src/heatflow/mod_output.f90  |  2 +-
 2 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/src/heatflow/mod_heating.f90 b/src/heatflow/mod_heating.f90
index 831396f..bb4a1d3 100644
--- a/src/heatflow/mod_heating.f90
+++ b/src/heatflow/mod_heating.f90
@@ -30,7 +30,7 @@ subroutine heater(itime, Q, Qdens)
     integer(int12) :: ix, iy, iz, IA ,heated_num
     real(real12) :: time, POWER, time_pulse, x, x2
     real(real12) :: rho, volume, heat_capacity, area, tau, sum_temp
-
+    logical :: shared_power
     ! Initialize variables
     IA = 0
     Q = 0._real12
@@ -53,7 +53,7 @@ subroutine heater(itime, Q, Qdens)
              heat_capacity = grid(ix,iy,iz)%heat_capacity
              area = grid(ix,iy,iz)%Length(1)*grid(ix,iy,iz)%Length(2) !???
              !tau divided by time_step squared in setup.f90
-             tau = grid(ix,iy,iz)%tau*(time_step**2.0_real12) 
+             tau = grid(ix,iy,iz)%tau*(time_step) 
              ! select heater case
 
              select case(grid(ix,iy,iz)%iheater)
@@ -114,13 +114,22 @@ subroutine heater(itime, Q, Qdens)
                    Q(IA) = 0.0
                 end if
                 !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+               case(10)
+                  Q(IA) = POWER - (tau*(POWER))
+
+               case(11)
+                  Q(IA) = POWER
+                  
+               case(12)
+                  Q(IA) = POWER + (tau*(POWER))
              end select
              !------------------------------
              ! If emissitivity is not zero, then calculate the radiative heating
              !------------------------------
-               Q(IA) = Q(IA) - grid(ix,iy,iz)%em * grid(ix,iy,iz)%length(1)*&
-                       grid(ix,iy,iz)%length(2)*StefBoltz &
-                       * ((Temp_p(IA)**4.0_real12) - (T_Bath**4.0_real12)) 
+               ! Q(IA) = Q(IA) - grid(ix,iy,iz)%em * grid(ix,iy,iz)%length(1)*&
+               !         grid(ix,iy,iz)%length(2)*StefBoltz &
+               !         * ((Temp_p(IA)**4.0_real12) - (T_Bath**4.0_real12)) 
 
              !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -161,11 +170,15 @@ subroutine heater(itime, Q, Qdens)
    
 
     ! Normalize all heat sources by the heated volume
-    if (heated_volume .gt. 0.0) then
-      Qdens(:) = Q(:) / heated_volume
-      heated_temp = sum_temp / heated_volume
-    end if
-
+    shared_power = .False.
+    if (shared_power) then
+      if (heated_volume .gt. 0.0) then
+         Qdens(:) = Q(:) / heated_volume
+         heated_temp = sum_temp / heated_volume
+      end if
+    else
+      Qdens(:) = Q(:) / grid(1,1,1)%volume 
+    end if 
   end subroutine heater
 
 
diff --git a/src/heatflow/mod_output.f90 b/src/heatflow/mod_output.f90
index 8482ef7..0235e4e 100644
--- a/src/heatflow/mod_output.f90
+++ b/src/heatflow/mod_output.f90
@@ -70,7 +70,7 @@ subroutine data_write(itime)
     do iz = 1, nz
        do iy = 1, ny
           do ix = 1, nx
-             Temp_cur(ix,iy,iz) = Temp_pp(indexA)
+             Temp_cur(ix,iy,iz) = Temp_p(indexA)
              indexA = indexA+1
           end do
        end do

From 072c957ed83b1d6910499d0077010d05c740a91e Mon Sep 17 00:00:00 2001
From: Your Name <hm556@exeter.ac.uk>
Date: Thu, 11 Sep 2025 15:17:11 +0100
Subject: [PATCH 11/28] Add cattaneo heating

---
 src/heatflow/mod_heating.f90 | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/heatflow/mod_heating.f90 b/src/heatflow/mod_heating.f90
index bb4a1d3..5d95655 100644
--- a/src/heatflow/mod_heating.f90
+++ b/src/heatflow/mod_heating.f90
@@ -14,7 +14,7 @@ module Heating
   use constants, only: real12, int12, pi, StefBoltz
   use globe_data, only: Temp_p, Temp_pp, Heat, heated_volume, Q_P, heated_temp
   use inputs, only: nx,ny,nz, grid, NA, power_in, time_step, heated_steps, T_System, freq, ntime, &
-       T_Bath
+       T_Bath, icattaneo
   use materials, only: material
   implicit none
 contains
@@ -81,6 +81,15 @@ subroutine heater(itime, Q, Qdens)
                 else
                    Q(IA) = 0.0_real12
                 end if
+               
+                if (icattaneo .eq. 1) then
+                if (itime .eq. 1) then
+                  Q(IA) = Q(IA) + (tau*(POWER))
+                  end if
+                if (itime .eq. heated_steps+1) then
+                  Q(IA) = Q(IA) - (tau*(POWER))
+                end if
+                end if 
                 !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
              case(3)
                 !------------------------------

From 5a375f5a4636f6cdc7bacb999d08bae445f997f7 Mon Sep 17 00:00:00 2001
From: Your Name <hm556@exeter.ac.uk>
Date: Thu, 11 Sep 2025 19:57:00 +0100
Subject: [PATCH 12/28] Adjust heating

---
 src/heatflow/mod_heating.f90 | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/heatflow/mod_heating.f90 b/src/heatflow/mod_heating.f90
index 5d95655..f33eb5f 100644
--- a/src/heatflow/mod_heating.f90
+++ b/src/heatflow/mod_heating.f90
@@ -78,10 +78,6 @@ subroutine heater(itime, Q, Qdens)
                 !------------------------------
                 if ( time .le. time_pulse ) then
                    Q(IA) = POWER
-                else
-                   Q(IA) = 0.0_real12
-                end if
-               
                 if (icattaneo .eq. 1) then
                 if (itime .eq. 1) then
                   Q(IA) = Q(IA) + (tau*(POWER))
@@ -90,6 +86,12 @@ subroutine heater(itime, Q, Qdens)
                   Q(IA) = Q(IA) - (tau*(POWER))
                 end if
                 end if 
+                
+                else
+                   Q(IA) = 0.0_real12
+                end if
+               
+
                 !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
              case(3)
                 !------------------------------

From 3b4da8aa5a0ef966a7ad8cea39f66699447bc72c Mon Sep 17 00:00:00 2001
From: Your Name <hm556@exeter.ac.uk>
Date: Wed, 1 Oct 2025 18:22:07 +0100
Subject: [PATCH 13/28] Working Bicgstab

---
 Makefile                           |  8 +++-
 src/heatflow/mod_boundary.f90      | 12 +++---
 src/heatflow/mod_evolve.f90        |  4 +-
 src/heatflow/mod_global.f90        |  4 +-
 src/heatflow/mod_hmatrix.f90       | 12 +++---
 src/heatflow/mod_sparse_solver.f90 | 62 ++++++++++++++++++++++++++++--
 6 files changed, 81 insertions(+), 21 deletions(-)

diff --git a/Makefile b/Makefile
index a4750e5..7c65e97 100644
--- a/Makefile
+++ b/Makefile
@@ -42,9 +42,11 @@ MKL_INCLUDE_DIR = $(MKLROOT)/include
 #MKL_FLAGS = -L$(MKL_LIB_DIR) -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl
 MKL_FLAGS = -L$(MKL_LIB_DIR) -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl
 
-FFLAGS = -O3 -I$(MKL_INCLUDE_DIR)
+#FFLAGS = -O3 -I$(MKL_INCLUDE_DIR)
 MODULEFLAGS = -J$(BUILD_DIR)
 FC = gfortran
+NCORES := $(shell nproc)
+FFLAGS = -O3 -fopenmp -I$(MKL_INCLUDE_DIR)
 
 ##########################################
 # TARGETS
@@ -72,6 +74,10 @@ $(BUILD_DIR)/%.o: $(SRC_DIR)/%.f90 | $(BUILD_DIR)
 $(programs): $(OBJS) | $(BIN_DIR)
 	$(FC) -O3 -fopenmp $(OBJS) -o $@ $(MKL_FLAGS)
 
+.PHONY: run
+run: $(programs)
+    OMP_NUM_THREADS=$(NCORES) MKL_NUM_THREADS=$(NCORES) MKL_DYNAMIC=FALSE OMP_PROC_BIND=spread OMP_PLACES=cores ./bin/$(NAME)
+
 debug: FFLAGS = -O0 -Wall -g -ffpe-trap=invalid,zero,overflow,underflow -fbacktrace -fcheck=all -fbounds-check -I$(MKL_INCLUDE_DIR)
 debug: $(OBJS) | $(BIN_DIR)
 	$(FC) $(FFLAGS) $(OBJS) -o $(programs) $(MKL_FLAGS)
diff --git a/src/heatflow/mod_boundary.f90 b/src/heatflow/mod_boundary.f90
index cb8ab30..88ad7ba 100644
--- a/src/heatflow/mod_boundary.f90
+++ b/src/heatflow/mod_boundary.f90
@@ -84,7 +84,7 @@ subroutine boundary(B)
                             kappaHarm = (2*kappa*kappaBoundx1/(kappa+kappaBoundx1)) / &
                             (grid(ix, iy, iz)%Length(1)**2)
                             if (kappa .ne. kappaBoundx1) kappaHarm = kappaHarm*BR
-                            B(I) = B(I) + (kappaHarm) * T_Bathx1 + boundray_term_vel(1_int12,iy,iz,T_Bathx1)
+                            B(I) = B(I) + (kappaHarm) * T_Bathx1 !+ boundray_term_vel(1_int12,iy,iz,T_Bathx1)
                         end if
                     end if
                     if (ix .eq. nx) then
@@ -94,7 +94,7 @@ subroutine boundary(B)
                             kappaHarm = (2*kappa*kappaBoundNx/(kappa+kappaBoundNx)) / &
                             (grid(ix, iy, iz)%Length(1)**2)
                             if (kappa .ne. kappaBoundNx) kappaHarm = kappaHarm*BR
-                            B(I) = B(I) + (kappaHarm) * T_Bathx2 + boundray_term_vel(nx,iy,iz,T_Bathx2)
+                            B(I) = B(I) + (kappaHarm) * T_Bathx2 !+ boundray_term_vel(nx,iy,iz,T_Bathx2)
                         end if
                     end if
                 end if
@@ -107,7 +107,7 @@ subroutine boundary(B)
                             kappaHarm = (2*kappa*kappaBoundy1/(kappa+kappaBoundy1)) / &
                             (grid(ix, iy, iz)%Length(2)**2)
                             if (kappa .ne. kappaBoundy1) kappaHarm = kappaHarm*BR
-                            B(I) = B(I) + (kappaHarm) * T_Bathy1 + boundray_term_vel(ix,1_int12,iz,T_Bathy1)
+                            B(I) = B(I) + (kappaHarm) * T_Bathy1 !+ boundray_term_vel(ix,1_int12,iz,T_Bathy1)
                         end if
                     end if
                     if (iy .eq. ny) then
@@ -117,7 +117,7 @@ subroutine boundary(B)
                             kappaHarm = (2*kappa*kappaBoundNy/(kappa+kappaBoundNy)) / &
                             (grid(ix, iy, iz)%Length(2)**2)
                             if (kappa .ne. kappaBoundNy) kappaHarm = kappaHarm*BR
-                            B(I) = B(I) + (kappaHarm) * T_Bathy2 + boundray_term_vel(ix,ny,iz,T_Bathy2)
+                            B(I) = B(I) + (kappaHarm) * T_Bathy2 !+ boundray_term_vel(ix,ny,iz,T_Bathy2)
                         end if
                     end if
                 end if
@@ -130,7 +130,7 @@ subroutine boundary(B)
                             kappaHarm = (2*kappa*kappaBoundz1/(kappa+kappaBoundz1)) / &
                             (grid(ix, iy, iz)%Length(3)**2)
                             if (kappa .ne. kappaBoundz1) kappaHarm = kappaHarm*BR
-                            B(I) = B(I) + (kappaHarm) * T_Bathz1 + boundray_term_vel(ix,iy,1_int12,T_Bathz1)
+                            B(I) = B(I) + (kappaHarm) * T_Bathz1 !+ boundray_term_vel(ix,iy,1_int12,T_Bathz1)
                         end if
                     end if
                     if (iz .eq. nz) then
@@ -140,7 +140,7 @@ subroutine boundary(B)
                             kappaHarm = (2*kappa*kappaBoundNz/(kappa+kappaBoundNz)) / &
                             (grid(ix, iy, iz)%Length(3)**2)
                             if (kappa .ne. kappaBoundNz) kappaHarm = kappaHarm*BR
-                            B(I) = B(I) + (kappaHarm) * T_Bathz2 + boundray_term_vel(ix,iy,nz,T_Bathz2)
+                            B(I) = B(I) + (kappaHarm) * T_Bathz2 !+ boundray_term_vel(ix,iy,nz,T_Bathz2)
                         end if
                     end if
                 end if
diff --git a/src/heatflow/mod_evolve.f90 b/src/heatflow/mod_evolve.f90
index ada2569..3156bd2 100644
--- a/src/heatflow/mod_evolve.f90
+++ b/src/heatflow/mod_evolve.f90
@@ -150,9 +150,9 @@ subroutine simulate(itime)
     err=E
 
 
-   !  CALL bicgstab(acsr, ia, ja, S, itmax, x, x0, iter)
+    call bicgstab(acsr, ia, ja, S, itmax, Temp_p, x, iter)
 
-   CALL solve_pardiso(acsr, S, ia, ja, x)
+   ! CALL solve_pardiso(acsr, S, ia, ja, x)
    !  CALL linbcg(S,x,itol=int(itol,I4B),tol=tol, itmax=int(itmax,I4B), iter=iter, &
          ! err=E)
          
diff --git a/src/heatflow/mod_global.f90 b/src/heatflow/mod_global.f90
index 0fb61d6..439952c 100644
--- a/src/heatflow/mod_global.f90
+++ b/src/heatflow/mod_global.f90
@@ -14,8 +14,8 @@ module globe_data
   TYPE(sprs2_dp) :: ra !Techniqually rH
   TYPE(diag_sprs_dp) :: da !Techniqually dH
   real(real12), dimension(:), allocatable :: acsr
-  integer, dimension(:), allocatable :: ja
-  integer, dimension(:), allocatable :: ia
+  integer(kind=8), dimension(:), allocatable :: ja
+  integer(kind=8), dimension(:), allocatable :: ia
   character(len=1024) :: logname ! Log file name
    
 end module globe_data
diff --git a/src/heatflow/mod_hmatrix.f90 b/src/heatflow/mod_hmatrix.f90
index e1d268d..fb7b747 100644
--- a/src/heatflow/mod_hmatrix.f90
+++ b/src/heatflow/mod_hmatrix.f90
@@ -149,7 +149,7 @@ function hmatrixfunc(i, j) result(H)
         H=0.0_real12
       else
          H = A ! X left neighbor (left cell interaction)
-         H = H + calculate_convective_conductivity(xm, y, z, x, y, z)
+         !H = H + calculate_convective_conductivity(xm, y, z, x, y, z)
       end if
     end if 
 
@@ -158,7 +158,7 @@ function hmatrixfunc(i, j) result(H)
         H=0.0_real12
       else
         H = B  ! X right neighbor (right cell interaction)
-        H = H + calculate_convective_conductivity(xp, y, z, x, y, z)
+        !H = H + calculate_convective_conductivity(xp, y, z, x, y, z)
       end if
     end if
 
@@ -167,7 +167,7 @@ function hmatrixfunc(i, j) result(H)
         H=0.0_real12
       else
         H = D  ! Y down neighbor (down cell interaction)
-        H = H + calculate_convective_conductivity(x, ym, z, x, y, z)
+        !H = H + calculate_convective_conductivity(x, ym, z, x, y, z)
       end if 
     end if 
     if ((i-j) .eq. -nx) then
@@ -175,7 +175,7 @@ function hmatrixfunc(i, j) result(H)
         H=0.0_real12
       else
         H = E  ! Y up neighbor (up cell interaction)
-        H = H + calculate_convective_conductivity(x, yp, z, x, y, z)
+        !H = H + calculate_convective_conductivity(x, yp, z, x, y, z)
       end if 
     end if 
 
@@ -185,7 +185,7 @@ function hmatrixfunc(i, j) result(H)
      else
         !write(*,*) 'F   this is forward (in) z',F
          H = F  ! Z in neighbor (forward cell interaction) !!!Frank had this as G during testing
-         H = H + calculate_convective_conductivity(x, y, zm, x, y, z)
+         !H = H + calculate_convective_conductivity(x, y, zm, x, y, z)
       end if
     end if 
 
@@ -195,7 +195,7 @@ function hmatrixfunc(i, j) result(H)
      else  
         !write(*,*) 'G   this is backward (out) z?',G
         H = G  ! Z out neighbor (backward cell interaction) !!!Frank Had this as F during testing
-        H = H + calculate_convective_conductivity(x, y, zp, x, y, z)
+        !H = H + calculate_convective_conductivity(x, y, zp, x, y, z)
       end if
    end if
    
diff --git a/src/heatflow/mod_sparse_solver.f90 b/src/heatflow/mod_sparse_solver.f90
index 7242657..f085b87 100644
--- a/src/heatflow/mod_sparse_solver.f90
+++ b/src/heatflow/mod_sparse_solver.f90
@@ -56,8 +56,8 @@ subroutine coo2csr( nrow, &
     integer (kind=8), dimension(nnz), intent(in) :: ir
     integer (kind=8), dimension(nnz), intent(in) :: jc   
     real(8), dimension(nnz), intent(out) :: acsr
-    integer, dimension(nnz), intent(out) :: ja
-    integer, dimension(nrow+1), intent(out) :: ia
+    integer(kind=8), dimension(nnz), intent(out) :: ja
+    integer(kind=8), dimension(nrow+1), intent(out) :: ia
 
     ! Local variables.
     integer (kind=8) :: i, iad, j, k, k0
@@ -240,7 +240,8 @@ subroutine solve_pardiso( acsr, &
     integer, dimension(:), allocatable :: iparm  
     integer,dimension(1) :: idum
     real(8),dimension(1) :: ddum
-    
+        integer :: badcol, missing_diag, k
+    logical :: found
     n = size(b,1)
     nnz = size(acsr,1)
     nrhs = 1
@@ -284,7 +285,60 @@ subroutine solve_pardiso( acsr, &
     do i=1,64
        pt(i)%dummy =  0
     end do
-    
+
+    !---------------- CSR integrity / diagnostic checks ----------------
+
+    badcol = 0
+
+    write(*,*) 'PARDISO debug:'
+    write(*,*) ' n       =', n
+    write(*,*) ' nnz     =', nnz
+    write(*,*) ' ia(1)   =', ia(1), ' ia(n+1)=', ia(n+1), ' ia(n+1)-1=', ia(n+1)-1
+
+    if (ia(1) /= 1) stop 'ERROR: ia(1) must be 1'
+    if (ia(n+1)-1 /= nnz) stop 'ERROR: ia end mismatch'
+
+    do i=1,n
+       if (ia(i) > ia(i+1)) then
+          write(*,*) 'Row pointer decreases at row', i
+          stop 'ERROR: ia not monotone'
+       end if
+    end do
+
+    do k=1,nnz
+       if (ja(k) < 1 .or. ja(k) > n) then
+          badcol = badcol + 1
+          if (badcol <= 10) write(*,*) 'Bad column index k=',k,' ja=',ja(k)
+       end if
+    end do
+    if (badcol > 0) then
+       write(*,*) 'Total bad columns =', badcol
+       stop 'ERROR: invalid ja entries'
+    end if
+
+    ! Check each row has a diagonal and (optionally) detect duplicates
+    missing_diag = 0
+    do i=1,n
+       found = .false.
+       if (ia(i) < ia(i+1)) then
+          ! simple duplicate check (requires row segment unsorted ascending to be meaningful)
+          do k = ia(i), ia(i+1)-1
+             if (ja(k) == i) then
+                if (acsr(k) == 0.0d0) then
+                   write(*,*) 'Zero diagonal at row', i
+                   stop 'ERROR: zero diagonal'
+                end if
+                found = .true.
+             end if
+          end do
+       end if
+       if (.not. found) then
+          missing_diag = missing_diag + 1
+          if (missing_diag <= 10) write(*,*) 'Missing diagonal at row', i
+       end if
+    end do
+    if (missing_diag > 0) stop 'ERROR: missing diagonals'
+    !-------------------------------------------------------------------
     phase = 11 ! Only reordering and symbolic factorization
     
     call pardiso (pt,maxfct,mnum,mtype,phase,n,acsr,ia,ja, &

From 2bd3703af911b879ce0c7fd1da5c275057c6c801 Mon Sep 17 00:00:00 2001
From: Your Name <hm556@exeter.ac.uk>
Date: Wed, 1 Oct 2025 19:15:36 +0100
Subject: [PATCH 14/28] Switch to petsc

---
 Makefile                           | 188 ++++++++++++++++++-----------
 src/heatflow.f90                   |   6 +-
 src/heatflow/mod_evolve.f90        |  22 +++-
 src/heatflow/mod_petsc_solver.f90  |  94 +++++++++++++++
 src/heatflow/mod_sparse_solver.f90 |   4 +-
 5 files changed, 235 insertions(+), 79 deletions(-)
 create mode 100644 src/heatflow/mod_petsc_solver.f90

diff --git a/Makefile b/Makefile
index 7c65e97..840a5a8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,89 +1,133 @@
 ####################################################################
-#   11 Jun 2024                                                    #
+# HeatFlow Build System (MKL + optional PETSc + OpenMP)
 ####################################################################
-SHELL = /bin/sh
-PLAT = _linux
-
-##########################################
-# CODE DIRECTORIES AND FILES
-##########################################
-mkfile_path := $(abspath $(firstword $(MAKEFILE_LIST)))
-mkfile_dir := $(dir $(mkfile_path))
-BIN_DIR := ./bin
-SRC_DIR := ./src
-BUILD_DIR = ./obj
-
-SRCS := heatflow/mod_constants.f90 \
-		heatflow/mod_constructions.f90 \
-		heatflow/mod_SPtype.f90 \
-		heatflow/mod_global.f90 \
-		heatflow/mod_Sparse.f90 \
-		heatflow/mod_inputs.f90 \
-		heatflow/mod_material.f90 \
-		heatflow/mod_hmatrix.f90 \
-		heatflow/mod_init_evolve.f90 \
-		heatflow/mkl_pardiso.f90 \
-		heatflow/mod_sparse_solver.f90 \
-		heatflow/mod_setup.f90 \
-		heatflow/mod_boundary.f90 \
-		heatflow/mod_heating.f90 \
-		heatflow/mod_cattaneo.f90 \
-		heatflow/mod_tempdep.f90 \
-		heatflow/mod_evolve.f90 \
-        heatflow/mod_output.f90 \
-        heatflow.f90
 
-OBJS := $(addprefix $(BUILD_DIR)/,$(notdir $(SRCS:.f90=.o)))
-
-# MKL configuration
-MKLROOT ?= /opt/intel/oneapi/mkl/latest
-MKL_LIB_DIR = $(MKLROOT)/lib/intel64
-MKL_INCLUDE_DIR = $(MKLROOT)/include
-#MKL_FLAGS = -L$(MKL_LIB_DIR) -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl
-MKL_FLAGS = -L$(MKL_LIB_DIR) -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl
+SHELL        = /bin/sh
+
+# Directories
+SRC_DIR      := ./src
+BUILD_DIR    := ./obj
+BIN_DIR      := ./bin
+
+# Compiler
+FC           := gfortran
+
+# Core count
+NCORES       := $(shell nproc)
+
+# MKL
+MKLROOT      ?= /opt/intel/oneapi/mkl/latest
+MKL_LIB_DIR  := $(MKLROOT)/lib/intel64
+MKL_INCLUDE  := $(MKLROOT)/include
+MKL_FLAGS    := -L$(MKL_LIB_DIR) -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl
+
+# PETSc (manual fallback if petsc-config missing)
+PETSC_PREFIX    := /usr/lib/petscdir/petsc3.15/x86_64-linux-gnu-real
+PETSC_FINCLUDE  := /usr/share/petsc/3.15/include
+PETSC_AINCLUDE  := $(PETSC_PREFIX)/include
+PETSC_LIBDIR    := $(PETSC_PREFIX)/lib
+
+PETSC_CONFIG := $(shell command -v petsc-config 2>/dev/null)
+ifeq ($(PETSC_CONFIG),)
+  PETSC_INC  := -I$(PETSC_FINCLUDE) -I$(PETSC_AINCLUDE)
+  PETSC_LIB  := -L$(PETSC_LIBDIR) -lpetsc
+  PETSC_NOTE := (PETSc manual paths)
+else
+  PETSC_INC  := $(shell petsc-config --cflags)
+  PETSC_LIB  := $(shell petsc-config --libs)
+  PETSC_NOTE := (petsc-config)
+endif
+
+# Flags
+OPTFLAGS    := -O3
+OMPFLAGS    := -fopenmp
+WARNFLAGS   := -Wall
+MODDIR_FLAG := -J$(BUILD_DIR)
+
+FFLAGS      := -cpp $(OPTFLAGS) $(OMPFLAGS) $(WARNFLAGS) -I$(MKL_INCLUDE) $(PETSC_INC) $(MODDIR_FLAG)
+DEBUGFLAGS  := -cpp -O0 -g -fcheck=all -fbacktrace -ffpe-trap=invalid,zero,overflow,underflow -fbounds-check -I$(MKL_INCLUDE) $(PETSC_INC) $(MODDIR_FLAG)
+
+# Program
+NAME    := ThermalFlow.x
+TARGET  := $(BIN_DIR)/$(NAME)
+
+# Sources (module order)
+SRCS := \
+  heatflow/mod_constants.f90 \
+  heatflow/mod_constructions.f90 \
+  heatflow/mod_SPtype.f90 \
+  heatflow/mod_global.f90 \
+  heatflow/mod_Sparse.f90 \
+  heatflow/mod_inputs.f90 \
+  heatflow/mod_material.f90 \
+  heatflow/mod_hmatrix.f90 \
+  heatflow/mod_init_evolve.f90 \
+  heatflow/mkl_pardiso.f90 \
+  heatflow/mod_sparse_solver.f90 \
+  heatflow/mod_petsc_solver.f90 \
+  heatflow/mod_boundary.f90 \
+  heatflow/mod_heating.f90 \
+  heatflow/mod_cattaneo.f90 \
+  heatflow/mod_tempdep.f90 \
+  heatflow/mod_evolve.f90 \
+  heatflow/mod_output.f90 \
+  heatflow/mod_setup.f90 \
+  heatflow.f90
 
-#FFLAGS = -O3 -I$(MKL_INCLUDE_DIR)
-MODULEFLAGS = -J$(BUILD_DIR)
-FC = gfortran
-NCORES := $(shell nproc)
-FFLAGS = -O3 -fopenmp -I$(MKL_INCLUDE_DIR)
-
-##########################################
-# TARGETS
-##########################################
-NAME = ThermalFlow.x
-programs = $(BIN_DIR)/$(NAME)
+OBJS := $(addprefix $(BUILD_DIR)/,$(notdir $(SRCS:.f90=.o)))
 
-.PHONY: all debug clean OMP
+.PHONY: all debug clean distclean run help show
 
-all: $(programs)
+all: show $(TARGET)
 
-$(BIN_DIR):
-	mkdir -p $@
+show:
+	@printf 'Building %s %s\n' '$(NAME)' '$(PETSC_NOTE)'
 
-$(BUILD_DIR):
+$(BIN_DIR) $(BUILD_DIR):
 	mkdir -p $@
 
-# Pattern rule for compiling Fortran files
+# Compile module sources
 $(BUILD_DIR)/%.o: $(SRC_DIR)/heatflow/%.f90 | $(BUILD_DIR)
-	$(FC) $(FFLAGS) $(MODULEFLAGS) -c $< -o $@
-
-$(BUILD_DIR)/%.o: $(SRC_DIR)/%.f90 | $(BUILD_DIR)
-	$(FC) $(FFLAGS) $(MODULEFLAGS) -c $< -o $@
+	$(FC) $(FFLAGS) -c $< -o $@
 
-$(programs): $(OBJS) | $(BIN_DIR)
-	$(FC) -O3 -fopenmp $(OBJS) -o $@ $(MKL_FLAGS)
+# Main program
+$(BUILD_DIR)/heatflow.o: $(SRC_DIR)/heatflow.f90 | $(BUILD_DIR)
+	$(FC) $(FFLAGS) -c $< -o $@
 
-.PHONY: run
-run: $(programs)
-    OMP_NUM_THREADS=$(NCORES) MKL_NUM_THREADS=$(NCORES) MKL_DYNAMIC=FALSE OMP_PROC_BIND=spread OMP_PLACES=cores ./bin/$(NAME)
+# Link (single definition)
+$(TARGET): $(BIN_DIR) $(OBJS)
+	$(FC) $(OPTFLAGS) $(OMPFLAGS) $(OBJS) -o $@ $(MKL_FLAGS) $(PETSC_LIB) -Wl,-rpath,$(PETSC_LIBDIR)
 
-debug: FFLAGS = -O0 -Wall -g -ffpe-trap=invalid,zero,overflow,underflow -fbacktrace -fcheck=all -fbounds-check -I$(MKL_INCLUDE_DIR)
-debug: $(OBJS) | $(BIN_DIR)
-	$(FC) $(FFLAGS) $(OBJS) -o $(programs) $(MKL_FLAGS)
+debug: FFLAGS = $(DEBUGFLAGS)
+debug: clean show $(TARGET)
 
-OMP: $(programs)
-	./util/DShell/omp_exec.sh
+run: $(TARGET)
+	OMP_NUM_THREADS=$(NCORES) \
+	MKL_NUM_THREADS=$(NCORES) \
+	MKL_DYNAMIC=FALSE \
+	OMP_PROC_BIND=spread \
+	OMP_PLACES=cores \
+	$< $(RUN_ARGS)
 
 clean:
-	rm -f $(BUILD_DIR)/*.o $(BUILD_DIR)/*.mod $(programs)
+	@echo "[CLEAN] objects and modules"
+	@rm -f $(BUILD_DIR)/*.o $(BUILD_DIR)/*.mod
+
+distclean: clean
+	@echo "[CLEAN] executable"
+	@rm -f $(TARGET)
+
+help:
+	@echo "Targets:"
+	@echo "  make / make all    - build optimized"
+	@echo "  make debug         - debug build"
+	@echo "  make run           - run with all cores"
+	@echo "  make clean         - remove objects/modules"
+	@echo "  make distclean     - remove executable"
+	@echo "Variables:"
+	@echo "  RUN_ARGS='-ksp_type cg -pc_type gamg -ksp_rtol 1e-8 -ksp_monitor'"
+	@echo "Parallel build: make -j$(NCORES)"
+
+####################################################################
+# End
+####################################################################
\ No newline at end of file
diff --git a/src/heatflow.f90 b/src/heatflow.f90
index 4b4df6e..135482b 100644
--- a/src/heatflow.f90
+++ b/src/heatflow.f90
@@ -28,6 +28,7 @@ program HEATFLOW_V0_3
   use evolution, only: simulate
   use setup, only: set_global_variables
   use INITIAL, only: initial_evolve
+  use petsc_solver, only: petsc_init, petsc_finalize
 
   implicit none
    real(real12) :: cpustart, cpuend, cpustart2, progress
@@ -79,8 +80,11 @@ program HEATFLOW_V0_3
       ! CALL initial_evolve to set systems initial Temperature conditions      
       if (itime .eq. 1) CALL initial_evolve                      
       
-      ! run the time evolution                                   
+      ! run the time evolution  
+      CALL petsc_init()                                 
       CALL simulate(itime)
+      CALL petsc_finalize()
+
                                                 
                              
       ! Write results                           
diff --git a/src/heatflow/mod_evolve.f90 b/src/heatflow/mod_evolve.f90
index 3156bd2..22aac5a 100644
--- a/src/heatflow/mod_evolve.f90
+++ b/src/heatflow/mod_evolve.f90
@@ -31,7 +31,8 @@ module evolution
   use boundary_vector, only: boundary
   use cattaneo, only: S_catS
 !   use tempdep, only: ChangeProp 
-  use sparse_solver, only: bicgstab, solve_pardiso
+   use petsc_solver, only: solve_petsc_csr
+
   implicit none
 
   private
@@ -51,7 +52,9 @@ subroutine simulate(itime)
     integer:: ncg, itol, itmax !, iss
     integer :: iter
     real(real12) :: e, err, tol
-    
+    integer, allocatable :: ia32(:), ja32(:)   ! 32-bit copies for PETSc
+    integer :: NA32
+
     !----------------------
     ! Initialize vectors
     !----------------------
@@ -149,8 +152,19 @@ subroutine simulate(itime)
     iter= 0
     err=E
 
-
-    call bicgstab(acsr, ia, ja, S, itmax, Temp_p, x, iter)
+   print *, "Calling solver..."
+   !  call bicgstab(acsr, ia, ja, S, itmax, Temp_p, x, iter)
+  if (.not. allocated(ia32)) then
+     allocate(ia32(size(ia)), ja32(size(ja)))
+     ia32 = int(ia, kind=kind(ia32))
+     ja32 = int(ja, kind=kind(ja32))
+  end if
+   NA32 = int(NA, kind=kind(NA32))
+   allocate(x(NA))
+   call solve_petsc_csr(NA32, ia32, ja32, acsr, S, x, tol, itmax)
+  if (allocated(ia32)) deallocate(ia32, ja32)
+
+   print *, "Solver finished."
 
    ! CALL solve_pardiso(acsr, S, ia, ja, x)
    !  CALL linbcg(S,x,itol=int(itol,I4B),tol=tol, itmax=int(itmax,I4B), iter=iter, &
diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90
new file mode 100644
index 0000000..ceda8a6
--- /dev/null
+++ b/src/heatflow/mod_petsc_solver.f90
@@ -0,0 +1,94 @@
+module petsc_solver
+#include "petsc/finclude/petscsys.h"
+#include "petsc/finclude/petscksp.h"
+  use petscksp
+  implicit none
+  private
+  public :: petsc_init, petsc_finalize, solve_petsc_csr
+
+contains
+
+  subroutine petsc_init()
+    integer :: ierr
+    call PetscInitialize(PETSC_NULL_CHARACTER, ierr)
+  end subroutine petsc_init
+
+  subroutine petsc_finalize()
+    integer :: ierr
+    call PetscFinalize(ierr)
+  end subroutine petsc_finalize
+
+  subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
+    integer,  intent(in) :: n
+    integer,  intent(in) :: ia(:), ja(:)
+    real(8),  intent(in) :: aval(:), b(:)
+    real(8),  intent(inout) :: x(:)
+    real(8),  intent(in) :: rtol
+    integer,  intent(in) :: maxit
+    Mat :: A
+    Vec :: bb, xx
+    KSP :: ksp
+    PC  :: pc
+    integer :: ierr, i, row_nz, start_k
+    integer, allocatable :: cols0(:), idx(:)
+    real(8), allocatable :: vals(:)
+    real(8), pointer :: xptr(:)
+
+    if (size(ia) /= n+1) stop 'solve_petsc_csr: ia size mismatch'
+    if (size(b) /= n .or. size(x) /= n) stop 'solve_petsc_csr: vector size mismatch'
+
+    ! Create matrix with an estimated 7 nonzeros/row (adjust if needed)
+    call MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 7, PETSC_NULL_INTEGER, A, ierr)
+
+    do i = 1, n
+       row_nz = ia(i+1) - ia(i)
+       if (row_nz > 0) then
+          start_k = ia(i)
+          allocate(cols0(row_nz), vals(row_nz))
+          cols0 = ja(start_k:start_k+row_nz-1) - 1      ! zero-based
+          vals  = aval(start_k:start_k+row_nz-1)
+          call MatSetValues(A, 1, (/i-1/), row_nz, cols0, vals, INSERT_VALUES, ierr)
+          deallocate(cols0, vals)
+       end if
+    end do
+    call MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY, ierr)
+    call MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY, ierr)
+
+    ! Create vectors
+    call VecCreateSeq(PETSC_COMM_SELF, n, bb, ierr)
+    call VecCreateSeq(PETSC_COMM_SELF, n, xx, ierr)
+
+    ! Set RHS and initial guess
+    allocate(idx(n))
+    idx = [(i-1, i=1,n)]
+    call VecSetValues(bb, n, idx, b, INSERT_VALUES, ierr)
+    call VecAssemblyBegin(bb,ierr); call VecAssemblyEnd(bb,ierr)
+
+    call VecSetValues(xx, n, idx, x, INSERT_VALUES, ierr)
+    call VecAssemblyBegin(xx,ierr); call VecAssemblyEnd(xx,ierr)
+
+    ! KSP setup
+    call KSPCreate(PETSC_COMM_SELF, ksp, ierr)
+    call KSPSetOperators(ksp, A, A, ierr)     ! 3-arg form (reuse automatically)
+    call KSPGetPC(ksp, pc, ierr)
+    call PCSetType(pc, PCILU, ierr)           ! Override at runtime with -pc_type
+    call KSPSetType(ksp, KSPCG, ierr)         ! Use -ksp_type bcgs if not SPD
+    call KSPSetTolerances(ksp, rtol, PETSC_DEFAULT_REAL, PETSC_DEFAULT_REAL, maxit, ierr)
+    call KSPSetFromOptions(ksp, ierr)
+
+    call KSPSolve(ksp, bb, xx, ierr)
+
+    ! Extract solution
+    call VecGetArrayF90(xx, xptr, ierr)
+    x = xptr
+    call VecRestoreArrayF90(xx, xptr, ierr)
+
+    ! Cleanup
+    deallocate(idx)
+    call KSPDestroy(ksp, ierr)
+    call VecDestroy(bb, ierr)
+    call VecDestroy(xx, ierr)
+    call MatDestroy(A, ierr)
+  end subroutine solve_petsc_csr
+
+end module petsc_solver
\ No newline at end of file
diff --git a/src/heatflow/mod_sparse_solver.f90 b/src/heatflow/mod_sparse_solver.f90
index f085b87..8bb24d0 100644
--- a/src/heatflow/mod_sparse_solver.f90
+++ b/src/heatflow/mod_sparse_solver.f90
@@ -139,7 +139,6 @@ subroutine bicgstab( acsr, &
     real(8), dimension(:), allocatable :: r, p, s, rst, temp1, temp2
     
     n = size(b,1)
-    
     allocate(x(n))
     allocate(r(n))
     allocate(p(n))
@@ -150,6 +149,7 @@ subroutine bicgstab( acsr, &
     
     call mkl_dcsrgemv("N",n,acsr,ia,ja,x,temp1)
     
+    print *, "Initial residual norm: ", norm2(b - temp1)
     r = b - temp1
 
     call random_number(rst)
@@ -163,7 +163,7 @@ subroutine bicgstab( acsr, &
     delta0 = delta
 
     do i = 1, maxiter
-       
+       print *, "Iteration ", i
        if ( norm2(r) /= norm2(r) ) then
           write(*,'(a)') "Error in solver: residual NaN"
           exit

From 763ec50e5c19fbdbc8713bb8d37af2031ff57f4f Mon Sep 17 00:00:00 2001
From: Your Name <hm556@exeter.ac.uk>
Date: Mon, 6 Oct 2025 11:22:12 +0100
Subject: [PATCH 15/28] Fix petsc finaliser

---
 src/heatflow.f90            | 8 +++++---
 src/heatflow/mod_evolve.f90 | 2 --
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/heatflow.f90 b/src/heatflow.f90
index 135482b..d1db700 100644
--- a/src/heatflow.f90
+++ b/src/heatflow.f90
@@ -65,6 +65,8 @@ program HEATFLOW_V0_3
    !-------------------------------------------------------------!
    ! run simulation for 'ntime' time steps                       !
    !-------------------------------------------------------------!
+   CALL petsc_init()                                 
+
    do itime=1,ntime 
 
       if (iverb.eq.0) then
@@ -81,9 +83,7 @@ program HEATFLOW_V0_3
       if (itime .eq. 1) CALL initial_evolve                      
       
       ! run the time evolution  
-      CALL petsc_init()                                 
       CALL simulate(itime)
-      CALL petsc_finalize()
 
                                                 
                              
@@ -91,7 +91,9 @@ program HEATFLOW_V0_3
       CALL data_write(itime) 
       if (IVERB.ge.3) CALL final_print                           
                                                                  
-   end do                                                        
+   end do  
+   CALL petsc_finalize()
+                                                      
    !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^!
 
    !-------------------------------------------------------------!
diff --git a/src/heatflow/mod_evolve.f90 b/src/heatflow/mod_evolve.f90
index 22aac5a..5a857cd 100644
--- a/src/heatflow/mod_evolve.f90
+++ b/src/heatflow/mod_evolve.f90
@@ -152,7 +152,6 @@ subroutine simulate(itime)
     iter= 0
     err=E
 
-   print *, "Calling solver..."
    !  call bicgstab(acsr, ia, ja, S, itmax, Temp_p, x, iter)
   if (.not. allocated(ia32)) then
      allocate(ia32(size(ia)), ja32(size(ja)))
@@ -164,7 +163,6 @@ subroutine simulate(itime)
    call solve_petsc_csr(NA32, ia32, ja32, acsr, S, x, tol, itmax)
   if (allocated(ia32)) deallocate(ia32, ja32)
 
-   print *, "Solver finished."
 
    ! CALL solve_pardiso(acsr, S, ia, ja, x)
    !  CALL linbcg(S,x,itol=int(itol,I4B),tol=tol, itmax=int(itmax,I4B), iter=iter, &

From ac8835cce2dbc158174db28592c8c304b7a7574e Mon Sep 17 00:00:00 2001
From: Your Name <hm556@exeter.ac.uk>
Date: Fri, 14 Nov 2025 12:57:14 +0000
Subject: [PATCH 16/28] Working petsc, not fully reproducing old yet

---
 src/.vscode/settings.json         |  5 +++++
 src/heatflow/mod_evolve.f90       | 24 ++++++++++++++++--------
 src/heatflow/mod_petsc_solver.f90 | 29 ++++++++++++++++++++++++++---
 3 files changed, 47 insertions(+), 11 deletions(-)
 create mode 100644 src/.vscode/settings.json

diff --git a/src/.vscode/settings.json b/src/.vscode/settings.json
new file mode 100644
index 0000000..a8c2003
--- /dev/null
+++ b/src/.vscode/settings.json
@@ -0,0 +1,5 @@
+{
+    "python-envs.defaultEnvManager": "ms-python.python:conda",
+    "python-envs.defaultPackageManager": "ms-python.python:conda",
+    "python-envs.pythonProjects": []
+}
\ No newline at end of file
diff --git a/src/heatflow/mod_evolve.f90 b/src/heatflow/mod_evolve.f90
index 5a857cd..5921610 100644
--- a/src/heatflow/mod_evolve.f90
+++ b/src/heatflow/mod_evolve.f90
@@ -153,22 +153,30 @@ subroutine simulate(itime)
     err=E
 
    !  call bicgstab(acsr, ia, ja, S, itmax, Temp_p, x, iter)
-  if (.not. allocated(ia32)) then
-     allocate(ia32(size(ia)), ja32(size(ja)))
-     ia32 = int(ia, kind=kind(ia32))
-     ja32 = int(ja, kind=kind(ja32))
-  end if
-   NA32 = int(NA, kind=kind(NA32))
+   
+   ! Allocate and initialize x with a good initial guess
    allocate(x(NA))
+   x = Temp_p + (Temp_p - Temp_pp)
+   if (any(x - Temp_p .lt. TINY)) x = x + TINY ! avoid nan solver issue
+   
+   ! Convert to 32-bit integers for PETSc (only on first call)
+   if (.not. allocated(ia32)) then
+      allocate(ia32(size(ia)), ja32(size(ja)))
+      ia32 = int(ia, kind=kind(ia32))
+      ja32 = int(ja, kind=kind(ja32))
+   end if
+   NA32 = int(NA, kind=kind(NA32))
+   
    call solve_petsc_csr(NA32, ia32, ja32, acsr, S, x, tol, itmax)
-  if (allocated(ia32)) deallocate(ia32, ja32)
+   
+   ! Note: Don't deallocate ia32, ja32 - keep them for next time step
 
 
    ! CALL solve_pardiso(acsr, S, ia, ja, x)
    !  CALL linbcg(S,x,itol=int(itol,I4B),tol=tol, itmax=int(itmax,I4B), iter=iter, &
          ! err=E)
          
-   ! 
+   !
     if (any(isnan(x(:)))) then
        write(0,*) "fatal error: NAN in x tempurature vector"
        write(0,*) 'time step ', itime, "      T   ", sum(Temp_p)/size(Temp_p), E ,iter
diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90
index ceda8a6..b24ac17 100644
--- a/src/heatflow/mod_petsc_solver.f90
+++ b/src/heatflow/mod_petsc_solver.f90
@@ -29,10 +29,12 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
     Vec :: bb, xx
     KSP :: ksp
     PC  :: pc
-    integer :: ierr, i, row_nz, start_k
+    integer :: ierr, i, row_nz, start_k, its
     integer, allocatable :: cols0(:), idx(:)
     real(8), allocatable :: vals(:)
     real(8), pointer :: xptr(:)
+    KSPConvergedReason :: reason
+    real(8) :: rnorm
 
     if (size(ia) /= n+1) stop 'solve_petsc_csr: ia size mismatch'
     if (size(b) /= n .or. size(x) /= n) stop 'solve_petsc_csr: vector size mismatch'
@@ -71,12 +73,33 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
     call KSPCreate(PETSC_COMM_SELF, ksp, ierr)
     call KSPSetOperators(ksp, A, A, ierr)     ! 3-arg form (reuse automatically)
     call KSPGetPC(ksp, pc, ierr)
-    call PCSetType(pc, PCILU, ierr)           ! Override at runtime with -pc_type
-    call KSPSetType(ksp, KSPCG, ierr)         ! Use -ksp_type bcgs if not SPD
+    call PCSetType(pc, PCJACOBI, ierr)        ! Use Jacobi (diagonal) preconditioner to match linbcg
+    call KSPSetType(ksp, KSPBCGS, ierr)       ! Use BCGS to match linbcg behavior
+    
+    ! Set convergence tolerances
+    ! rtol = relative tolerance, atol = absolute tolerance (use default), dtol = divergence tolerance, maxits = max iterations
     call KSPSetTolerances(ksp, rtol, PETSC_DEFAULT_REAL, PETSC_DEFAULT_REAL, maxit, ierr)
+    
+    ! Use unpreconditioned norm (matching linbcg with itol=1)
+    call KSPSetNormType(ksp, KSP_NORM_UNPRECONDITIONED, ierr)
+    
+    ! Allow command line override of solver options
     call KSPSetFromOptions(ksp, ierr)
 
     call KSPSolve(ksp, bb, xx, ierr)
+    
+    ! Check convergence
+    call KSPGetConvergedReason(ksp, reason, ierr)
+    call KSPGetIterationNumber(ksp, its, ierr)
+    call KSPGetResidualNorm(ksp, rnorm, ierr)
+    
+    if (reason < 0) then
+       write(0,*) "WARNING: PETSc solver diverged or failed!"
+       write(0,*) "  Reason code:", reason
+       write(0,*) "  Iterations:", its
+       write(0,*) "  Residual norm:", rnorm
+       ! Don't stop - let the main code detect NaNs if needed
+    end if
 
     ! Extract solution
     call VecGetArrayF90(xx, xptr, ierr)

From 2e2617772c593d4e322d6a3d3a8b07e5f71ae170 Mon Sep 17 00:00:00 2001
From: Your Name <hm556@exeter.ac.uk>
Date: Fri, 14 Nov 2025 14:54:39 +0000
Subject: [PATCH 17/28] Fixed petsc

---
 src/heatflow/mod_evolve.f90        | 33 ++++++++++++++++++-
 src/heatflow/mod_heating.f90       |  2 +-
 src/heatflow/mod_petsc_solver.f90  | 52 +++++++++++++++++++++++++++++-
 src/heatflow/mod_setup.f90         |  5 ++-
 src/heatflow/mod_sparse_solver.f90 | 39 +++++++++++++++++++---
 5 files changed, 120 insertions(+), 11 deletions(-)
 mode change 100644 => 100755 src/heatflow/mod_setup.f90

diff --git a/src/heatflow/mod_evolve.f90 b/src/heatflow/mod_evolve.f90
index 5921610..67af070 100644
--- a/src/heatflow/mod_evolve.f90
+++ b/src/heatflow/mod_evolve.f90
@@ -37,6 +37,9 @@ module evolution
 
   private
   public :: simulate
+  
+  ! Module-level variables for PETSc (persist across time steps)
+  integer, allocatable, save :: ia32(:), ja32(:)   ! 32-bit copies for PETSc
 
 contains
 
@@ -52,7 +55,6 @@ subroutine simulate(itime)
     integer:: ncg, itol, itmax !, iss
     integer :: iter
     real(real12) :: e, err, tol
-    integer, allocatable :: ia32(:), ja32(:)   ! 32-bit copies for PETSc
     integer :: NA32
 
     !----------------------
@@ -117,6 +119,16 @@ subroutine simulate(itime)
     !---------------------------------------------
     if ( iSteady .eq. 0 ) then
        S = - inverse_time * Temp_p * lin_rhoc - Qdens - B
+       if (IVERB .gt. 3) then
+          write(*,*) "S construction diagnostics:"
+          write(*,*) "  inverse_time =", inverse_time
+          write(*,*) "  Temp_p avg =", sum(Temp_p)/size(Temp_p)
+          write(*,*) "  lin_rhoc avg =", sum(lin_rhoc)/size(lin_rhoc)
+          write(*,*) "  Qdens avg =", sum(Qdens)/size(Qdens)
+          write(*,*) "  B avg =", sum(B)/size(B)
+          write(*,*) "  -inverse_time*Temp_p*lin_rhoc avg =", sum(-inverse_time*Temp_p*lin_rhoc)/size(Temp_p)
+          write(*,*) "  S before S_CAT avg =", sum(S)/size(S)
+       end if
        if ( iCAttaneo  .eq. 1) then
           S = S + S_CAT
        end if
@@ -159,6 +171,17 @@ subroutine simulate(itime)
    x = Temp_p + (Temp_p - Temp_pp)
    if (any(x - Temp_p .lt. TINY)) x = x + TINY ! avoid nan solver issue
    
+   ! Debug: Print initial guess statistics
+   if (IVERB .gt. 3) then
+      write(*,*) "========== PETSc Solver Diagnostics =========="
+      write(*,*) "Time step:", itime
+      write(*,*) "Initial guess x: min=", minval(x), " max=", maxval(x), " avg=", sum(x)/size(x)
+      write(*,*) "RHS S: min=", minval(S), " max=", maxval(S), " avg=", sum(S)/size(S)
+      write(*,*) "Temp_p: min=", minval(Temp_p), " max=", maxval(Temp_p), " avg=", sum(Temp_p)/size(Temp_p)
+      write(*,*) "Matrix acsr: min=", minval(acsr), " max=", maxval(acsr), " avg=", sum(acsr)/size(acsr)
+      write(*,*) "Matrix size: n=", NA32, " nnz=", size(acsr)
+   end if
+   
    ! Convert to 32-bit integers for PETSc (only on first call)
    if (.not. allocated(ia32)) then
       allocate(ia32(size(ia)), ja32(size(ja)))
@@ -169,6 +192,14 @@ subroutine simulate(itime)
    
    call solve_petsc_csr(NA32, ia32, ja32, acsr, S, x, tol, itmax)
    
+   ! Debug: Print solution statistics and verify solution
+   if (IVERB .gt. 3) then
+      write(*,*) "Solution x after PETSc: min=", minval(x), " max=", maxval(x), " avg=", sum(x)/size(x)
+      write(*,*) "Temperature change: avg(x-Temp_p)=", sum(x-Temp_p)/size(x)
+      write(*,*) "Max temperature change: ", maxval(abs(x-Temp_p))
+      write(*,*) "=============================================="
+   end if
+   
    ! Note: Don't deallocate ia32, ja32 - keep them for next time step
 
 
diff --git a/src/heatflow/mod_heating.f90 b/src/heatflow/mod_heating.f90
index f33eb5f..aa070ac 100644
--- a/src/heatflow/mod_heating.f90
+++ b/src/heatflow/mod_heating.f90
@@ -181,7 +181,7 @@ subroutine heater(itime, Q, Qdens)
    
 
     ! Normalize all heat sources by the heated volume
-    shared_power = .False.
+    shared_power = .TRUE.
     if (shared_power) then
       if (heated_volume .gt. 0.0) then
          Qdens(:) = Q(:) / heated_volume
diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90
index b24ac17..dd6c73a 100644
--- a/src/heatflow/mod_petsc_solver.f90
+++ b/src/heatflow/mod_petsc_solver.f90
@@ -42,19 +42,34 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
     ! Create matrix with an estimated 7 nonzeros/row (adjust if needed)
     call MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 7, PETSC_NULL_INTEGER, A, ierr)
 
+    ! Fill matrix from CSR format (ia, ja are 1-based Fortran indexing)
     do i = 1, n
        row_nz = ia(i+1) - ia(i)
        if (row_nz > 0) then
           start_k = ia(i)
           allocate(cols0(row_nz), vals(row_nz))
-          cols0 = ja(start_k:start_k+row_nz-1) - 1      ! zero-based
+          ! Convert column indices from 1-based to 0-based for PETSc
+          cols0 = ja(start_k:start_k+row_nz-1) - 1
           vals  = aval(start_k:start_k+row_nz-1)
+          
+          ! Debug: print first row details
+          if (i == 1) then
+             write(*,'(A,I0,A,I0)') 'Row 1: nnz=', row_nz, ', start_k=', start_k
+             write(*,'(A,10I6)') '  1-based cols:', ja(start_k:min(start_k+9,start_k+row_nz-1))
+             write(*,'(A,10I6)') '  0-based cols:', cols0(1:min(10,row_nz))
+             write(*,'(A,10ES12.4)') '  vals:', vals(1:min(10,row_nz))
+          end if
+          
+          ! Set row i-1 (0-based) with column indices cols0 (0-based)
           call MatSetValues(A, 1, (/i-1/), row_nz, cols0, vals, INSERT_VALUES, ierr)
           deallocate(cols0, vals)
        end if
     end do
     call MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY, ierr)
     call MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY, ierr)
+    
+    ! Optional: Verify matrix assembly (uncomment for debugging)
+    ! call MatView(A, PETSC_VIEWER_STDOUT_SELF, ierr)
 
     ! Create vectors
     call VecCreateSeq(PETSC_COMM_SELF, n, bb, ierr)
@@ -93,6 +108,10 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
     call KSPGetIterationNumber(ksp, its, ierr)
     call KSPGetResidualNorm(ksp, rnorm, ierr)
     
+    ! Report convergence status (commented out by default for performance)
+    ! Uncomment the next line to see convergence info every solve:
+    ! write(*,'(A,I0,A,ES12.5,A,I0)') ' PETSc: iterations=', its, ', residual=', rnorm, ', reason=', reason
+    
     if (reason < 0) then
        write(0,*) "WARNING: PETSc solver diverged or failed!"
        write(0,*) "  Reason code:", reason
@@ -105,6 +124,37 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
     call VecGetArrayF90(xx, xptr, ierr)
     x = xptr
     call VecRestoreArrayF90(xx, xptr, ierr)
+    
+    ! Manual residual check: compute ||A*x - b||
+    block
+      real(8), allocatable :: Ax(:), residual(:)
+      real(8) :: manual_rnorm, bnorm
+      integer(8) :: i, k, start_k, row_nz
+      
+      allocate(Ax(n), residual(n))
+      Ax = 0.0_8
+      
+      ! Compute A*x manually using CSR format
+      do i = 1, n
+         row_nz = ia(i+1) - ia(i)
+         if (row_nz > 0) then
+            start_k = ia(i)
+            do k = start_k, start_k + row_nz - 1
+               Ax(i) = Ax(i) + aval(k) * x(ja(k))
+            end do
+         end if
+      end do
+      
+      residual = Ax - b
+      manual_rnorm = sqrt(sum(residual**2))
+      bnorm = sqrt(sum(b**2))
+      
+      write(*,'(A,ES15.7)') ' Manual residual check: ||A*x-b|| = ', manual_rnorm
+      write(*,'(A,ES15.7)') '                        ||b||     = ', bnorm
+      write(*,'(A,ES15.7)') '                        ||A*x-b||/||b|| = ', manual_rnorm/bnorm
+      
+      deallocate(Ax, residual)
+    end block
 
     ! Cleanup
     deallocate(idx)
diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90
old mode 100644
new mode 100755
index 40c53bd..e3c2872
--- a/src/heatflow/mod_setup.f90
+++ b/src/heatflow/mod_setup.f90
@@ -37,6 +37,7 @@ subroutine set_global_variables()
       integer(int12) :: ix,iy,iz,index
       real(real12) :: kappa,kappa3D,h_conv,heat_capacity,rho,sound_speed,tau, em
       real(real12), dimension(3) :: vel
+
       allocate(Temp_cur(nx, ny, nz))
       allocate(Temp_p(NA))
       allocate(Temp_pp(NA))
@@ -82,7 +83,6 @@ subroutine set_global_variables()
          ! Allocate the arrays to hold the H matrix in CSR format
          allocate(acsr(ra%len), ja(ra%len), ia(ra%n+1))
          CALL coo2csr(ra%n, ra%len, ra%val, ra%irow, ra%jcol, acsr, ja, ia)
-         ! print*, ra%val               
       end if
       !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -106,7 +106,7 @@ subroutine sparse_Hmatrix()
       if (Periodicz) len = len + 2*nx*ny
       ra%n = NA ! The number of rows in the H matrix
       ra%len = len ! The number of non-zero elements in the H matrix
-      ! Allocate the arrays to hold the H matrix in sparse row storage
+      ! Allocate the arrays to hold the H matrix in sparse storage
       allocate(ra%val(len), ra%irow(len), ra%jcol(len))
       ra%val(:)=0
       ra%irow(:)=-2
@@ -147,7 +147,6 @@ subroutine sparse_Hmatrix()
              ra%irow(count) = i ! The row of the H matrix
              ra%jcol(count) = j ! The column of the H matrix
              count = count + 1 ! The number of non-zero elements in the H matrix
-             H0=hmatrixfunc(j,i) ! The value of the H matrix
              ra%val(count) = H0 ! The value of the H matrix
              ra%irow(count) = j ! The row of the H matrix
              ra%jcol(count) = i ! The column of the H matrix
diff --git a/src/heatflow/mod_sparse_solver.f90 b/src/heatflow/mod_sparse_solver.f90
index 8bb24d0..8f7957c 100644
--- a/src/heatflow/mod_sparse_solver.f90
+++ b/src/heatflow/mod_sparse_solver.f90
@@ -60,10 +60,14 @@ subroutine coo2csr( nrow, &
     integer(kind=8), dimension(nrow+1), intent(out) :: ia
 
     ! Local variables.
-    integer (kind=8) :: i, iad, j, k, k0
+    integer (kind=8) :: i, iad, j, k, k0, row_start
+    integer (kind=8) :: dup_count
     real(kind=8) :: x
+    logical :: found_dup
+    integer(kind=8), dimension(nrow+1) :: ia_save  ! Save original row starts
     
     ia(1:nrow+1) = 0
+    dup_count = 0
     
     ! determine the row lengths.
     
@@ -83,20 +87,45 @@ subroutine coo2csr( nrow, &
 
     end do
     
+    ! Save the original row pointers
+    ia_save = ia
+    
     ! go through the structure once more. fill in output matrix.
+    ! This version handles duplicate (i,j) entries by summing them.
     
     do k = 1, nnz
 
        i = ir(k)
        j = jc(k)
        x = a(k)
-       iad = ia(i)
-       acsr(iad) = x
-       ja(iad) = j
-       ia(i) = iad + 1
+       
+       ! Search for existing entry in this row with same column
+       found_dup = .false.
+       row_start = ia_save(i)
+       do iad = row_start, ia(i)-1
+          if (ja(iad) == j) then
+             ! Found duplicate - sum the values
+             acsr(iad) = acsr(iad) + x
+             found_dup = .true.
+             dup_count = dup_count + 1
+             exit
+          end if
+       end do
+       
+       if (.not. found_dup) then
+          ! New entry - insert it
+          iad = ia(i)
+          acsr(iad) = x
+          ja(iad) = j
+          ia(i) = iad + 1
+       end if
 
     end do
     
+    if (dup_count > 0) then
+       write(*,'(A,I0,A)') 'WARNING: COO->CSR found and summed ', dup_count, ' duplicate entries'
+    end if
+    
     ! shift back ia.
     
     do j = nrow, 1, -1

From 61acedfc9dfa65511671744e7f553a3c1db9c380 Mon Sep 17 00:00:00 2001
From: Your Name <hm556@exeter.ac.uk>
Date: Tue, 18 Nov 2025 09:53:30 +0000
Subject: [PATCH 18/28] remove debugging prints

---
 src/heatflow/mod_petsc_solver.f90 | 39 -------------------------------
 1 file changed, 39 deletions(-)

diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90
index dd6c73a..d6fd327 100644
--- a/src/heatflow/mod_petsc_solver.f90
+++ b/src/heatflow/mod_petsc_solver.f90
@@ -52,14 +52,6 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
           cols0 = ja(start_k:start_k+row_nz-1) - 1
           vals  = aval(start_k:start_k+row_nz-1)
           
-          ! Debug: print first row details
-          if (i == 1) then
-             write(*,'(A,I0,A,I0)') 'Row 1: nnz=', row_nz, ', start_k=', start_k
-             write(*,'(A,10I6)') '  1-based cols:', ja(start_k:min(start_k+9,start_k+row_nz-1))
-             write(*,'(A,10I6)') '  0-based cols:', cols0(1:min(10,row_nz))
-             write(*,'(A,10ES12.4)') '  vals:', vals(1:min(10,row_nz))
-          end if
-          
           ! Set row i-1 (0-based) with column indices cols0 (0-based)
           call MatSetValues(A, 1, (/i-1/), row_nz, cols0, vals, INSERT_VALUES, ierr)
           deallocate(cols0, vals)
@@ -124,37 +116,6 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
     call VecGetArrayF90(xx, xptr, ierr)
     x = xptr
     call VecRestoreArrayF90(xx, xptr, ierr)
-    
-    ! Manual residual check: compute ||A*x - b||
-    block
-      real(8), allocatable :: Ax(:), residual(:)
-      real(8) :: manual_rnorm, bnorm
-      integer(8) :: i, k, start_k, row_nz
-      
-      allocate(Ax(n), residual(n))
-      Ax = 0.0_8
-      
-      ! Compute A*x manually using CSR format
-      do i = 1, n
-         row_nz = ia(i+1) - ia(i)
-         if (row_nz > 0) then
-            start_k = ia(i)
-            do k = start_k, start_k + row_nz - 1
-               Ax(i) = Ax(i) + aval(k) * x(ja(k))
-            end do
-         end if
-      end do
-      
-      residual = Ax - b
-      manual_rnorm = sqrt(sum(residual**2))
-      bnorm = sqrt(sum(b**2))
-      
-      write(*,'(A,ES15.7)') ' Manual residual check: ||A*x-b|| = ', manual_rnorm
-      write(*,'(A,ES15.7)') '                        ||b||     = ', bnorm
-      write(*,'(A,ES15.7)') '                        ||A*x-b||/||b|| = ', manual_rnorm/bnorm
-      
-      deallocate(Ax, residual)
-    end block
 
     ! Cleanup
     deallocate(idx)

From 9307f859a53c6fdfcde7335466d693c0cab4b929 Mon Sep 17 00:00:00 2001
From: Your Name <hm556@exeter.ac.uk>
Date: Tue, 18 Nov 2025 10:24:36 +0000
Subject: [PATCH 19/28] make more memory efficient

---
 src/heatflow/mod_petsc_solver.f90 | 134 ++++++++++++-------
 src/heatflow/mod_setup.f90        | 212 +++++++++++++++++++++++-------
 2 files changed, 248 insertions(+), 98 deletions(-)

diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90
index d6fd327..e45ce2e 100644
--- a/src/heatflow/mod_petsc_solver.f90
+++ b/src/heatflow/mod_petsc_solver.f90
@@ -4,7 +4,15 @@ module petsc_solver
   use petscksp
   implicit none
   private
-  public :: petsc_init, petsc_finalize, solve_petsc_csr
+  public :: petsc_init, petsc_finalize, solve_petsc_csr, petsc_cleanup
+
+  ! Persistent PETSc objects (reused across timesteps for memory efficiency)
+  Mat, save :: A_saved = PETSC_NULL_MAT
+  Vec, save :: bb_saved = PETSC_NULL_VEC
+  Vec, save :: xx_saved = PETSC_NULL_VEC
+  KSP, save :: ksp_saved = PETSC_NULL_KSP
+  logical, save :: initialized = .false.
+  integer, save :: n_saved = 0
 
 contains
 
@@ -15,9 +23,25 @@ end subroutine petsc_init
 
   subroutine petsc_finalize()
     integer :: ierr
+    call petsc_cleanup()
     call PetscFinalize(ierr)
   end subroutine petsc_finalize
 
+  subroutine petsc_cleanup()
+    ! Clean up persistent PETSc objects
+    integer :: ierr
+    if (A_saved /= PETSC_NULL_MAT) call MatDestroy(A_saved, ierr)
+    if (bb_saved /= PETSC_NULL_VEC) call VecDestroy(bb_saved, ierr)
+    if (xx_saved /= PETSC_NULL_VEC) call VecDestroy(xx_saved, ierr)
+    if (ksp_saved /= PETSC_NULL_KSP) call KSPDestroy(ksp_saved, ierr)
+    A_saved = PETSC_NULL_MAT
+    bb_saved = PETSC_NULL_VEC
+    xx_saved = PETSC_NULL_VEC
+    ksp_saved = PETSC_NULL_KSP
+    initialized = .false.
+    n_saved = 0
+  end subroutine petsc_cleanup
+
   subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
     integer,  intent(in) :: n
     integer,  intent(in) :: ia(:), ja(:)
@@ -25,24 +49,58 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
     real(8),  intent(inout) :: x(:)
     real(8),  intent(in) :: rtol
     integer,  intent(in) :: maxit
-    Mat :: A
-    Vec :: bb, xx
-    KSP :: ksp
+    
     PC  :: pc
     integer :: ierr, i, row_nz, start_k, its
-    integer, allocatable :: cols0(:), idx(:)
+    integer, allocatable :: cols0(:), idx(:), d_nnz(:)
     real(8), allocatable :: vals(:)
     real(8), pointer :: xptr(:)
     KSPConvergedReason :: reason
     real(8) :: rnorm
+    logical :: rebuild_needed
 
     if (size(ia) /= n+1) stop 'solve_petsc_csr: ia size mismatch'
     if (size(b) /= n .or. size(x) /= n) stop 'solve_petsc_csr: vector size mismatch'
 
-    ! Create matrix with an estimated 7 nonzeros/row (adjust if needed)
-    call MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 7, PETSC_NULL_INTEGER, A, ierr)
+    ! Determine if we need to rebuild the matrix structure
+    rebuild_needed = .false.
+    if (.not. initialized) rebuild_needed = .true.
+    if (n /= n_saved) rebuild_needed = .true.
+    
+    ! Create PETSc objects on first call or if size changed
+    if (rebuild_needed) then
+      ! Clean up old objects if they exist
+      if (initialized) call petsc_cleanup()
+      
+      ! Preallocate matrix with exact nonzeros per row (saves memory)
+      allocate(d_nnz(n))
+      do i = 1, n
+        d_nnz(i) = ia(i+1) - ia(i)
+      end do
+      call MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 0, d_nnz, A_saved, ierr)
+      deallocate(d_nnz)
+      
+      ! Create persistent vectors
+      call VecCreateSeq(PETSC_COMM_SELF, n, bb_saved, ierr)
+      call VecCreateSeq(PETSC_COMM_SELF, n, xx_saved, ierr)
+      
+      ! Create and configure KSP solver (persistent across timesteps)
+      call KSPCreate(PETSC_COMM_SELF, ksp_saved, ierr)
+      call KSPSetOperators(ksp_saved, A_saved, A_saved, ierr)
+      call KSPGetPC(ksp_saved, pc, ierr)
+      call PCSetType(pc, PCJACOBI, ierr)        ! Jacobi preconditioner
+      call KSPSetType(ksp_saved, KSPBCGS, ierr) ! BiCGSTAB solver
+      call KSPSetTolerances(ksp_saved, rtol, PETSC_DEFAULT_REAL, &
+                           PETSC_DEFAULT_REAL, maxit, ierr)
+      call KSPSetNormType(ksp_saved, KSP_NORM_UNPRECONDITIONED, ierr)
+      call KSPSetFromOptions(ksp_saved, ierr)
+      
+      initialized = .true.
+      n_saved = n
+    end if
 
-    ! Fill matrix from CSR format (ia, ja are 1-based Fortran indexing)
+    ! Update matrix values (always needed each timestep)
+    call MatZeroEntries(A_saved, ierr)
     do i = 1, n
        row_nz = ia(i+1) - ia(i)
        if (row_nz > 0) then
@@ -53,52 +111,34 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
           vals  = aval(start_k:start_k+row_nz-1)
           
           ! Set row i-1 (0-based) with column indices cols0 (0-based)
-          call MatSetValues(A, 1, (/i-1/), row_nz, cols0, vals, INSERT_VALUES, ierr)
+          call MatSetValues(A_saved, 1, (/i-1/), row_nz, cols0, vals, INSERT_VALUES, ierr)
           deallocate(cols0, vals)
        end if
     end do
-    call MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY, ierr)
-    call MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY, ierr)
+    call MatAssemblyBegin(A_saved, MAT_FINAL_ASSEMBLY, ierr)
+    call MatAssemblyEnd(A_saved, MAT_FINAL_ASSEMBLY, ierr)
     
     ! Optional: Verify matrix assembly (uncomment for debugging)
-    ! call MatView(A, PETSC_VIEWER_STDOUT_SELF, ierr)
-
-    ! Create vectors
-    call VecCreateSeq(PETSC_COMM_SELF, n, bb, ierr)
-    call VecCreateSeq(PETSC_COMM_SELF, n, xx, ierr)
+    ! call MatView(A_saved, PETSC_VIEWER_STDOUT_SELF, ierr)
 
-    ! Set RHS and initial guess
+    ! Update RHS vector
     allocate(idx(n))
     idx = [(i-1, i=1,n)]
-    call VecSetValues(bb, n, idx, b, INSERT_VALUES, ierr)
-    call VecAssemblyBegin(bb,ierr); call VecAssemblyEnd(bb,ierr)
+    call VecSetValues(bb_saved, n, idx, b, INSERT_VALUES, ierr)
+    call VecAssemblyBegin(bb_saved,ierr); call VecAssemblyEnd(bb_saved,ierr)
 
-    call VecSetValues(xx, n, idx, x, INSERT_VALUES, ierr)
-    call VecAssemblyBegin(xx,ierr); call VecAssemblyEnd(xx,ierr)
-
-    ! KSP setup
-    call KSPCreate(PETSC_COMM_SELF, ksp, ierr)
-    call KSPSetOperators(ksp, A, A, ierr)     ! 3-arg form (reuse automatically)
-    call KSPGetPC(ksp, pc, ierr)
-    call PCSetType(pc, PCJACOBI, ierr)        ! Use Jacobi (diagonal) preconditioner to match linbcg
-    call KSPSetType(ksp, KSPBCGS, ierr)       ! Use BCGS to match linbcg behavior
-    
-    ! Set convergence tolerances
-    ! rtol = relative tolerance, atol = absolute tolerance (use default), dtol = divergence tolerance, maxits = max iterations
-    call KSPSetTolerances(ksp, rtol, PETSC_DEFAULT_REAL, PETSC_DEFAULT_REAL, maxit, ierr)
-    
-    ! Use unpreconditioned norm (matching linbcg with itol=1)
-    call KSPSetNormType(ksp, KSP_NORM_UNPRECONDITIONED, ierr)
-    
-    ! Allow command line override of solver options
-    call KSPSetFromOptions(ksp, ierr)
+    ! Update initial guess
+    call VecSetValues(xx_saved, n, idx, x, INSERT_VALUES, ierr)
+    call VecAssemblyBegin(xx_saved,ierr); call VecAssemblyEnd(xx_saved,ierr)
+    deallocate(idx)
 
-    call KSPSolve(ksp, bb, xx, ierr)
+    ! Solve the system (reusing persistent KSP)
+    call KSPSolve(ksp_saved, bb_saved, xx_saved, ierr)
     
     ! Check convergence
-    call KSPGetConvergedReason(ksp, reason, ierr)
-    call KSPGetIterationNumber(ksp, its, ierr)
-    call KSPGetResidualNorm(ksp, rnorm, ierr)
+    call KSPGetConvergedReason(ksp_saved, reason, ierr)
+    call KSPGetIterationNumber(ksp_saved, its, ierr)
+    call KSPGetResidualNorm(ksp_saved, rnorm, ierr)
     
     ! Report convergence status (commented out by default for performance)
     ! Uncomment the next line to see convergence info every solve:
@@ -113,16 +153,10 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
     end if
 
     ! Extract solution
-    call VecGetArrayF90(xx, xptr, ierr)
+    call VecGetArrayF90(xx_saved, xptr, ierr)
     x = xptr
-    call VecRestoreArrayF90(xx, xptr, ierr)
+    call VecRestoreArrayF90(xx_saved, xptr, ierr)
 
-    ! Cleanup
-    deallocate(idx)
-    call KSPDestroy(ksp, ierr)
-    call VecDestroy(bb, ierr)
-    call VecDestroy(xx, ierr)
-    call MatDestroy(A, ierr)
   end subroutine solve_petsc_csr
 
 end module petsc_solver
\ No newline at end of file
diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90
index e3c2872..8c51b62 100755
--- a/src/heatflow/mod_setup.f90
+++ b/src/heatflow/mod_setup.f90
@@ -52,8 +52,13 @@ subroutine set_global_variables()
       ! later date
       !---------------------------------------------------
       write(*,*) "Setting up material properties"
+      write(*,'(A,I10,A)') " Processing ", NA, " grid cells..."
       index = 0
       do iz = 1, nz
+         ! Progress reporting every 10% for large grids
+         if (mod(iz-1, max(1,nz/10)) == 0 .and. iz > 1) then
+            write(*,'(A,I3,A)') "   Progress: ", int(100.0*real(iz)/real(nz)), "%"
+         end if
          do iy = 1, ny
             do ix = 1, nx
             index = index + 1
@@ -75,14 +80,15 @@ subroutine set_global_variables()
       !---------------------------------------------------
       ! Check if the sparse matrix matches the full matrix
       !---------------------------------------------------
+      write(*,*) "Building sparse H matrix..."
       if (Check_Sparse_Full) then
          print*, "CHECK SPARSE FULL"
          CALL build_Hmatrix()
       else
+         ! Build CSR format directly (acsr, ja, ia are allocated inside sparse_Hmatrix)
          CALL sparse_Hmatrix()
-         ! Allocate the arrays to hold the H matrix in CSR format
-         allocate(acsr(ra%len), ja(ra%len), ia(ra%n+1))
-         CALL coo2csr(ra%n, ra%len, ra%val, ra%irow, ra%jcol, acsr, ja, ia)
+         ! No need for COO->CSR conversion anymore, it's already in CSR format!
+         write(*,*) "Sparse matrix setup complete."
       end if
       !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -91,26 +97,46 @@ end subroutine set_global_variables
 !!!#################################################################################################
 
 !!!#################################################################################################
-!!! This sets up the H Matrix directly in sparse row storage
+!!! This sets up the H Matrix directly in sparse row storage (CSR format)
+!!! Modified to build CSR directly instead of COO->CSR to save memory
 !!!#################################################################################################
    subroutine sparse_Hmatrix()
      implicit none
       real(real12) :: H0 ! Holds the value of the H matrix
-      integer(int12) :: i, j, len, count, k ! i and j are the row and column of the H matrix
+      integer(int12) :: i, j, count, k, row ! i and j are the row and column of the H matrix
+      integer(int12) :: nnz_estimate
       ! Holds the values to add to the row to get the column
       integer(int12), allocatable, dimension(:) :: addit 
-      ! The number of non-zero elements in the H matrix to look for
-      len = 7*nx*ny*nz - 2*(nx*ny + ny*nz + nz*nx)
-      if (Periodicx) len = len + 2*ny*nz
-      if (Periodicy) len = len + 2*nz*nx
-      if (Periodicz) len = len + 2*nx*ny
+      ! Temporary arrays for building each row
+      real(real12), allocatable, dimension(:) :: row_vals
+      integer(int12), allocatable, dimension(:) :: row_cols
+      integer(int12) :: row_count, max_row_size
+      
       ra%n = NA ! The number of rows in the H matrix
-      ra%len = len ! The number of non-zero elements in the H matrix
-      ! Allocate the arrays to hold the H matrix in sparse storage
-      allocate(ra%val(len), ra%irow(len), ra%jcol(len))
-      ra%val(:)=0
-      ra%irow(:)=-2
-      ra%jcol(:)=-1
+      
+      ! Estimate nonzeros (7 per interior cell, less at boundaries)
+      nnz_estimate = 7*nx*ny*nz - 2*(nx*ny + ny*nz + nz*nx)
+      if (Periodicx) nnz_estimate = nnz_estimate + 2*ny*nz
+      if (Periodicy) nnz_estimate = nnz_estimate + 2*nz*nx
+      if (Periodicz) nnz_estimate = nnz_estimate + 2*nx*ny
+      
+      ! Allocate CSR arrays with initial estimate (will grow if needed)
+      ! For 451^3: ~640M entries = 10GB, so allocate conservatively
+      write(*,'(A,I12,A)') " Estimated nonzeros: ", nnz_estimate, ""
+      allocate(acsr(nnz_estimate), ja(nnz_estimate))
+      allocate(ia(NA+1))
+      
+      ! Setup neighbor offsets
+      addit = [1] 
+      if (Periodicx) addit = [addit, (nx-1)]
+      if (ny .gt. 1) addit = [addit, nx]
+      if ((Periodicy).and.(ny .gt. 1)) addit = [addit, (ny-1)*nx]
+      if (nz .gt. 1) addit = [addit, nx*ny]
+      if ((Periodicz).and.(nz .gt. 1)) addit = [addit, (nz-1)*ny*nx]
+      
+      ! Allocate temporary row storage (max ~13 entries per row for 3D)
+      max_row_size = 2*size(addit,1) + 1
+      allocate(row_vals(max_row_size), row_cols(max_row_size))
       addit = [1] ! The values to add to the row to get the column
       if (Periodicx) addit = [addit, (nx-1)]
       if (ny .gt. 1) addit = [addit, nx] ! Add the values to add to the row to get the column
@@ -124,41 +150,131 @@ subroutine sparse_Hmatrix()
       !write(6,*) NA
       !write(6,*) "========================================="
 
-      count = 0 ! The number of non-zero elements in the H matrix
-      parent_loop: do j = 1, NA ! Loop over the columns of the H matrix
-         i=j ! The row of the H matrix
-         count = count + 1 ! The number of non-zero elements in the H matrix
-         H0 = hmatrixfunc(i,j) ! The value of the H matrix
-         ra%val(count) = H0 ! The value of the H matrix
-         ra%irow(count) = i ! The row of the H matrix
-         ra%jcol(count) = j ! The column of the H matrix
-         ! Loop over the values to add to the row to get the column
-         neighbour_loop: do k = 1, size(addit,1)
-             i = j + addit(k) ! The row of the H matrix
-             ! If the row is greater than the number of rows ...
-             !...in the H matrix then go to the next column
-             if ((i.gt.NA)) cycle parent_loop 
-             H0=hmatrixfunc(i,j) ! The value of the H matrix
-             ! If the value of the H matrix is less than TINY then go to the next value ...
-             !...to add to the row to get the column
-             if (abs(H0).lt.TINY) cycle neighbour_loop 
-             count = count + 1 ! The number of non-zero elements in the H matrix
-             ra%val(count) = H0 ! The value of the H matrix
-             ra%irow(count) = i ! The row of the H matrix
-             ra%jcol(count) = j ! The column of the H matrix
-             count = count + 1 ! The number of non-zero elements in the H matrix
-             ra%val(count) = H0 ! The value of the H matrix
-             ra%irow(count) = j ! The row of the H matrix
-             ra%jcol(count) = i ! The column of the H matrix
-             !write(6,*) j,i, H0, count
-          end do neighbour_loop
-     end do parent_loop
-
-     !write(6,*) "========================================="
-     !write(6,*) 'c len',count, len
+      
+      count = 0 ! Total nonzeros counter
+      ia(1) = 1 ! CSR row pointer (1-based for Fortran)
+      
+      ! Build CSR format row-by-row
+      write(*,'(A)') " Building CSR matrix row-by-row..."
+      parent_loop: do row = 1, NA
+         ! Progress reporting every 10%
+         if (mod(row-1, max(1,NA/10)) == 0 .and. row > 1) then
+            write(*,'(A,I3,A,I12,A)') "   Progress: ", int(100.0*real(row)/real(NA)), &
+                 "%, nnz=", count, ""
+         end if
+         
+         row_count = 0
+         
+         ! Diagonal element
+         j = row
+         row_count = row_count + 1
+         H0 = hmatrixfunc(row, j)
+         row_vals(row_count) = H0
+         row_cols(row_count) = j
+         
+         ! Off-diagonal elements (process in column-sorted order for CSR)
+         ! First pass: collect all neighbors
+         do k = 1, size(addit,1)
+            j = row + addit(k)
+            if (j > NA) cycle ! Skip if out of bounds
+            
+            H0 = hmatrixfunc(row, j)
+            if (abs(H0) >= TINY) then
+               row_count = row_count + 1
+               row_vals(row_count) = H0
+               row_cols(row_count) = j
+            end if
+         end do
+         
+         ! Second pass: collect reverse neighbors (j < row)
+         do k = 1, size(addit,1)
+            j = row - addit(k)
+            if (j < 1) cycle ! Skip if out of bounds
+            
+            H0 = hmatrixfunc(row, j)
+            if (abs(H0) >= TINY) then
+               row_count = row_count + 1
+               row_vals(row_count) = H0
+               row_cols(row_count) = j
+            end if
+         end do
+         
+         ! Sort this row's entries by column index (required for CSR)
+         call sort_row(row_vals, row_cols, row_count)
+         
+         ! Copy row data to CSR arrays (no bounds checking - we pre-allocated correctly)
+         do i = 1, row_count
+            count = count + 1
+            acsr(count) = row_vals(i)
+            ja(count) = row_cols(i)
+         end do
+         
+         ! Update row pointer
+         ia(row+1) = count + 1
+      end do parent_loop
+      
+      ra%len = count
+      
+      ! Trim arrays to actual size if we over-estimated
+      if (count < size(acsr)) then
+         write(*,'(A,I12,A,I12)') " Trimming arrays from ", size(acsr), " to ", count
+         call trim_csr_arrays(acsr, ja, count)
+      end if
+      
+      deallocate(row_vals, row_cols)
+      write(*,'(A,I12,A)') " CSR matrix built successfully. Actual nonzeros: ", count, ""
    end subroutine sparse_Hmatrix
 !!!#################################################################################################
 
+!!!#################################################################################################
+!!! Sort a row's entries by column index (simple insertion sort, rows are small)
+!!!#################################################################################################
+   subroutine sort_row(vals, cols, n)
+      implicit none
+      integer(int12), intent(in) :: n
+      real(real12), dimension(n), intent(inout) :: vals
+      integer(int12), dimension(n), intent(inout) :: cols
+      integer(int12) :: i, j, temp_col
+      real(real12) :: temp_val
+      
+      do i = 2, n
+         temp_val = vals(i)
+         temp_col = cols(i)
+         j = i - 1
+         do while (j >= 1)
+            if (cols(j) <= temp_col) exit
+            vals(j+1) = vals(j)
+            cols(j+1) = cols(j)
+            j = j - 1
+         end do
+         vals(j+1) = temp_val
+         cols(j+1) = temp_col
+      end do
+   end subroutine sort_row
+!!!#################################################################################################
+
+!!!#################################################################################################
+!!! Trim CSR arrays to exact size
+!!!#################################################################################################
+   subroutine trim_csr_arrays(acsr_arr, ja_arr, final_size)
+      implicit none
+      integer(int12), intent(in) :: final_size
+      real(real12), allocatable, dimension(:), intent(inout) :: acsr_arr
+      integer(int12), allocatable, dimension(:), intent(inout) :: ja_arr
+      real(real12), allocatable, dimension(:) :: temp_vals
+      integer(int12), allocatable, dimension(:) :: temp_cols
+      
+      allocate(temp_vals(final_size), temp_cols(final_size))
+      temp_vals = acsr_arr(1:final_size)
+      temp_cols = ja_arr(1:final_size)
+      deallocate(acsr_arr, ja_arr)
+      allocate(acsr_arr(final_size), ja_arr(final_size))
+      acsr_arr = temp_vals
+      ja_arr = temp_cols
+      deallocate(temp_vals, temp_cols)
+   end subroutine trim_csr_arrays
+!!!#################################################################################################
+
 !!!#################################################################################################
 !!! Check if the simulation will be stable. NOT FULLY IMPLEMENTED   
 !!!#################################################################################################

From b2b58f5f18dd4a451594c5de4b7cd28a2be40b86 Mon Sep 17 00:00:00 2001
From: Your Name <hm556@exeter.ac.uk>
Date: Tue, 18 Nov 2025 12:56:36 +0000
Subject: [PATCH 20/28] Fixed memory issues

---
 src/heatflow/mkl_pardiso.f90       | 452 -----------------------------
 src/heatflow/mod_petsc_solver.f90  | 160 ++++++++--
 src/heatflow/mod_setup.f90         |   1 -
 src/heatflow/mod_sparse_solver.f90 | 420 ---------------------------
 src/heatflow/mod_tempdep.f90       |   0
 5 files changed, 141 insertions(+), 892 deletions(-)
 delete mode 100644 src/heatflow/mkl_pardiso.f90
 mode change 100755 => 100644 src/heatflow/mod_setup.f90
 delete mode 100644 src/heatflow/mod_sparse_solver.f90
 mode change 100755 => 100644 src/heatflow/mod_tempdep.f90

diff --git a/src/heatflow/mkl_pardiso.f90 b/src/heatflow/mkl_pardiso.f90
deleted file mode 100644
index 0ff7527..0000000
--- a/src/heatflow/mkl_pardiso.f90
+++ /dev/null
@@ -1,452 +0,0 @@
-!*******************************************************************************
-!   Copyright(C) 2004-2013 Intel Corporation. All Rights Reserved.
-!
-!   The source code, information  and  material ("Material") contained herein is
-!   owned  by Intel Corporation or its suppliers or licensors, and title to such
-!   Material remains  with Intel Corporation  or its suppliers or licensors. The
-!   Material  contains proprietary information  of  Intel or  its  suppliers and
-!   licensors. The  Material is protected by worldwide copyright laws and treaty
-!   provisions. No  part  of  the  Material  may  be  used,  copied, reproduced,
-!   modified, published, uploaded, posted, transmitted, distributed or disclosed
-!   in any way  without Intel's  prior  express written  permission. No  license
-!   under  any patent, copyright  or  other intellectual property rights  in the
-!   Material  is  granted  to  or  conferred  upon  you,  either  expressly,  by
-!   implication, inducement,  estoppel or  otherwise.  Any  license  under  such
-!   intellectual  property  rights must  be express  and  approved  by  Intel in
-!   writing.
-!
-!   *Third Party trademarks are the property of their respective owners.
-!
-!   Unless otherwise  agreed  by Intel  in writing, you may not remove  or alter
-!   this  notice or  any other notice embedded  in Materials by Intel or Intel's
-!   suppliers or licensors in any way.
-!
-!*******************************************************************************
-!   Content : MKL PARDISO Fortran-90 header file
-!
-!           Contains PARDISO routine definition.
-!           For CDECL use only.
-!
-!*******************************************************************************
-!DEC$ IF .NOT. DEFINED( __MKL_PARDISO_F90 )
-
-!DEC$ DEFINE __MKL_PARDISO_F90
-
-      MODULE MKL_PARDISO_PRIVATE
-
-        TYPE MKL_PARDISO_HANDLE; INTEGER(KIND=8) DUMMY; END TYPE
-
-        INTEGER, PARAMETER :: PARDISO_OOC_FILE_NAME = 1
-
-      END MODULE MKL_PARDISO_PRIVATE
-
-      MODULE MKL_PARDISO
-        USE MKL_PARDISO_PRIVATE
-
-!
-! Subroutine prototype for PARDISO
-!
-
-      INTERFACE PARDISO
-        SUBROUTINE PARDISO_S( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER,          INTENT(IN)    :: MAXFCT
-          INTEGER,          INTENT(IN)    :: MNUM
-          INTEGER,          INTENT(IN)    :: MTYPE
-          INTEGER,          INTENT(IN)    :: PHASE
-          INTEGER,          INTENT(IN)    :: N
-          INTEGER,          INTENT(IN)    :: IA(*)
-          INTEGER,          INTENT(IN)    :: JA(*)
-          INTEGER,          INTENT(INOUT) :: PERM(*)
-          INTEGER,          INTENT(IN)    :: NRHS
-          INTEGER,          INTENT(INOUT) :: IPARM(*)
-          INTEGER,          INTENT(IN)    :: MSGLVL
-          INTEGER,          INTENT(OUT)   :: ERROR
-
-          REAL(KIND=4),     INTENT(IN)    :: A(*)
-          REAL(KIND=4),     INTENT(INOUT) :: B(*)
-          REAL(KIND=4),     INTENT(OUT)   :: X(*)
-        END SUBROUTINE PARDISO_S
-
-        SUBROUTINE PARDISO_D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER,          INTENT(IN)    :: MAXFCT
-          INTEGER,          INTENT(IN)    :: MNUM
-          INTEGER,          INTENT(IN)    :: MTYPE
-          INTEGER,          INTENT(IN)    :: PHASE
-          INTEGER,          INTENT(IN)    :: N
-          INTEGER,          INTENT(IN)    :: IA(*)
-          INTEGER,          INTENT(IN)    :: JA(*)
-          INTEGER,          INTENT(INOUT) :: PERM(*)
-          INTEGER,          INTENT(IN)    :: NRHS
-          INTEGER,          INTENT(INOUT) :: IPARM(*)
-          INTEGER,          INTENT(IN)    :: MSGLVL
-          INTEGER,          INTENT(OUT)   :: ERROR
-
-          REAL(KIND=8),     INTENT(IN)    :: A(*)
-          REAL(KIND=8),     INTENT(INOUT) :: B(*)
-          REAL(KIND=8),     INTENT(OUT)   :: X(*)
-        END SUBROUTINE PARDISO_D
-
-        SUBROUTINE PARDISO_SC( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER,          INTENT(IN)    :: MAXFCT
-          INTEGER,          INTENT(IN)    :: MNUM
-          INTEGER,          INTENT(IN)    :: MTYPE
-          INTEGER,          INTENT(IN)    :: PHASE
-          INTEGER,          INTENT(IN)    :: N
-          INTEGER,          INTENT(IN)    :: IA(*)
-          INTEGER,          INTENT(IN)    :: JA(*)
-          INTEGER,          INTENT(INOUT) :: PERM(*)
-          INTEGER,          INTENT(IN)    :: NRHS
-          INTEGER,          INTENT(INOUT) :: IPARM(*)
-          INTEGER,          INTENT(IN)    :: MSGLVL
-          INTEGER,          INTENT(OUT)   :: ERROR
-
-          COMPLEX(KIND=4),  INTENT(IN)    :: A(*)
-          COMPLEX(KIND=4),  INTENT(INOUT) :: B(*)
-          COMPLEX(KIND=4),  INTENT(OUT)   :: X(*)
-        END SUBROUTINE PARDISO_SC
-
-        SUBROUTINE PARDISO_DC( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER,          INTENT(IN)    :: MAXFCT
-          INTEGER,          INTENT(IN)    :: MNUM
-          INTEGER,          INTENT(IN)    :: MTYPE
-          INTEGER,          INTENT(IN)    :: PHASE
-          INTEGER,          INTENT(IN)    :: N
-          INTEGER,          INTENT(IN)    :: IA(*)
-          INTEGER,          INTENT(IN)    :: JA(*)
-          INTEGER,          INTENT(INOUT) :: PERM(*)
-          INTEGER,          INTENT(IN)    :: NRHS
-          INTEGER,          INTENT(INOUT) :: IPARM(*)
-          INTEGER,          INTENT(IN)    :: MSGLVL
-          INTEGER,          INTENT(OUT)   :: ERROR
-          COMPLEX(KIND=8),  INTENT(IN)    :: A(*)
-          COMPLEX(KIND=8),  INTENT(INOUT) :: B(*)
-          COMPLEX(KIND=8),  INTENT(OUT)   :: X(*)
-        END SUBROUTINE PARDISO_DC
-
-
-        SUBROUTINE PARDISO_S_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER,          INTENT(IN)    :: MAXFCT
-          INTEGER,          INTENT(IN)    :: MNUM
-          INTEGER,          INTENT(IN)    :: MTYPE
-          INTEGER,          INTENT(IN)    :: PHASE
-          INTEGER,          INTENT(IN)    :: N
-          INTEGER,          INTENT(IN)    :: IA(*)
-          INTEGER,          INTENT(IN)    :: JA(*)
-          INTEGER,          INTENT(INOUT) :: PERM(*)
-          INTEGER,          INTENT(IN)    :: NRHS
-          INTEGER,          INTENT(INOUT) :: IPARM(*)
-          INTEGER,          INTENT(IN)    :: MSGLVL
-          INTEGER,          INTENT(OUT)   :: ERROR
-          REAL(KIND=4),     INTENT(IN)    :: A(*)
-          REAL(KIND=4),     INTENT(INOUT) :: B(N,*)
-          REAL(KIND=4),     INTENT(OUT)   :: X(N,*)
-        END SUBROUTINE PARDISO_S_2D
-
-        SUBROUTINE PARDISO_D_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER,          INTENT(IN)    :: MAXFCT
-          INTEGER,          INTENT(IN)    :: MNUM
-          INTEGER,          INTENT(IN)    :: MTYPE
-          INTEGER,          INTENT(IN)    :: PHASE
-          INTEGER,          INTENT(IN)    :: N
-          INTEGER,          INTENT(IN)    :: IA(*)
-          INTEGER,          INTENT(IN)    :: JA(*)
-          INTEGER,          INTENT(INOUT) :: PERM(*)
-          INTEGER,          INTENT(IN)    :: NRHS
-          INTEGER,          INTENT(INOUT) :: IPARM(*)
-          INTEGER,          INTENT(IN)    :: MSGLVL
-          INTEGER,          INTENT(OUT)   :: ERROR
-          REAL(KIND=8),     INTENT(IN)    :: A(*)
-          REAL(KIND=8),     INTENT(INOUT) :: B(N,*)
-          REAL(KIND=8),     INTENT(OUT)   :: X(N,*)
-        END SUBROUTINE PARDISO_D_2D
-
-        SUBROUTINE PARDISO_SC_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER,          INTENT(IN)    :: MAXFCT
-          INTEGER,          INTENT(IN)    :: MNUM
-          INTEGER,          INTENT(IN)    :: MTYPE
-          INTEGER,          INTENT(IN)    :: PHASE
-          INTEGER,          INTENT(IN)    :: N
-          INTEGER,          INTENT(IN)    :: IA(*)
-          INTEGER,          INTENT(IN)    :: JA(*)
-          INTEGER,          INTENT(INOUT) :: PERM(*)
-          INTEGER,          INTENT(IN)    :: NRHS
-          INTEGER,          INTENT(INOUT) :: IPARM(*)
-          INTEGER,          INTENT(IN)    :: MSGLVL
-          INTEGER,          INTENT(OUT)   :: ERROR
-          COMPLEX(KIND=4),  INTENT(IN)    :: A(*)
-          COMPLEX(KIND=4),  INTENT(INOUT) :: B(N,*)
-          COMPLEX(KIND=4),  INTENT(OUT)   :: X(N,*)
-        END SUBROUTINE PARDISO_SC_2D
-
-        SUBROUTINE PARDISO_DC_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER,          INTENT(IN)    :: MAXFCT
-          INTEGER,          INTENT(IN)    :: MNUM
-          INTEGER,          INTENT(IN)    :: MTYPE
-          INTEGER,          INTENT(IN)    :: PHASE
-          INTEGER,          INTENT(IN)    :: N
-          INTEGER,          INTENT(IN)    :: IA(*)
-          INTEGER,          INTENT(IN)    :: JA(*)
-          INTEGER,          INTENT(INOUT) :: PERM(*)
-          INTEGER,          INTENT(IN)    :: NRHS
-          INTEGER,          INTENT(INOUT) :: IPARM(*)
-          INTEGER,          INTENT(IN)    :: MSGLVL
-          INTEGER,          INTENT(OUT)   :: ERROR
-          COMPLEX(KIND=8),  INTENT(IN)    :: A(*)
-          COMPLEX(KIND=8),  INTENT(INOUT) :: B(N,*)
-          COMPLEX(KIND=8),  INTENT(OUT)   :: X(N,*)
-        END SUBROUTINE PARDISO_DC_2D
-      END INTERFACE
-!
-! Subroutine prototype for PARDISO_64
-!
-! Note: The pardiso_64 interface is not supported on IA-32 architecture.
-!       If called on IA-32, error = -12 is returned.
-!
-      INTERFACE PARDISO_64
-        SUBROUTINE PARDISO_S_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
-          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
-          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
-          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
-          INTEGER(KIND=8),          INTENT(IN)    :: N
-          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
-          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
-          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
-          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
-          REAL(KIND=4),             INTENT(IN)    :: A(*)
-          REAL(KIND=4),             INTENT(INOUT) :: B(*)
-          REAL(KIND=4),             INTENT(OUT)   :: X(*)
-        END SUBROUTINE PARDISO_S_64
-
-        SUBROUTINE PARDISO_D_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
-          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
-          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
-          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
-          INTEGER(KIND=8),          INTENT(IN)    :: N
-          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
-          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
-          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
-          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
-          REAL(KIND=8),             INTENT(IN)    :: A(*)
-          REAL(KIND=8),             INTENT(INOUT) :: B(*)
-          REAL(KIND=8),             INTENT(OUT)   :: X(*)
-        END SUBROUTINE PARDISO_D_64
-
-        SUBROUTINE PARDISO_SC_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
-          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
-          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
-          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
-          INTEGER(KIND=8),          INTENT(IN)    :: N
-          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
-          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
-          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
-          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
-          COMPLEX(KIND=4),          INTENT(IN)    :: A(*)
-          COMPLEX(KIND=4),          INTENT(INOUT) :: B(*)
-          COMPLEX(KIND=4),          INTENT(OUT)   :: X(*)
-        END SUBROUTINE PARDISO_SC_64
-
-        SUBROUTINE PARDISO_DC_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
-          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
-          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
-          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
-          INTEGER(KIND=8),          INTENT(IN)    :: N
-          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
-          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
-          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
-          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
-          COMPLEX(KIND=8),          INTENT(IN)    :: A(*)
-          COMPLEX(KIND=8),          INTENT(INOUT) :: B(*)
-          COMPLEX(KIND=8),          INTENT(OUT)   :: X(*)
-        END SUBROUTINE PARDISO_DC_64
-
-        SUBROUTINE PARDISO_S_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
-          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
-          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
-          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
-          INTEGER(KIND=8),          INTENT(IN)    :: N
-          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
-          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
-          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
-          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
-          REAL(KIND=4),             INTENT(IN)    :: A(*)
-          REAL(KIND=4),             INTENT(INOUT) :: B(N,*)
-          REAL(KIND=4),             INTENT(OUT)   :: X(N,*)
-        END SUBROUTINE PARDISO_S_64_2D
-
-        SUBROUTINE PARDISO_D_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
-          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
-          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
-          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
-          INTEGER(KIND=8),          INTENT(IN)    :: N
-          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
-          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
-          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
-          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
-          REAL(KIND=8),             INTENT(IN)    :: A(*)
-          REAL(KIND=8),             INTENT(INOUT) :: B(N,*)
-          REAL(KIND=8),             INTENT(OUT)   :: X(N,*)
-        END SUBROUTINE PARDISO_D_64_2D
-
-        SUBROUTINE PARDISO_SC_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
-          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
-          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
-          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
-          INTEGER(KIND=8),          INTENT(IN)    :: N
-          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
-          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
-          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
-          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
-          COMPLEX(KIND=4),          INTENT(IN)    :: A(*)
-          COMPLEX(KIND=4),          INTENT(INOUT) :: B(N,*)
-          COMPLEX(KIND=4),          INTENT(OUT)   :: X(N,*)
-        END SUBROUTINE PARDISO_SC_64_2D
-
-        SUBROUTINE PARDISO_DC_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
-          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
-          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
-          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
-          INTEGER(KIND=8),          INTENT(IN)    :: N
-          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
-          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
-          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
-          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
-          COMPLEX(KIND=8),          INTENT(IN)    :: A(*)
-          COMPLEX(KIND=8),          INTENT(INOUT) :: B(N,*)
-          COMPLEX(KIND=8),          INTENT(OUT)   :: X(N,*)
-        END SUBROUTINE PARDISO_DC_64_2D
-
-      END INTERFACE
-
-      INTERFACE
-
-       SUBROUTINE PARDISOINIT(PT, MTYPE, IPARM)
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(OUT) :: PT(*)
-          INTEGER,          INTENT(IN)    :: MTYPE
-          INTEGER,          INTENT(OUT) :: IPARM(*)
-       END SUBROUTINE PARDISOINIT
-
-      END INTERFACE
-
-      INTERFACE PARDISO_GET
-
-       FUNCTION PARDISO_GETENV(PT, OptName, StrVal)
-          USE MKL_PARDISO_PRIVATE
-          INTEGER PARDISO_GETENV
-          TYPE(MKL_PARDISO_HANDLE), INTENT(IN)  :: PT(*)
-          INTEGER,                  INTENT(IN)  :: OptName
-          CHARACTER(*),             INTENT(OUT)  :: StrVal
-       END FUNCTION PARDISO_GETENV
-
-      END INTERFACE
-
-      INTERFACE PARDISO_SET
-
-       FUNCTION PARDISO_SETENV(PT, OptName, StrVal)
-          USE MKL_PARDISO_PRIVATE
-          INTEGER PARDISO_SETENV
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER,                  INTENT(IN)    :: OptName
-          INTEGER,                  INTENT(IN)    :: StrVal(*)
-       END FUNCTION PARDISO_SETENV
-
-      END INTERFACE
-
-      INTERFACE PARDISO_PIV
-      
-      FUNCTION MKL_PARDISO_PIVOT( AII, BII, EPS)
-         REAL(KIND=8)   :: AII, BII, EPS
-         INTEGER   MKL_PARDISO_PIVOT
-         END
-      END INTERFACE PARDISO_PIV
-
-      INTERFACE PARDISO_GETDIAG
-
-         SUBROUTINE PARDISO_GETDIAG_D(PT, DIAG_FACT, DIAG_A, MNUM, EPS)
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          REAL(KIND=8), INTENT(INOUT)             :: DIAG_FACT, DIAG_A
-          INTEGER, INTENT(IN)            :: MNUM
-          INTEGER, INTENT(INOUT)         :: EPS
-         END
-
-         SUBROUTINE PARDISO_GETDIAG_Z(PT, DIAG_FACT, DIAG_A, MNUM, EPS)
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          COMPLEX(KIND=8), INTENT(INOUT)          :: DIAG_FACT, DIAG_A
-          INTEGER, INTENT(IN)            :: MNUM
-          INTEGER, INTENT(INOUT)         :: EPS
-         END
-
-      END INTERFACE PARDISO_GETDIAG
-
-
-
-
-      END MODULE MKL_PARDISO
-
-!DEC$ ENDIF
diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90
index e45ce2e..f0c061f 100644
--- a/src/heatflow/mod_petsc_solver.f90
+++ b/src/heatflow/mod_petsc_solver.f90
@@ -2,6 +2,7 @@ module petsc_solver
 #include "petsc/finclude/petscsys.h"
 #include "petsc/finclude/petscksp.h"
   use petscksp
+  use iso_c_binding
   implicit none
   private
   public :: petsc_init, petsc_finalize, solve_petsc_csr, petsc_cleanup
@@ -53,37 +54,70 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
     PC  :: pc
     integer :: ierr, i, row_nz, start_k, its
     integer, allocatable :: cols0(:), idx(:), d_nnz(:)
+    PetscInt :: idx_array(1)
+    PetscScalar :: val_array(1)
     real(8), allocatable :: vals(:)
-    real(8), pointer :: xptr(:)
-    KSPConvergedReason :: reason
     real(8) :: rnorm
     logical :: rebuild_needed
 
+    write(*,'(A,I0)') ' [DEBUG] Entered solve_petsc_csr, n=', n
+    call flush(6)
+    
     if (size(ia) /= n+1) stop 'solve_petsc_csr: ia size mismatch'
     if (size(b) /= n .or. size(x) /= n) stop 'solve_petsc_csr: vector size mismatch'
 
+    write(*,'(A)') ' [DEBUG] Size checks passed'
+    call flush(6)
+
     ! Determine if we need to rebuild the matrix structure
     rebuild_needed = .false.
     if (.not. initialized) rebuild_needed = .true.
     if (n /= n_saved) rebuild_needed = .true.
     
+    write(*,'(A,L1)') ' [DEBUG] rebuild_needed=', rebuild_needed
+    call flush(6)
+    
     ! Create PETSc objects on first call or if size changed
     if (rebuild_needed) then
+      write(*,'(A)') ' [DEBUG] Starting PETSc object creation...'
+      call flush(6)
+      
       ! Clean up old objects if they exist
       if (initialized) call petsc_cleanup()
       
+      write(*,'(A)') ' [DEBUG] Preallocating matrix...'
+      call flush(6)
+      
       ! Preallocate matrix with exact nonzeros per row (saves memory)
       allocate(d_nnz(n))
       do i = 1, n
         d_nnz(i) = ia(i+1) - ia(i)
       end do
+      
+      write(*,'(A,I0,A,I0)') ' [DEBUG] Creating matrix: n=', n, ', max_nnz/row=', maxval(d_nnz)
+      call flush(6)
+      
+      ! Create matrix with exact preallocation (most memory-efficient)
       call MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 0, d_nnz, A_saved, ierr)
+      if (ierr /= 0) then
+        write(0,*) "ERROR: MatCreateSeqAIJ failed with ierr=", ierr
+        write(0,*) "  Matrix size may exceed system limits"
+        write(0,*) "  n=", n, ", nnz=", sum(int(d_nnz,8)), ", max_nnz/row=", maxval(d_nnz)
+        stop
+      end if
+      
+      write(*,'(A)') ' [DEBUG] Matrix created successfully'
+      call flush(6)
+      
       deallocate(d_nnz)
       
       ! Create persistent vectors
       call VecCreateSeq(PETSC_COMM_SELF, n, bb_saved, ierr)
       call VecCreateSeq(PETSC_COMM_SELF, n, xx_saved, ierr)
       
+      write(*,'(A)') ' [DEBUG] Vectors created'
+      call flush(6)
+      
       ! Create and configure KSP solver (persistent across timesteps)
       call KSPCreate(PETSC_COMM_SELF, ksp_saved, ierr)
       call KSPSetOperators(ksp_saved, A_saved, A_saved, ierr)
@@ -100,6 +134,8 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
     end if
 
     ! Update matrix values (always needed each timestep)
+    write(*,'(A)') ' Updating PETSc matrix values...'
+    call flush(6)
     call MatZeroEntries(A_saved, ierr)
     do i = 1, n
        row_nz = ia(i+1) - ia(i)
@@ -115,47 +151,133 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
           deallocate(cols0, vals)
        end if
     end do
+    write(*,'(A)') ' Matrix assembly beginning...'
+    call flush(6)
     call MatAssemblyBegin(A_saved, MAT_FINAL_ASSEMBLY, ierr)
     call MatAssemblyEnd(A_saved, MAT_FINAL_ASSEMBLY, ierr)
+    write(*,'(A)') ' Matrix assembly complete.'
+    call flush(6)
     
     ! Optional: Verify matrix assembly (uncomment for debugging)
     ! call MatView(A_saved, PETSC_VIEWER_STDOUT_SELF, ierr)
 
-    ! Update RHS vector
-    allocate(idx(n))
-    idx = [(i-1, i=1,n)]
-    call VecSetValues(bb_saved, n, idx, b, INSERT_VALUES, ierr)
+    ! Update RHS vector in batches to avoid memory issues with huge systems
+    write(*,'(A)') ' Updating RHS vector...'
+    call flush(6)
+    block
+      integer, parameter :: VEC_CHUNK = 1000000
+      integer :: vec_start, vec_end, vec_len, k
+      integer, allocatable :: idx_vec(:)
+      
+      do vec_start = 1, n, VEC_CHUNK
+        vec_end = min(vec_start + VEC_CHUNK - 1, n)
+        vec_len = vec_end - vec_start + 1
+        
+        allocate(idx_vec(vec_len))
+        idx_vec = [(vec_start + k - 2, k=1,vec_len)]  ! 0-based indices
+        
+        call VecSetValues(bb_saved, vec_len, idx_vec, b(vec_start:vec_end), INSERT_VALUES, ierr)
+        if (ierr /= 0) then
+          write(0,*) "ERROR: VecSetValues(bb) failed at", vec_start, "ierr=", ierr
+          stop
+        end if
+        
+        deallocate(idx_vec)
+      end do
+    end block
     call VecAssemblyBegin(bb_saved,ierr); call VecAssemblyEnd(bb_saved,ierr)
+    write(*,'(A)') ' RHS vector complete.'
+    call flush(6)
 
-    ! Update initial guess
-    call VecSetValues(xx_saved, n, idx, x, INSERT_VALUES, ierr)
+    ! Update initial guess in batches
+    write(*,'(A)') ' Updating initial guess vector...'
+    call flush(6)
+    block
+      integer, parameter :: VEC_CHUNK = 1000000
+      integer :: vec_start, vec_end, vec_len, k
+      integer, allocatable :: idx_vec(:)
+      
+      do vec_start = 1, n, VEC_CHUNK
+        vec_end = min(vec_start + VEC_CHUNK - 1, n)
+        vec_len = vec_end - vec_start + 1
+        
+        allocate(idx_vec(vec_len))
+        idx_vec = [(vec_start + k - 2, k=1,vec_len)]  ! 0-based indices
+        
+        call VecSetValues(xx_saved, vec_len, idx_vec, x(vec_start:vec_end), INSERT_VALUES, ierr)
+        if (ierr /= 0) then
+          write(0,*) "ERROR: VecSetValues(xx) failed at", vec_start, "ierr=", ierr
+          stop
+        end if
+        
+        deallocate(idx_vec)
+      end do
+    end block
     call VecAssemblyBegin(xx_saved,ierr); call VecAssemblyEnd(xx_saved,ierr)
-    deallocate(idx)
 
     ! Solve the system (reusing persistent KSP)
+    write(*,'(A,I0,A)') ' Solving linear system with n=', n, ' unknowns...'
+    call flush(6)
     call KSPSolve(ksp_saved, bb_saved, xx_saved, ierr)
     
-    ! Check convergence
-    call KSPGetConvergedReason(ksp_saved, reason, ierr)
+    write(*,'(A)') ' KSPSolve completed, checking status...'
+    call flush(6)
+    
+    if (ierr /= 0) then
+       write(0,*) "ERROR: KSPSolve failed with error code:", ierr
+       stop
+    end if
+    
+    ! Check convergence - Note: KSPConvergedReason type changed in PETSc 3.24
+    ! Simplified error checking without explicit reason query
     call KSPGetIterationNumber(ksp_saved, its, ierr)
     call KSPGetResidualNorm(ksp_saved, rnorm, ierr)
     
     ! Report convergence status (commented out by default for performance)
     ! Uncomment the next line to see convergence info every solve:
-    ! write(*,'(A,I0,A,ES12.5,A,I0)') ' PETSc: iterations=', its, ', residual=', rnorm, ', reason=', reason
+    ! write(*,'(A,I0,A,ES12.5)') ' PETSc: iterations=', its, ', residual=', rnorm
     
-    if (reason < 0) then
-       write(0,*) "WARNING: PETSc solver diverged or failed!"
-       write(0,*) "  Reason code:", reason
+    ! Basic divergence check via error code from solve
+    if (ierr /= 0) then
+       write(0,*) "WARNING: PETSc solver returned non-zero error code!"
+       write(0,*) "  Error code:", ierr
        write(0,*) "  Iterations:", its
        write(0,*) "  Residual norm:", rnorm
        ! Don't stop - let the main code detect NaNs if needed
     end if
 
-    ! Extract solution
-    call VecGetArrayF90(xx_saved, xptr, ierr)
-    x = xptr
-    call VecRestoreArrayF90(xx_saved, xptr, ierr)
+    ! Extract solution vector using batched VecGetValues
+    ! Process in chunks for better performance on large systems
+    block
+      integer, parameter :: CHUNK_SIZE = 100000
+      PetscInt, allocatable :: idx_batch(:)
+      PetscScalar, allocatable :: val_batch(:)
+      integer :: i_start, i_end, chunk_len, j
+      
+      do i_start = 1, n, CHUNK_SIZE
+        i_end = min(i_start + CHUNK_SIZE - 1, n)
+        chunk_len = i_end - i_start + 1
+        
+        allocate(idx_batch(chunk_len), val_batch(chunk_len))
+        
+        ! Build index array (0-based for PETSc)
+        do j = 1, chunk_len
+          idx_batch(j) = i_start + j - 2  ! -1 for 0-based, -1 more for offset
+        end do
+        
+        ! Get chunk of values
+        call VecGetValues(xx_saved, chunk_len, idx_batch, val_batch, ierr)
+        if (ierr /= 0) then
+          write(0,*) "ERROR: VecGetValues failed at chunk starting", i_start, "ierr=", ierr
+          stop
+        end if
+        
+        ! Copy to output array
+        x(i_start:i_end) = val_batch(1:chunk_len)
+        
+        deallocate(idx_batch, val_batch)
+      end do
+    end block
 
   end subroutine solve_petsc_csr
 
diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90
old mode 100755
new mode 100644
index 8c51b62..1d7145a
--- a/src/heatflow/mod_setup.f90
+++ b/src/heatflow/mod_setup.f90
@@ -24,7 +24,6 @@ module setup
   use globe_data, only: acsr, ja, ia
   use solver, only: SRSin
   use materials, only: material
-  use sparse_solver, only: coo2csr
   implicit none
   
    contains
diff --git a/src/heatflow/mod_sparse_solver.f90 b/src/heatflow/mod_sparse_solver.f90
deleted file mode 100644
index 8f7957c..0000000
--- a/src/heatflow/mod_sparse_solver.f90
+++ /dev/null
@@ -1,420 +0,0 @@
-module sparse_solver
-
-  use mkl_pardiso
-  implicit none
-
-  private
-
-  public :: coo2csr, &
-            bicgstab, &
-            solve_pardiso
-  
-contains
-
-  subroutine coo2csr( nrow, &
-                      nnz, &
-                      a, &
-                      ir, &
-                      jc, &
-                      acsr, &
-                      ja, &
-                      ia )
-    
-    !--------------------------------------------------------------------------!
-    !! coocsr converts coo to csr.
-    !
-    !  discussion:
-    !
-    !    this routine converts a matrix that is stored in coo coordinate format
-    !    a, ir, jc into a csr row general sparse acsr, ja, ia format.
-    !
-    !  parameters:
-    !
-    !    input, integer nrow, the row dimension of the matrix.
-    !
-    !    input, integer nnz, the number of nonzero elements in the matrix.
-    !
-    ! a,
-    ! ir,
-    ! jc    = matrix in coordinate format. a(k), ir(k), jc(k) store the nnz
-    !         nonzero elements of the matrix with a(k) = actual real value of
-    !         the elements, ir(k) = its row number and jc(k) = its column
-    !        number. the order of the elements is arbitrary.
-    !
-    ! on return:
-    !
-    ! ir       is destroyed
-    !
-    !    output, real acsr(nnz), ja(nnz), ia(nrow+1), the matrix in csr
-    !    compressed sparse row format.
-    !--------------------------------------------------------------------------!
-
-    ! Arguments.
-    integer (kind=8), intent(in) :: nrow
-    integer (kind=8), intent(in) :: nnz
-    real(8), dimension(nnz), intent(in) :: a
-    integer (kind=8), dimension(nnz), intent(in) :: ir
-    integer (kind=8), dimension(nnz), intent(in) :: jc   
-    real(8), dimension(nnz), intent(out) :: acsr
-    integer(kind=8), dimension(nnz), intent(out) :: ja
-    integer(kind=8), dimension(nrow+1), intent(out) :: ia
-
-    ! Local variables.
-    integer (kind=8) :: i, iad, j, k, k0, row_start
-    integer (kind=8) :: dup_count
-    real(kind=8) :: x
-    logical :: found_dup
-    integer(kind=8), dimension(nrow+1) :: ia_save  ! Save original row starts
-    
-    ia(1:nrow+1) = 0
-    dup_count = 0
-    
-    ! determine the row lengths.
-    
-    do k = 1, nnz
-       ia(ir(k)) = ia(ir(k)) + 1
-    end do
-    
-    ! the starting position of each row.
-    
-    k = 1
-
-    do j = 1, nrow + 1
-
-       k0 = ia(j)
-       ia(j) = k
-       k = k + k0
-
-    end do
-    
-    ! Save the original row pointers
-    ia_save = ia
-    
-    ! go through the structure once more. fill in output matrix.
-    ! This version handles duplicate (i,j) entries by summing them.
-    
-    do k = 1, nnz
-
-       i = ir(k)
-       j = jc(k)
-       x = a(k)
-       
-       ! Search for existing entry in this row with same column
-       found_dup = .false.
-       row_start = ia_save(i)
-       do iad = row_start, ia(i)-1
-          if (ja(iad) == j) then
-             ! Found duplicate - sum the values
-             acsr(iad) = acsr(iad) + x
-             found_dup = .true.
-             dup_count = dup_count + 1
-             exit
-          end if
-       end do
-       
-       if (.not. found_dup) then
-          ! New entry - insert it
-          iad = ia(i)
-          acsr(iad) = x
-          ja(iad) = j
-          ia(i) = iad + 1
-       end if
-
-    end do
-    
-    if (dup_count > 0) then
-       write(*,'(A,I0,A)') 'WARNING: COO->CSR found and summed ', dup_count, ' duplicate entries'
-    end if
-    
-    ! shift back ia.
-    
-    do j = nrow, 1, -1
-       ia(j+1) = ia(j)
-    end do
-
-    ia(1) = 1
-    
-    return
-    
-  end subroutine coo2csr
-  
-  !-------------------------------------------------------------------
-  !	BiConjugate Gradient (Stabilised) Method
-  !-------------------------------------------------------------------
-  
-  subroutine bicgstab( acsr, &
-                       ia, &
-                       ja, &
-                       b, &
-                       maxiter, &
-                       initGuess, &
-                       x, &
-                       iter )
-
-    ! Arguments
-    real(8), dimension(:), intent(in) :: acsr
-    integer (kind=8), dimension(:), intent(in) :: ia
-    integer (kind=8), dimension(:), intent(in) :: ja
-    real(8), dimension(:), intent(in) :: b
-    integer, intent(in) :: maxiter
-    real(8), dimension(:), intent(in) :: initGuess
-    real(8), dimension(:), allocatable, intent(out) :: x
-    integer, intent(out) :: iter
-
-    ! Local variables
-    integer :: i, j, k, n
-    real(8), parameter :: cc = 1.0e-9
-    real(8) :: alpha,beta,delta0,delta,delta_old,omega
-    real(8), dimension(:), allocatable :: r, p, s, rst, temp1, temp2
-    
-    n = size(b,1)
-    allocate(x(n))
-    allocate(r(n))
-    allocate(p(n))
-    allocate(s(n))
-    allocate(rst(n))
-    allocate(temp1(n))
-    allocate(temp2(n))
-    
-    call mkl_dcsrgemv("N",n,acsr,ia,ja,x,temp1)
-    
-    print *, "Initial residual norm: ", norm2(b - temp1)
-    r = b - temp1
-
-    call random_number(rst)
-    
-    p = r
-
-    delta = dot_product(rst,r)
-
-    write(*,'(a,1x,f15.3)') "Starting delta: ", delta
-    
-    delta0 = delta
-
-    do i = 1, maxiter
-       print *, "Iteration ", i
-       if ( norm2(r) /= norm2(r) ) then
-          write(*,'(a)') "Error in solver: residual NaN"
-          exit
-       end if
-
-       if(mod(i,1000).eq.0) then
-          write(*,'(a,1x,i6)') 'Iteration number: ',i
-          write(*,'(a,1x,f15.3)') "Residual ratio: ", norm2(r)/cc
-       end if
-       
-       call mkl_dcsrgemv("N",n,acsr,ia,ja,p,temp1)	! temp1=A*p
-       
-       alpha = delta/dot_product(rst,temp1)
-       s = r - alpha*temp1
-       
-       call mkl_dcsrgemv("N",n,acsr,ia,ja,s,temp2)	! temp2=A*s
-       
-       omega = dot_product(s,temp2)/dot_product(temp2,temp2)
-       x = x + alpha*p + omega*s
-       r = s - omega*temp2
-       delta_old = delta
-       delta = dot_product(rst,r)
-       beta = (delta/delta_old)*(alpha/omega)
-       p = r + beta*(p - omega*temp1)
-
-       if(norm2(r) .lt. cc) then
-          iter = i
-          return
-       end if
-       
-       if(i.eq.maxiter) then
-          write(*,'(a)') "Maximum iterations reached."
-          write(*,'(a)') "Convergence not achieved."
-          write(*,'(a,1x,f15.3)') "Norm of residual: ", norm2(r)
-          write(*,'(a,1x,f15.3)') "Convergence criterion: ", cc
-          if((norm2(r)/cc) .lt. 2.d0) then
-             write(*,'(a)') "The residual is within a small",&
-                  "range of the convergence criterion."
-             write(*,'(a)') "Perhaps increasing iteration ",	&
-                  "count may help."
-          end if
-       end if
-       
-    end do
-    
-  end subroutine bicgstab
-  
-  !-------------------------------------------------------------------
-  !	END BiConjugate Gradient (Stabilised) Method
-  !-------------------------------------------------------------------
-  
-  !-------------------------------------------------------------------
-  !	PARDISO Direct Solver
-  !-------------------------------------------------------------------
-  
-  subroutine solve_pardiso( acsr, &
-                            b, &
-                            ia, &
-                            ja, &
-                            x )
-    
-    use mkl_pardiso
-
-    ! Arguments
-    real(8), dimension(:), intent(in) :: acsr
-    real(8), dimension(:), intent(inout) :: b
-    integer,dimension(:), intent(in) :: ia
-    integer,dimension(:), intent(in) :: ja
-    real(8), dimension(:), allocatable, intent(out) :: x
-
-    ! Local variables
-    type(mkl_pardiso_handle), dimension(:), allocatable  :: pt
-    integer :: i,maxfct,mnum,mtype,phase,n,nrhs,error,msglvl,nnz,error1
-    integer, dimension(:), allocatable :: iparm  
-    integer,dimension(1) :: idum
-    real(8),dimension(1) :: ddum
-        integer :: badcol, missing_diag, k
-    logical :: found
-    n = size(b,1)
-    nnz = size(acsr,1)
-    nrhs = 1
-    maxfct = 1
-    mnum = 1
-    
-    if (.not.(allocated(x))) allocate(x(n))
-
-
-
-    allocate(iparm(64))		!set up pardiso control parameter
-    
-    do i=1,64
-       iparm(i) = 0
-    end do
-    
-    iparm(1) = 1 ! no solver default
-    iparm(2) = 2 ! fill-in reordering from metis
-    iparm(4) = 0 ! no iterative-direct algorithm
-    iparm(5) = 0 ! no user fill-in reducing permutation
-    iparm(6) = 0 ! =0 solution on the first n compoments of x
-    iparm(8) = 2 ! numbers of iterative refinement steps
-    iparm(10) = 13 ! perturbe the pivot elements with 1e-13
-    iparm(11) = 1 ! use nonsymmetric permutation and scaling mps
-    iparm(13) = 0 ! maximum weighted matching algorithm is
-    !switched-off (default for symmetric).
-    ! try iparm(13) = 1 in case of inaccuracy
-    iparm(14) = 0 ! output: number of perturbed pivots
-    iparm(18) = -1 ! output: number of nonzeros in the factor lu
-    iparm(19) = -1 ! output: mflops for lu factorization
-    iparm(20) = 0 ! output: numbers of cg iterations
-    
-    error  = 0 ! initialize error flag
-    msglvl = 0 ! 0=no output, 1=print statistical information
-    mtype  = 11 ! real and unsymmetric matrix
-    
-    ! Initiliaze the internal solver memory pointer.
-    ! This is only necessary for the first call of the solver.
-    
-    allocate (pt(64))
-    do i=1,64
-       pt(i)%dummy =  0
-    end do
-
-    !---------------- CSR integrity / diagnostic checks ----------------
-
-    badcol = 0
-
-    write(*,*) 'PARDISO debug:'
-    write(*,*) ' n       =', n
-    write(*,*) ' nnz     =', nnz
-    write(*,*) ' ia(1)   =', ia(1), ' ia(n+1)=', ia(n+1), ' ia(n+1)-1=', ia(n+1)-1
-
-    if (ia(1) /= 1) stop 'ERROR: ia(1) must be 1'
-    if (ia(n+1)-1 /= nnz) stop 'ERROR: ia end mismatch'
-
-    do i=1,n
-       if (ia(i) > ia(i+1)) then
-          write(*,*) 'Row pointer decreases at row', i
-          stop 'ERROR: ia not monotone'
-       end if
-    end do
-
-    do k=1,nnz
-       if (ja(k) < 1 .or. ja(k) > n) then
-          badcol = badcol + 1
-          if (badcol <= 10) write(*,*) 'Bad column index k=',k,' ja=',ja(k)
-       end if
-    end do
-    if (badcol > 0) then
-       write(*,*) 'Total bad columns =', badcol
-       stop 'ERROR: invalid ja entries'
-    end if
-
-    ! Check each row has a diagonal and (optionally) detect duplicates
-    missing_diag = 0
-    do i=1,n
-       found = .false.
-       if (ia(i) < ia(i+1)) then
-          ! simple duplicate check (requires row segment unsorted ascending to be meaningful)
-          do k = ia(i), ia(i+1)-1
-             if (ja(k) == i) then
-                if (acsr(k) == 0.0d0) then
-                   write(*,*) 'Zero diagonal at row', i
-                   stop 'ERROR: zero diagonal'
-                end if
-                found = .true.
-             end if
-          end do
-       end if
-       if (.not. found) then
-          missing_diag = missing_diag + 1
-          if (missing_diag <= 10) write(*,*) 'Missing diagonal at row', i
-       end if
-    end do
-    if (missing_diag > 0) stop 'ERROR: missing diagonals'
-    !-------------------------------------------------------------------
-    phase = 11 ! Only reordering and symbolic factorization
-    
-    call pardiso (pt,maxfct,mnum,mtype,phase,n,acsr,ia,ja, &
-         idum, nrhs, iparm, msglvl, ddum, ddum, error)
-    
-    if (error /= 0) then
-       write(*,*) 'the following error was detected: ', error
-       goto 1000
-    end if
-    
-    phase = 22 ! only factorization
-    call pardiso (pt,maxfct,mnum,mtype,phase,n,acsr,ia,ja, &
-         idum, nrhs, iparm, msglvl, ddum, ddum, error)
-    if (error /= 0) then
-       write(*,*) 'the following error was detected: ', error
-       goto 1000
-    endif
-    
-    ! back substitution and iterative refinement
-    iparm(8) = 2 ! max numbers of iterative refinement steps
-    phase = 33 ! only solving
-    call pardiso (pt,maxfct,mnum,mtype,phase,n,acsr,ia,ja, &
-         idum, nrhs, iparm, msglvl, b, x, error)
-    write(*,*) 'solve completed ... '
-    if (error /= 0) then
-       write(*,*) 'the following error was detected: ', error
-       goto 1000
-    endif
-    
-1000 continue
-    ! termination and release of memory
-    phase = -1 ! release internal memory
-    call pardiso (pt,maxfct,mnum,mtype,phase,n,ddum,idum,idum, 	&
-         idum, nrhs, iparm, msglvl, ddum, ddum, error1)
-    
-    if (error1 /= 0) then
-       write(*,*) 'the following release error was detected: ',	&
-            error1
-       stop 1
-    endif
-    
-    if ( error /= 0 ) stop 1
-    
-  end subroutine solve_pardiso
-  
-  !-------------------------------------------------------------------
-  !	END PARDISO Direct Solver
-  !-------------------------------------------------------------------
-  
-end module sparse_solver
diff --git a/src/heatflow/mod_tempdep.f90 b/src/heatflow/mod_tempdep.f90
old mode 100755
new mode 100644

From 8687aafc8b91516b577292c3816975ff4e8e44d7 Mon Sep 17 00:00:00 2001
From: Your Name <hm556@exeter.ac.uk>
Date: Tue, 18 Nov 2025 16:17:17 +0000
Subject: [PATCH 21/28] Enable threading

---
 src/heatflow.f90                  |  9 +++-
 src/heatflow/mod_petsc_solver.f90 | 85 +++++--------------------------
 src/heatflow/mod_setup.f90        |  2 +
 3 files changed, 23 insertions(+), 73 deletions(-)

diff --git a/src/heatflow.f90 b/src/heatflow.f90
index d1db700..6817c67 100644
--- a/src/heatflow.f90
+++ b/src/heatflow.f90
@@ -34,6 +34,12 @@ program HEATFLOW_V0_3
    real(real12) :: cpustart, cpuend, cpustart2, progress
    integer(int12) :: itime
 
+   !-------------------------------------------------------------!
+   ! Initialize PETSc FIRST (before any other operations)       !
+   !-------------------------------------------------------------!
+   CALL petsc_init()
+   !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^!
+
    !-------------------------------------------------------------!
    ! calculate the time to run full simulation                   !
    !-------------------------------------------------------------!
@@ -65,7 +71,6 @@ program HEATFLOW_V0_3
    !-------------------------------------------------------------!
    ! run simulation for 'ntime' time steps                       !
    !-------------------------------------------------------------!
-   CALL petsc_init()                                 
 
    do itime=1,ntime 
 
@@ -99,7 +104,7 @@ program HEATFLOW_V0_3
    !-------------------------------------------------------------!
    ! calculate end time and print to user                        !
    !-------------------------------------------------------------!
-   CALL cpu_time(cpuend)                                          
+   CALL cpu_time(cpuend)
    write(*,'(A,F12.6)') ' time=', cpuend-cpustart                 
    !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^!
 
diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90
index f0c061f..5a16b4b 100644
--- a/src/heatflow/mod_petsc_solver.f90
+++ b/src/heatflow/mod_petsc_solver.f90
@@ -60,33 +60,33 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
     real(8) :: rnorm
     logical :: rebuild_needed
 
-    write(*,'(A,I0)') ' [DEBUG] Entered solve_petsc_csr, n=', n
-    call flush(6)
+    ! write(*,'(A,I0)') ' [DEBUG] Entered solve_petsc_csr, n=', n
+    ! call flush(6)
     
     if (size(ia) /= n+1) stop 'solve_petsc_csr: ia size mismatch'
     if (size(b) /= n .or. size(x) /= n) stop 'solve_petsc_csr: vector size mismatch'
 
-    write(*,'(A)') ' [DEBUG] Size checks passed'
-    call flush(6)
+    ! write(*,'(A)') ' [DEBUG] Size checks passed'
+    ! call flush(6)
 
     ! Determine if we need to rebuild the matrix structure
     rebuild_needed = .false.
     if (.not. initialized) rebuild_needed = .true.
     if (n /= n_saved) rebuild_needed = .true.
     
-    write(*,'(A,L1)') ' [DEBUG] rebuild_needed=', rebuild_needed
-    call flush(6)
+    ! write(*,'(A,L1)') ' [DEBUG] rebuild_needed=', rebuild_needed
+    ! call flush(6)
     
     ! Create PETSc objects on first call or if size changed
     if (rebuild_needed) then
-      write(*,'(A)') ' [DEBUG] Starting PETSc object creation...'
-      call flush(6)
+      ! write(*,'(A)') ' [DEBUG] Starting PETSc object creation...'
+      ! call flush(6)
       
       ! Clean up old objects if they exist
       if (initialized) call petsc_cleanup()
       
-      write(*,'(A)') ' [DEBUG] Preallocating matrix...'
-      call flush(6)
+      ! write(*,'(A)') ' [DEBUG] Preallocating matrix...'
+      ! call flush(6)
       
       ! Preallocate matrix with exact nonzeros per row (saves memory)
       allocate(d_nnz(n))
@@ -94,8 +94,8 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
         d_nnz(i) = ia(i+1) - ia(i)
       end do
       
-      write(*,'(A,I0,A,I0)') ' [DEBUG] Creating matrix: n=', n, ', max_nnz/row=', maxval(d_nnz)
-      call flush(6)
+      ! write(*,'(A,I0,A,I0)') ' [DEBUG] Creating matrix: n=', n, ', max_nnz/row=', maxval(d_nnz)
+      ! call flush(6)
       
       ! Create matrix with exact preallocation (most memory-efficient)
       call MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 0, d_nnz, A_saved, ierr)
@@ -106,18 +106,12 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
         stop
       end if
       
-      write(*,'(A)') ' [DEBUG] Matrix created successfully'
-      call flush(6)
-      
       deallocate(d_nnz)
       
       ! Create persistent vectors
       call VecCreateSeq(PETSC_COMM_SELF, n, bb_saved, ierr)
       call VecCreateSeq(PETSC_COMM_SELF, n, xx_saved, ierr)
       
-      write(*,'(A)') ' [DEBUG] Vectors created'
-      call flush(6)
-      
       ! Create and configure KSP solver (persistent across timesteps)
       call KSPCreate(PETSC_COMM_SELF, ksp_saved, ierr)
       call KSPSetOperators(ksp_saved, A_saved, A_saved, ierr)
@@ -134,8 +128,6 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
     end if
 
     ! Update matrix values (always needed each timestep)
-    write(*,'(A)') ' Updating PETSc matrix values...'
-    call flush(6)
     call MatZeroEntries(A_saved, ierr)
     do i = 1, n
        row_nz = ia(i+1) - ia(i)
@@ -151,19 +143,13 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
           deallocate(cols0, vals)
        end if
     end do
-    write(*,'(A)') ' Matrix assembly beginning...'
-    call flush(6)
     call MatAssemblyBegin(A_saved, MAT_FINAL_ASSEMBLY, ierr)
     call MatAssemblyEnd(A_saved, MAT_FINAL_ASSEMBLY, ierr)
-    write(*,'(A)') ' Matrix assembly complete.'
-    call flush(6)
     
     ! Optional: Verify matrix assembly (uncomment for debugging)
     ! call MatView(A_saved, PETSC_VIEWER_STDOUT_SELF, ierr)
 
     ! Update RHS vector in batches to avoid memory issues with huge systems
-    write(*,'(A)') ' Updating RHS vector...'
-    call flush(6)
     block
       integer, parameter :: VEC_CHUNK = 1000000
       integer :: vec_start, vec_end, vec_len, k
@@ -177,21 +163,12 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
         idx_vec = [(vec_start + k - 2, k=1,vec_len)]  ! 0-based indices
         
         call VecSetValues(bb_saved, vec_len, idx_vec, b(vec_start:vec_end), INSERT_VALUES, ierr)
-        if (ierr /= 0) then
-          write(0,*) "ERROR: VecSetValues(bb) failed at", vec_start, "ierr=", ierr
-          stop
-        end if
-        
         deallocate(idx_vec)
       end do
     end block
     call VecAssemblyBegin(bb_saved,ierr); call VecAssemblyEnd(bb_saved,ierr)
-    write(*,'(A)') ' RHS vector complete.'
-    call flush(6)
 
     ! Update initial guess in batches
-    write(*,'(A)') ' Updating initial guess vector...'
-    call flush(6)
     block
       integer, parameter :: VEC_CHUNK = 1000000
       integer :: vec_start, vec_end, vec_len, k
@@ -205,49 +182,23 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
         idx_vec = [(vec_start + k - 2, k=1,vec_len)]  ! 0-based indices
         
         call VecSetValues(xx_saved, vec_len, idx_vec, x(vec_start:vec_end), INSERT_VALUES, ierr)
-        if (ierr /= 0) then
-          write(0,*) "ERROR: VecSetValues(xx) failed at", vec_start, "ierr=", ierr
-          stop
-        end if
-        
         deallocate(idx_vec)
       end do
     end block
     call VecAssemblyBegin(xx_saved,ierr); call VecAssemblyEnd(xx_saved,ierr)
 
-    ! Solve the system (reusing persistent KSP)
-    write(*,'(A,I0,A)') ' Solving linear system with n=', n, ' unknowns...'
-    call flush(6)
+    ! Solve the system
     call KSPSolve(ksp_saved, bb_saved, xx_saved, ierr)
     
-    write(*,'(A)') ' KSPSolve completed, checking status...'
-    call flush(6)
-    
     if (ierr /= 0) then
        write(0,*) "ERROR: KSPSolve failed with error code:", ierr
        stop
     end if
     
-    ! Check convergence - Note: KSPConvergedReason type changed in PETSc 3.24
-    ! Simplified error checking without explicit reason query
     call KSPGetIterationNumber(ksp_saved, its, ierr)
     call KSPGetResidualNorm(ksp_saved, rnorm, ierr)
-    
-    ! Report convergence status (commented out by default for performance)
-    ! Uncomment the next line to see convergence info every solve:
-    ! write(*,'(A,I0,A,ES12.5)') ' PETSc: iterations=', its, ', residual=', rnorm
-    
-    ! Basic divergence check via error code from solve
-    if (ierr /= 0) then
-       write(0,*) "WARNING: PETSc solver returned non-zero error code!"
-       write(0,*) "  Error code:", ierr
-       write(0,*) "  Iterations:", its
-       write(0,*) "  Residual norm:", rnorm
-       ! Don't stop - let the main code detect NaNs if needed
-    end if
 
     ! Extract solution vector using batched VecGetValues
-    ! Process in chunks for better performance on large systems
     block
       integer, parameter :: CHUNK_SIZE = 100000
       PetscInt, allocatable :: idx_batch(:)
@@ -260,19 +211,11 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
         
         allocate(idx_batch(chunk_len), val_batch(chunk_len))
         
-        ! Build index array (0-based for PETSc)
         do j = 1, chunk_len
-          idx_batch(j) = i_start + j - 2  ! -1 for 0-based, -1 more for offset
+          idx_batch(j) = i_start + j - 2
         end do
         
-        ! Get chunk of values
         call VecGetValues(xx_saved, chunk_len, idx_batch, val_batch, ierr)
-        if (ierr /= 0) then
-          write(0,*) "ERROR: VecGetValues failed at chunk starting", i_start, "ierr=", ierr
-          stop
-        end if
-        
-        ! Copy to output array
         x(i_start:i_end) = val_batch(1:chunk_len)
         
         deallocate(idx_batch, val_batch)
diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90
index 1d7145a..0a25b00 100644
--- a/src/heatflow/mod_setup.f90
+++ b/src/heatflow/mod_setup.f90
@@ -26,6 +26,8 @@ module setup
   use materials, only: material
   implicit none
   
+  public :: set_global_variables
+  
    contains
     
 

From 90f435425f90d58a9fd300803c57db66e3b3e0b3 Mon Sep 17 00:00:00 2001
From: Your Name <hm556@exeter.ac.uk>
Date: Tue, 2 Dec 2025 19:11:12 +0000
Subject: [PATCH 22/28] Add ILU, and fix heating case 2

---
 src/heatflow.f90                   |   9 +-
 src/heatflow/mkl_pardiso.f90       | 452 -----------------------------
 src/heatflow/mod_evolve.f90        |   2 +-
 src/heatflow/mod_heating.f90       |  50 +++-
 src/heatflow/mod_petsc_solver.f90  | 272 ++++++++++-------
 src/heatflow/mod_setup.f90         | 215 ++++++++++----
 src/heatflow/mod_sparse_solver.f90 | 420 ---------------------------
 src/heatflow/mod_tempdep.f90       |   0
 8 files changed, 382 insertions(+), 1038 deletions(-)
 delete mode 100644 src/heatflow/mkl_pardiso.f90
 mode change 100755 => 100644 src/heatflow/mod_setup.f90
 delete mode 100644 src/heatflow/mod_sparse_solver.f90
 mode change 100755 => 100644 src/heatflow/mod_tempdep.f90

diff --git a/src/heatflow.f90 b/src/heatflow.f90
index d1db700..6817c67 100644
--- a/src/heatflow.f90
+++ b/src/heatflow.f90
@@ -34,6 +34,12 @@ program HEATFLOW_V0_3
    real(real12) :: cpustart, cpuend, cpustart2, progress
    integer(int12) :: itime
 
+   !-------------------------------------------------------------!
+   ! Initialize PETSc FIRST (before any other operations)       !
+   !-------------------------------------------------------------!
+   CALL petsc_init()
+   !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^!
+
    !-------------------------------------------------------------!
    ! calculate the time to run full simulation                   !
    !-------------------------------------------------------------!
@@ -65,7 +71,6 @@ program HEATFLOW_V0_3
    !-------------------------------------------------------------!
    ! run simulation for 'ntime' time steps                       !
    !-------------------------------------------------------------!
-   CALL petsc_init()                                 
 
    do itime=1,ntime 
 
@@ -99,7 +104,7 @@ program HEATFLOW_V0_3
    !-------------------------------------------------------------!
    ! calculate end time and print to user                        !
    !-------------------------------------------------------------!
-   CALL cpu_time(cpuend)                                          
+   CALL cpu_time(cpuend)
    write(*,'(A,F12.6)') ' time=', cpuend-cpustart                 
    !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^!
 
diff --git a/src/heatflow/mkl_pardiso.f90 b/src/heatflow/mkl_pardiso.f90
deleted file mode 100644
index 0ff7527..0000000
--- a/src/heatflow/mkl_pardiso.f90
+++ /dev/null
@@ -1,452 +0,0 @@
-!*******************************************************************************
-!   Copyright(C) 2004-2013 Intel Corporation. All Rights Reserved.
-!
-!   The source code, information  and  material ("Material") contained herein is
-!   owned  by Intel Corporation or its suppliers or licensors, and title to such
-!   Material remains  with Intel Corporation  or its suppliers or licensors. The
-!   Material  contains proprietary information  of  Intel or  its  suppliers and
-!   licensors. The  Material is protected by worldwide copyright laws and treaty
-!   provisions. No  part  of  the  Material  may  be  used,  copied, reproduced,
-!   modified, published, uploaded, posted, transmitted, distributed or disclosed
-!   in any way  without Intel's  prior  express written  permission. No  license
-!   under  any patent, copyright  or  other intellectual property rights  in the
-!   Material  is  granted  to  or  conferred  upon  you,  either  expressly,  by
-!   implication, inducement,  estoppel or  otherwise.  Any  license  under  such
-!   intellectual  property  rights must  be express  and  approved  by  Intel in
-!   writing.
-!
-!   *Third Party trademarks are the property of their respective owners.
-!
-!   Unless otherwise  agreed  by Intel  in writing, you may not remove  or alter
-!   this  notice or  any other notice embedded  in Materials by Intel or Intel's
-!   suppliers or licensors in any way.
-!
-!*******************************************************************************
-!   Content : MKL PARDISO Fortran-90 header file
-!
-!           Contains PARDISO routine definition.
-!           For CDECL use only.
-!
-!*******************************************************************************
-!DEC$ IF .NOT. DEFINED( __MKL_PARDISO_F90 )
-
-!DEC$ DEFINE __MKL_PARDISO_F90
-
-      MODULE MKL_PARDISO_PRIVATE
-
-        TYPE MKL_PARDISO_HANDLE; INTEGER(KIND=8) DUMMY; END TYPE
-
-        INTEGER, PARAMETER :: PARDISO_OOC_FILE_NAME = 1
-
-      END MODULE MKL_PARDISO_PRIVATE
-
-      MODULE MKL_PARDISO
-        USE MKL_PARDISO_PRIVATE
-
-!
-! Subroutine prototype for PARDISO
-!
-
-      INTERFACE PARDISO
-        SUBROUTINE PARDISO_S( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER,          INTENT(IN)    :: MAXFCT
-          INTEGER,          INTENT(IN)    :: MNUM
-          INTEGER,          INTENT(IN)    :: MTYPE
-          INTEGER,          INTENT(IN)    :: PHASE
-          INTEGER,          INTENT(IN)    :: N
-          INTEGER,          INTENT(IN)    :: IA(*)
-          INTEGER,          INTENT(IN)    :: JA(*)
-          INTEGER,          INTENT(INOUT) :: PERM(*)
-          INTEGER,          INTENT(IN)    :: NRHS
-          INTEGER,          INTENT(INOUT) :: IPARM(*)
-          INTEGER,          INTENT(IN)    :: MSGLVL
-          INTEGER,          INTENT(OUT)   :: ERROR
-
-          REAL(KIND=4),     INTENT(IN)    :: A(*)
-          REAL(KIND=4),     INTENT(INOUT) :: B(*)
-          REAL(KIND=4),     INTENT(OUT)   :: X(*)
-        END SUBROUTINE PARDISO_S
-
-        SUBROUTINE PARDISO_D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER,          INTENT(IN)    :: MAXFCT
-          INTEGER,          INTENT(IN)    :: MNUM
-          INTEGER,          INTENT(IN)    :: MTYPE
-          INTEGER,          INTENT(IN)    :: PHASE
-          INTEGER,          INTENT(IN)    :: N
-          INTEGER,          INTENT(IN)    :: IA(*)
-          INTEGER,          INTENT(IN)    :: JA(*)
-          INTEGER,          INTENT(INOUT) :: PERM(*)
-          INTEGER,          INTENT(IN)    :: NRHS
-          INTEGER,          INTENT(INOUT) :: IPARM(*)
-          INTEGER,          INTENT(IN)    :: MSGLVL
-          INTEGER,          INTENT(OUT)   :: ERROR
-
-          REAL(KIND=8),     INTENT(IN)    :: A(*)
-          REAL(KIND=8),     INTENT(INOUT) :: B(*)
-          REAL(KIND=8),     INTENT(OUT)   :: X(*)
-        END SUBROUTINE PARDISO_D
-
-        SUBROUTINE PARDISO_SC( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER,          INTENT(IN)    :: MAXFCT
-          INTEGER,          INTENT(IN)    :: MNUM
-          INTEGER,          INTENT(IN)    :: MTYPE
-          INTEGER,          INTENT(IN)    :: PHASE
-          INTEGER,          INTENT(IN)    :: N
-          INTEGER,          INTENT(IN)    :: IA(*)
-          INTEGER,          INTENT(IN)    :: JA(*)
-          INTEGER,          INTENT(INOUT) :: PERM(*)
-          INTEGER,          INTENT(IN)    :: NRHS
-          INTEGER,          INTENT(INOUT) :: IPARM(*)
-          INTEGER,          INTENT(IN)    :: MSGLVL
-          INTEGER,          INTENT(OUT)   :: ERROR
-
-          COMPLEX(KIND=4),  INTENT(IN)    :: A(*)
-          COMPLEX(KIND=4),  INTENT(INOUT) :: B(*)
-          COMPLEX(KIND=4),  INTENT(OUT)   :: X(*)
-        END SUBROUTINE PARDISO_SC
-
-        SUBROUTINE PARDISO_DC( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER,          INTENT(IN)    :: MAXFCT
-          INTEGER,          INTENT(IN)    :: MNUM
-          INTEGER,          INTENT(IN)    :: MTYPE
-          INTEGER,          INTENT(IN)    :: PHASE
-          INTEGER,          INTENT(IN)    :: N
-          INTEGER,          INTENT(IN)    :: IA(*)
-          INTEGER,          INTENT(IN)    :: JA(*)
-          INTEGER,          INTENT(INOUT) :: PERM(*)
-          INTEGER,          INTENT(IN)    :: NRHS
-          INTEGER,          INTENT(INOUT) :: IPARM(*)
-          INTEGER,          INTENT(IN)    :: MSGLVL
-          INTEGER,          INTENT(OUT)   :: ERROR
-          COMPLEX(KIND=8),  INTENT(IN)    :: A(*)
-          COMPLEX(KIND=8),  INTENT(INOUT) :: B(*)
-          COMPLEX(KIND=8),  INTENT(OUT)   :: X(*)
-        END SUBROUTINE PARDISO_DC
-
-
-        SUBROUTINE PARDISO_S_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER,          INTENT(IN)    :: MAXFCT
-          INTEGER,          INTENT(IN)    :: MNUM
-          INTEGER,          INTENT(IN)    :: MTYPE
-          INTEGER,          INTENT(IN)    :: PHASE
-          INTEGER,          INTENT(IN)    :: N
-          INTEGER,          INTENT(IN)    :: IA(*)
-          INTEGER,          INTENT(IN)    :: JA(*)
-          INTEGER,          INTENT(INOUT) :: PERM(*)
-          INTEGER,          INTENT(IN)    :: NRHS
-          INTEGER,          INTENT(INOUT) :: IPARM(*)
-          INTEGER,          INTENT(IN)    :: MSGLVL
-          INTEGER,          INTENT(OUT)   :: ERROR
-          REAL(KIND=4),     INTENT(IN)    :: A(*)
-          REAL(KIND=4),     INTENT(INOUT) :: B(N,*)
-          REAL(KIND=4),     INTENT(OUT)   :: X(N,*)
-        END SUBROUTINE PARDISO_S_2D
-
-        SUBROUTINE PARDISO_D_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER,          INTENT(IN)    :: MAXFCT
-          INTEGER,          INTENT(IN)    :: MNUM
-          INTEGER,          INTENT(IN)    :: MTYPE
-          INTEGER,          INTENT(IN)    :: PHASE
-          INTEGER,          INTENT(IN)    :: N
-          INTEGER,          INTENT(IN)    :: IA(*)
-          INTEGER,          INTENT(IN)    :: JA(*)
-          INTEGER,          INTENT(INOUT) :: PERM(*)
-          INTEGER,          INTENT(IN)    :: NRHS
-          INTEGER,          INTENT(INOUT) :: IPARM(*)
-          INTEGER,          INTENT(IN)    :: MSGLVL
-          INTEGER,          INTENT(OUT)   :: ERROR
-          REAL(KIND=8),     INTENT(IN)    :: A(*)
-          REAL(KIND=8),     INTENT(INOUT) :: B(N,*)
-          REAL(KIND=8),     INTENT(OUT)   :: X(N,*)
-        END SUBROUTINE PARDISO_D_2D
-
-        SUBROUTINE PARDISO_SC_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER,          INTENT(IN)    :: MAXFCT
-          INTEGER,          INTENT(IN)    :: MNUM
-          INTEGER,          INTENT(IN)    :: MTYPE
-          INTEGER,          INTENT(IN)    :: PHASE
-          INTEGER,          INTENT(IN)    :: N
-          INTEGER,          INTENT(IN)    :: IA(*)
-          INTEGER,          INTENT(IN)    :: JA(*)
-          INTEGER,          INTENT(INOUT) :: PERM(*)
-          INTEGER,          INTENT(IN)    :: NRHS
-          INTEGER,          INTENT(INOUT) :: IPARM(*)
-          INTEGER,          INTENT(IN)    :: MSGLVL
-          INTEGER,          INTENT(OUT)   :: ERROR
-          COMPLEX(KIND=4),  INTENT(IN)    :: A(*)
-          COMPLEX(KIND=4),  INTENT(INOUT) :: B(N,*)
-          COMPLEX(KIND=4),  INTENT(OUT)   :: X(N,*)
-        END SUBROUTINE PARDISO_SC_2D
-
-        SUBROUTINE PARDISO_DC_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER,          INTENT(IN)    :: MAXFCT
-          INTEGER,          INTENT(IN)    :: MNUM
-          INTEGER,          INTENT(IN)    :: MTYPE
-          INTEGER,          INTENT(IN)    :: PHASE
-          INTEGER,          INTENT(IN)    :: N
-          INTEGER,          INTENT(IN)    :: IA(*)
-          INTEGER,          INTENT(IN)    :: JA(*)
-          INTEGER,          INTENT(INOUT) :: PERM(*)
-          INTEGER,          INTENT(IN)    :: NRHS
-          INTEGER,          INTENT(INOUT) :: IPARM(*)
-          INTEGER,          INTENT(IN)    :: MSGLVL
-          INTEGER,          INTENT(OUT)   :: ERROR
-          COMPLEX(KIND=8),  INTENT(IN)    :: A(*)
-          COMPLEX(KIND=8),  INTENT(INOUT) :: B(N,*)
-          COMPLEX(KIND=8),  INTENT(OUT)   :: X(N,*)
-        END SUBROUTINE PARDISO_DC_2D
-      END INTERFACE
-!
-! Subroutine prototype for PARDISO_64
-!
-! Note: The pardiso_64 interface is not supported on IA-32 architecture.
-!       If called on IA-32, error = -12 is returned.
-!
-      INTERFACE PARDISO_64
-        SUBROUTINE PARDISO_S_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
-          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
-          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
-          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
-          INTEGER(KIND=8),          INTENT(IN)    :: N
-          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
-          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
-          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
-          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
-          REAL(KIND=4),             INTENT(IN)    :: A(*)
-          REAL(KIND=4),             INTENT(INOUT) :: B(*)
-          REAL(KIND=4),             INTENT(OUT)   :: X(*)
-        END SUBROUTINE PARDISO_S_64
-
-        SUBROUTINE PARDISO_D_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
-          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
-          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
-          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
-          INTEGER(KIND=8),          INTENT(IN)    :: N
-          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
-          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
-          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
-          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
-          REAL(KIND=8),             INTENT(IN)    :: A(*)
-          REAL(KIND=8),             INTENT(INOUT) :: B(*)
-          REAL(KIND=8),             INTENT(OUT)   :: X(*)
-        END SUBROUTINE PARDISO_D_64
-
-        SUBROUTINE PARDISO_SC_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
-          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
-          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
-          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
-          INTEGER(KIND=8),          INTENT(IN)    :: N
-          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
-          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
-          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
-          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
-          COMPLEX(KIND=4),          INTENT(IN)    :: A(*)
-          COMPLEX(KIND=4),          INTENT(INOUT) :: B(*)
-          COMPLEX(KIND=4),          INTENT(OUT)   :: X(*)
-        END SUBROUTINE PARDISO_SC_64
-
-        SUBROUTINE PARDISO_DC_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
-          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
-          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
-          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
-          INTEGER(KIND=8),          INTENT(IN)    :: N
-          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
-          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
-          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
-          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
-          COMPLEX(KIND=8),          INTENT(IN)    :: A(*)
-          COMPLEX(KIND=8),          INTENT(INOUT) :: B(*)
-          COMPLEX(KIND=8),          INTENT(OUT)   :: X(*)
-        END SUBROUTINE PARDISO_DC_64
-
-        SUBROUTINE PARDISO_S_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
-          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
-          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
-          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
-          INTEGER(KIND=8),          INTENT(IN)    :: N
-          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
-          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
-          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
-          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
-          REAL(KIND=4),             INTENT(IN)    :: A(*)
-          REAL(KIND=4),             INTENT(INOUT) :: B(N,*)
-          REAL(KIND=4),             INTENT(OUT)   :: X(N,*)
-        END SUBROUTINE PARDISO_S_64_2D
-
-        SUBROUTINE PARDISO_D_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
-          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
-          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
-          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
-          INTEGER(KIND=8),          INTENT(IN)    :: N
-          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
-          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
-          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
-          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
-          REAL(KIND=8),             INTENT(IN)    :: A(*)
-          REAL(KIND=8),             INTENT(INOUT) :: B(N,*)
-          REAL(KIND=8),             INTENT(OUT)   :: X(N,*)
-        END SUBROUTINE PARDISO_D_64_2D
-
-        SUBROUTINE PARDISO_SC_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
-          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
-          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
-          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
-          INTEGER(KIND=8),          INTENT(IN)    :: N
-          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
-          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
-          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
-          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
-          COMPLEX(KIND=4),          INTENT(IN)    :: A(*)
-          COMPLEX(KIND=4),          INTENT(INOUT) :: B(N,*)
-          COMPLEX(KIND=4),          INTENT(OUT)   :: X(N,*)
-        END SUBROUTINE PARDISO_SC_64_2D
-
-        SUBROUTINE PARDISO_DC_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR )
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MAXFCT
-          INTEGER(KIND=8),          INTENT(IN)    :: MNUM
-          INTEGER(KIND=8),          INTENT(IN)    :: MTYPE
-          INTEGER(KIND=8),          INTENT(IN)    :: PHASE
-          INTEGER(KIND=8),          INTENT(IN)    :: N
-          INTEGER(KIND=8),          INTENT(IN)    :: IA(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: JA(*)
-          INTEGER(KIND=8),          INTENT(INOUT) :: PERM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: NRHS
-          INTEGER(KIND=8),          INTENT(INOUT) :: IPARM(*)
-          INTEGER(KIND=8),          INTENT(IN)    :: MSGLVL
-          INTEGER(KIND=8),          INTENT(OUT)   :: ERROR
-          COMPLEX(KIND=8),          INTENT(IN)    :: A(*)
-          COMPLEX(KIND=8),          INTENT(INOUT) :: B(N,*)
-          COMPLEX(KIND=8),          INTENT(OUT)   :: X(N,*)
-        END SUBROUTINE PARDISO_DC_64_2D
-
-      END INTERFACE
-
-      INTERFACE
-
-       SUBROUTINE PARDISOINIT(PT, MTYPE, IPARM)
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(OUT) :: PT(*)
-          INTEGER,          INTENT(IN)    :: MTYPE
-          INTEGER,          INTENT(OUT) :: IPARM(*)
-       END SUBROUTINE PARDISOINIT
-
-      END INTERFACE
-
-      INTERFACE PARDISO_GET
-
-       FUNCTION PARDISO_GETENV(PT, OptName, StrVal)
-          USE MKL_PARDISO_PRIVATE
-          INTEGER PARDISO_GETENV
-          TYPE(MKL_PARDISO_HANDLE), INTENT(IN)  :: PT(*)
-          INTEGER,                  INTENT(IN)  :: OptName
-          CHARACTER(*),             INTENT(OUT)  :: StrVal
-       END FUNCTION PARDISO_GETENV
-
-      END INTERFACE
-
-      INTERFACE PARDISO_SET
-
-       FUNCTION PARDISO_SETENV(PT, OptName, StrVal)
-          USE MKL_PARDISO_PRIVATE
-          INTEGER PARDISO_SETENV
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          INTEGER,                  INTENT(IN)    :: OptName
-          INTEGER,                  INTENT(IN)    :: StrVal(*)
-       END FUNCTION PARDISO_SETENV
-
-      END INTERFACE
-
-      INTERFACE PARDISO_PIV
-      
-      FUNCTION MKL_PARDISO_PIVOT( AII, BII, EPS)
-         REAL(KIND=8)   :: AII, BII, EPS
-         INTEGER   MKL_PARDISO_PIVOT
-         END
-      END INTERFACE PARDISO_PIV
-
-      INTERFACE PARDISO_GETDIAG
-
-         SUBROUTINE PARDISO_GETDIAG_D(PT, DIAG_FACT, DIAG_A, MNUM, EPS)
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          REAL(KIND=8), INTENT(INOUT)             :: DIAG_FACT, DIAG_A
-          INTEGER, INTENT(IN)            :: MNUM
-          INTEGER, INTENT(INOUT)         :: EPS
-         END
-
-         SUBROUTINE PARDISO_GETDIAG_Z(PT, DIAG_FACT, DIAG_A, MNUM, EPS)
-          USE MKL_PARDISO_PRIVATE
-          TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*)
-          COMPLEX(KIND=8), INTENT(INOUT)          :: DIAG_FACT, DIAG_A
-          INTEGER, INTENT(IN)            :: MNUM
-          INTEGER, INTENT(INOUT)         :: EPS
-         END
-
-      END INTERFACE PARDISO_GETDIAG
-
-
-
-
-      END MODULE MKL_PARDISO
-
-!DEC$ ENDIF
diff --git a/src/heatflow/mod_evolve.f90 b/src/heatflow/mod_evolve.f90
index 67af070..0554c0d 100644
--- a/src/heatflow/mod_evolve.f90
+++ b/src/heatflow/mod_evolve.f90
@@ -159,7 +159,7 @@ subroutine simulate(itime)
    !  if (any(x-Temp_p .lt. TINY)) x=x+TINY !avoid nan solver issue
     itol=1
     tol=1.e-32_real12
-    itmax=50000
+    itmax=500000
     ncg = 0
     iter= 0
     err=E
diff --git a/src/heatflow/mod_heating.f90 b/src/heatflow/mod_heating.f90
index aa070ac..8fb0298 100644
--- a/src/heatflow/mod_heating.f90
+++ b/src/heatflow/mod_heating.f90
@@ -78,18 +78,22 @@ subroutine heater(itime, Q, Qdens)
                 !------------------------------
                 if ( time .le. time_pulse ) then
                    Q(IA) = POWER
-                if (icattaneo .eq. 1) then
-                if (itime .eq. 1) then
-                  Q(IA) = Q(IA) + (tau*(POWER))
-                  end if
-                if (itime .eq. heated_steps+1) then
-                  Q(IA) = Q(IA) - (tau*(POWER))
-                end if
-                end if 
-                
+                  !  print *, "Heating on"
                 else
+                  !  print *, "Heating off"
                    Q(IA) = 0.0_real12
                 end if
+
+               if (icattaneo .eq. 1) then
+                  if (itime .eq. 1) then
+                     Q(IA) = Q(IA) + tau*POWER
+                     ! print *, "Turning on heater at time ", time
+                  end if
+                  if (itime .eq. (heated_steps + 1)) then
+                     Q(IA) = Q(IA) - tau*POWER
+                     ! print *, "Turning off heater at time ", time
+                  end if
+               end if
                
 
                 !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -126,6 +130,34 @@ subroutine heater(itime, Q, Qdens)
                 end if
                 !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+            case(7)
+               !---------------------------------------------------------
+               ! Heater on for a time period, off for a time period,
+               ! then on again (square-wave heating)
+               !---------------------------------------------------------
+
+               if (mod(time, 2.0_real12 * time_pulse) .le. time_pulse) then
+                  Q(IA) = POWER
+
+                  if (icattaneo .eq. 1) then
+
+                     ! Forward-time pulse check
+                     if (mod(time + time_step, 2.0_real12 * time_pulse) .le. time_pulse) then
+                        Q(IA) = Q(IA) + tau * POWER
+                     end if
+
+                     ! Backward-time pulse check
+                     if (mod(time - time_step, 2.0_real12 * time_pulse) .le. time_pulse) then
+                        Q(IA) = Q(IA) - tau * POWER
+                     end if
+
+                  end if
+
+               else
+                  Q(IA) = 0.0_real12
+               end if
+               
+
                case(10)
                   Q(IA) = POWER - (tau*(POWER))
 
diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90
index dd6c73a..f5e37b7 100644
--- a/src/heatflow/mod_petsc_solver.f90
+++ b/src/heatflow/mod_petsc_solver.f90
@@ -2,9 +2,18 @@ module petsc_solver
 #include "petsc/finclude/petscsys.h"
 #include "petsc/finclude/petscksp.h"
   use petscksp
+  use iso_c_binding
   implicit none
   private
-  public :: petsc_init, petsc_finalize, solve_petsc_csr
+  public :: petsc_init, petsc_finalize, solve_petsc_csr, petsc_cleanup
+
+  ! Persistent PETSc objects (reused across timesteps for memory efficiency)
+  Mat, save :: A_saved = PETSC_NULL_MAT
+  Vec, save :: bb_saved = PETSC_NULL_VEC
+  Vec, save :: xx_saved = PETSC_NULL_VEC
+  KSP, save :: ksp_saved = PETSC_NULL_KSP
+  logical, save :: initialized = .false.
+  integer, save :: n_saved = 0
 
 contains
 
@@ -15,9 +24,25 @@ end subroutine petsc_init
 
   subroutine petsc_finalize()
     integer :: ierr
+    call petsc_cleanup()
     call PetscFinalize(ierr)
   end subroutine petsc_finalize
 
+  subroutine petsc_cleanup()
+    ! Clean up persistent PETSc objects
+    integer :: ierr
+    if (A_saved /= PETSC_NULL_MAT) call MatDestroy(A_saved, ierr)
+    if (bb_saved /= PETSC_NULL_VEC) call VecDestroy(bb_saved, ierr)
+    if (xx_saved /= PETSC_NULL_VEC) call VecDestroy(xx_saved, ierr)
+    if (ksp_saved /= PETSC_NULL_KSP) call KSPDestroy(ksp_saved, ierr)
+    A_saved = PETSC_NULL_MAT
+    bb_saved = PETSC_NULL_VEC
+    xx_saved = PETSC_NULL_VEC
+    ksp_saved = PETSC_NULL_KSP
+    initialized = .false.
+    n_saved = 0
+  end subroutine petsc_cleanup
+
   subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
     integer,  intent(in) :: n
     integer,  intent(in) :: ia(:), ja(:)
@@ -25,24 +50,87 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
     real(8),  intent(inout) :: x(:)
     real(8),  intent(in) :: rtol
     integer,  intent(in) :: maxit
-    Mat :: A
-    Vec :: bb, xx
-    KSP :: ksp
+    
     PC  :: pc
     integer :: ierr, i, row_nz, start_k, its
-    integer, allocatable :: cols0(:), idx(:)
+    integer, allocatable :: cols0(:), idx(:), d_nnz(:)
+    PetscInt :: idx_array(1)
+    PetscScalar :: val_array(1)
     real(8), allocatable :: vals(:)
-    real(8), pointer :: xptr(:)
-    KSPConvergedReason :: reason
     real(8) :: rnorm
+    logical :: rebuild_needed
 
+    ! write(*,'(A,I0)') ' [DEBUG] Entered solve_petsc_csr, n=', n
+    ! call flush(6)
+    
     if (size(ia) /= n+1) stop 'solve_petsc_csr: ia size mismatch'
     if (size(b) /= n .or. size(x) /= n) stop 'solve_petsc_csr: vector size mismatch'
 
-    ! Create matrix with an estimated 7 nonzeros/row (adjust if needed)
-    call MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 7, PETSC_NULL_INTEGER, A, ierr)
+    ! write(*,'(A)') ' [DEBUG] Size checks passed'
+    ! call flush(6)
+
+    ! Determine if we need to rebuild the matrix structure
+    rebuild_needed = .false.
+    if (.not. initialized) rebuild_needed = .true.
+    if (n /= n_saved) rebuild_needed = .true.
+    
+    ! write(*,'(A,L1)') ' [DEBUG] rebuild_needed=', rebuild_needed
+    ! call flush(6)
+    
+    ! Create PETSc objects on first call or if size changed
+    if (rebuild_needed) then
+      ! write(*,'(A)') ' [DEBUG] Starting PETSc object creation...'
+      ! call flush(6)
+      
+      ! Clean up old objects if they exist
+      if (initialized) call petsc_cleanup()
+      
+      ! write(*,'(A)') ' [DEBUG] Preallocating matrix...'
+      ! call flush(6)
+      
+      ! Preallocate matrix with exact nonzeros per row (saves memory)
+      allocate(d_nnz(n))
+      do i = 1, n
+        d_nnz(i) = ia(i+1) - ia(i)
+      end do
+      
+      ! write(*,'(A,I0,A,I0)') ' [DEBUG] Creating matrix: n=', n, ', max_nnz/row=', maxval(d_nnz)
+      ! call flush(6)
+      
+      ! Create matrix with exact preallocation (most memory-efficient)
+      call MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 0, d_nnz, A_saved, ierr)
+      if (ierr /= 0) then
+        write(0,*) "ERROR: MatCreateSeqAIJ failed with ierr=", ierr
+        write(0,*) "  Matrix size may exceed system limits"
+        write(0,*) "  n=", n, ", nnz=", sum(int(d_nnz,8)), ", max_nnz/row=", maxval(d_nnz)
+        stop
+      end if
+      
+      deallocate(d_nnz)
+      
+      ! Create persistent vectors
+      call VecCreateSeq(PETSC_COMM_SELF, n, bb_saved, ierr)
+      call VecCreateSeq(PETSC_COMM_SELF, n, xx_saved, ierr)
+      
+      ! Create and configure KSP solver (persistent across timesteps)
+      call KSPCreate(PETSC_COMM_SELF, ksp_saved, ierr)
+      call KSPSetOperators(ksp_saved, A_saved, A_saved, ierr)
+      call KSPGetPC(ksp_saved, pc, ierr)
+      ! Use ILU preconditioner for better conditioning (especially for high conductivity materials)
+      call PCSetType(pc, PCILU, ierr)           ! ILU preconditioner (better than Jacobi)
+      ! call PCSetType(pc, PCJACOBI, ierr)        ! Jacobi preconditioner (simple) 
+      call KSPSetType(ksp_saved, KSPBCGS, ierr) ! BiCGSTAB solver
+      call KSPSetTolerances(ksp_saved, rtol, PETSC_DEFAULT_REAL, &
+                           PETSC_DEFAULT_REAL, maxit, ierr)
+      call KSPSetNormType(ksp_saved, KSP_NORM_UNPRECONDITIONED, ierr)
+      call KSPSetFromOptions(ksp_saved, ierr)
+      
+      initialized = .true.
+      n_saved = n
+    end if
 
-    ! Fill matrix from CSR format (ia, ja are 1-based Fortran indexing)
+    ! Update matrix values (always needed each timestep)
+    call MatZeroEntries(A_saved, ierr)
     do i = 1, n
        row_nz = ia(i+1) - ia(i)
        if (row_nz > 0) then
@@ -52,116 +140,90 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
           cols0 = ja(start_k:start_k+row_nz-1) - 1
           vals  = aval(start_k:start_k+row_nz-1)
           
-          ! Debug: print first row details
-          if (i == 1) then
-             write(*,'(A,I0,A,I0)') 'Row 1: nnz=', row_nz, ', start_k=', start_k
-             write(*,'(A,10I6)') '  1-based cols:', ja(start_k:min(start_k+9,start_k+row_nz-1))
-             write(*,'(A,10I6)') '  0-based cols:', cols0(1:min(10,row_nz))
-             write(*,'(A,10ES12.4)') '  vals:', vals(1:min(10,row_nz))
-          end if
-          
           ! Set row i-1 (0-based) with column indices cols0 (0-based)
-          call MatSetValues(A, 1, (/i-1/), row_nz, cols0, vals, INSERT_VALUES, ierr)
+          call MatSetValues(A_saved, 1, (/i-1/), row_nz, cols0, vals, INSERT_VALUES, ierr)
           deallocate(cols0, vals)
        end if
     end do
-    call MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY, ierr)
-    call MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY, ierr)
+    call MatAssemblyBegin(A_saved, MAT_FINAL_ASSEMBLY, ierr)
+    call MatAssemblyEnd(A_saved, MAT_FINAL_ASSEMBLY, ierr)
     
     ! Optional: Verify matrix assembly (uncomment for debugging)
-    ! call MatView(A, PETSC_VIEWER_STDOUT_SELF, ierr)
-
-    ! Create vectors
-    call VecCreateSeq(PETSC_COMM_SELF, n, bb, ierr)
-    call VecCreateSeq(PETSC_COMM_SELF, n, xx, ierr)
-
-    ! Set RHS and initial guess
-    allocate(idx(n))
-    idx = [(i-1, i=1,n)]
-    call VecSetValues(bb, n, idx, b, INSERT_VALUES, ierr)
-    call VecAssemblyBegin(bb,ierr); call VecAssemblyEnd(bb,ierr)
-
-    call VecSetValues(xx, n, idx, x, INSERT_VALUES, ierr)
-    call VecAssemblyBegin(xx,ierr); call VecAssemblyEnd(xx,ierr)
-
-    ! KSP setup
-    call KSPCreate(PETSC_COMM_SELF, ksp, ierr)
-    call KSPSetOperators(ksp, A, A, ierr)     ! 3-arg form (reuse automatically)
-    call KSPGetPC(ksp, pc, ierr)
-    call PCSetType(pc, PCJACOBI, ierr)        ! Use Jacobi (diagonal) preconditioner to match linbcg
-    call KSPSetType(ksp, KSPBCGS, ierr)       ! Use BCGS to match linbcg behavior
-    
-    ! Set convergence tolerances
-    ! rtol = relative tolerance, atol = absolute tolerance (use default), dtol = divergence tolerance, maxits = max iterations
-    call KSPSetTolerances(ksp, rtol, PETSC_DEFAULT_REAL, PETSC_DEFAULT_REAL, maxit, ierr)
-    
-    ! Use unpreconditioned norm (matching linbcg with itol=1)
-    call KSPSetNormType(ksp, KSP_NORM_UNPRECONDITIONED, ierr)
-    
-    ! Allow command line override of solver options
-    call KSPSetFromOptions(ksp, ierr)
+    ! call MatView(A_saved, PETSC_VIEWER_STDOUT_SELF, ierr)
 
-    call KSPSolve(ksp, bb, xx, ierr)
-    
-    ! Check convergence
-    call KSPGetConvergedReason(ksp, reason, ierr)
-    call KSPGetIterationNumber(ksp, its, ierr)
-    call KSPGetResidualNorm(ksp, rnorm, ierr)
-    
-    ! Report convergence status (commented out by default for performance)
-    ! Uncomment the next line to see convergence info every solve:
-    ! write(*,'(A,I0,A,ES12.5,A,I0)') ' PETSc: iterations=', its, ', residual=', rnorm, ', reason=', reason
+    ! Update RHS vector in batches to avoid memory issues with huge systems
+    block
+      integer, parameter :: VEC_CHUNK = 1000000
+      integer :: vec_start, vec_end, vec_len, k
+      integer, allocatable :: idx_vec(:)
+      
+      do vec_start = 1, n, VEC_CHUNK
+        vec_end = min(vec_start + VEC_CHUNK - 1, n)
+        vec_len = vec_end - vec_start + 1
+        
+        allocate(idx_vec(vec_len))
+        idx_vec = [(vec_start + k - 2, k=1,vec_len)]  ! 0-based indices
+        
+        call VecSetValues(bb_saved, vec_len, idx_vec, b(vec_start:vec_end), INSERT_VALUES, ierr)
+        deallocate(idx_vec)
+      end do
+    end block
+    call VecAssemblyBegin(bb_saved,ierr); call VecAssemblyEnd(bb_saved,ierr)
+
+    ! Update initial guess in batches
+    block
+      integer, parameter :: VEC_CHUNK = 1000000
+      integer :: vec_start, vec_end, vec_len, k
+      integer, allocatable :: idx_vec(:)
+      
+      do vec_start = 1, n, VEC_CHUNK
+        vec_end = min(vec_start + VEC_CHUNK - 1, n)
+        vec_len = vec_end - vec_start + 1
+        
+        allocate(idx_vec(vec_len))
+        idx_vec = [(vec_start + k - 2, k=1,vec_len)]  ! 0-based indices
+        
+        call VecSetValues(xx_saved, vec_len, idx_vec, x(vec_start:vec_end), INSERT_VALUES, ierr)
+        deallocate(idx_vec)
+      end do
+    end block
+    call VecAssemblyBegin(xx_saved,ierr); call VecAssemblyEnd(xx_saved,ierr)
+
+    ! Solve the system
+    call KSPSolve(ksp_saved, bb_saved, xx_saved, ierr)
     
-    if (reason < 0) then
-       write(0,*) "WARNING: PETSc solver diverged or failed!"
-       write(0,*) "  Reason code:", reason
-       write(0,*) "  Iterations:", its
-       write(0,*) "  Residual norm:", rnorm
-       ! Don't stop - let the main code detect NaNs if needed
+    if (ierr /= 0) then
+       write(0,*) "ERROR: KSPSolve failed with error code:", ierr
+       stop
     end if
-
-    ! Extract solution
-    call VecGetArrayF90(xx, xptr, ierr)
-    x = xptr
-    call VecRestoreArrayF90(xx, xptr, ierr)
     
-    ! Manual residual check: compute ||A*x - b||
+    call KSPGetIterationNumber(ksp_saved, its, ierr)
+    call KSPGetResidualNorm(ksp_saved, rnorm, ierr)
+
+    ! Extract solution vector using batched VecGetValues
     block
-      real(8), allocatable :: Ax(:), residual(:)
-      real(8) :: manual_rnorm, bnorm
-      integer(8) :: i, k, start_k, row_nz
+      integer, parameter :: CHUNK_SIZE = 100000
+      PetscInt, allocatable :: idx_batch(:)
+      PetscScalar, allocatable :: val_batch(:)
+      integer :: i_start, i_end, chunk_len, j
       
-      allocate(Ax(n), residual(n))
-      Ax = 0.0_8
-      
-      ! Compute A*x manually using CSR format
-      do i = 1, n
-         row_nz = ia(i+1) - ia(i)
-         if (row_nz > 0) then
-            start_k = ia(i)
-            do k = start_k, start_k + row_nz - 1
-               Ax(i) = Ax(i) + aval(k) * x(ja(k))
-            end do
-         end if
+      do i_start = 1, n, CHUNK_SIZE
+        i_end = min(i_start + CHUNK_SIZE - 1, n)
+        chunk_len = i_end - i_start + 1
+        
+        allocate(idx_batch(chunk_len), val_batch(chunk_len))
+        
+        do j = 1, chunk_len
+          idx_batch(j) = i_start + j - 2
+        end do
+        
+        call VecGetValues(xx_saved, chunk_len, idx_batch, val_batch, ierr)
+        x(i_start:i_end) = val_batch(1:chunk_len)
+        
+        deallocate(idx_batch, val_batch)
       end do
-      
-      residual = Ax - b
-      manual_rnorm = sqrt(sum(residual**2))
-      bnorm = sqrt(sum(b**2))
-      
-      write(*,'(A,ES15.7)') ' Manual residual check: ||A*x-b|| = ', manual_rnorm
-      write(*,'(A,ES15.7)') '                        ||b||     = ', bnorm
-      write(*,'(A,ES15.7)') '                        ||A*x-b||/||b|| = ', manual_rnorm/bnorm
-      
-      deallocate(Ax, residual)
     end block
 
-    ! Cleanup
-    deallocate(idx)
-    call KSPDestroy(ksp, ierr)
-    call VecDestroy(bb, ierr)
-    call VecDestroy(xx, ierr)
-    call MatDestroy(A, ierr)
   end subroutine solve_petsc_csr
 
 end module petsc_solver
\ No newline at end of file
diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90
old mode 100755
new mode 100644
index e3c2872..0a25b00
--- a/src/heatflow/mod_setup.f90
+++ b/src/heatflow/mod_setup.f90
@@ -24,9 +24,10 @@ module setup
   use globe_data, only: acsr, ja, ia
   use solver, only: SRSin
   use materials, only: material
-  use sparse_solver, only: coo2csr
   implicit none
   
+  public :: set_global_variables
+  
    contains
     
 
@@ -52,8 +53,13 @@ subroutine set_global_variables()
       ! later date
       !---------------------------------------------------
       write(*,*) "Setting up material properties"
+      write(*,'(A,I10,A)') " Processing ", NA, " grid cells..."
       index = 0
       do iz = 1, nz
+         ! Progress reporting every 10% for large grids
+         if (mod(iz-1, max(1,nz/10)) == 0 .and. iz > 1) then
+            write(*,'(A,I3,A)') "   Progress: ", int(100.0*real(iz)/real(nz)), "%"
+         end if
          do iy = 1, ny
             do ix = 1, nx
             index = index + 1
@@ -75,14 +81,15 @@ subroutine set_global_variables()
       !---------------------------------------------------
       ! Check if the sparse matrix matches the full matrix
       !---------------------------------------------------
+      write(*,*) "Building sparse H matrix..."
       if (Check_Sparse_Full) then
          print*, "CHECK SPARSE FULL"
          CALL build_Hmatrix()
       else
+         ! Build CSR format directly (acsr, ja, ia are allocated inside sparse_Hmatrix)
          CALL sparse_Hmatrix()
-         ! Allocate the arrays to hold the H matrix in CSR format
-         allocate(acsr(ra%len), ja(ra%len), ia(ra%n+1))
-         CALL coo2csr(ra%n, ra%len, ra%val, ra%irow, ra%jcol, acsr, ja, ia)
+         ! No need for COO->CSR conversion anymore, it's already in CSR format!
+         write(*,*) "Sparse matrix setup complete."
       end if
       !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -91,26 +98,46 @@ end subroutine set_global_variables
 !!!#################################################################################################
 
 !!!#################################################################################################
-!!! This sets up the H Matrix directly in sparse row storage
+!!! This sets up the H Matrix directly in sparse row storage (CSR format)
+!!! Modified to build CSR directly instead of COO->CSR to save memory
 !!!#################################################################################################
    subroutine sparse_Hmatrix()
      implicit none
       real(real12) :: H0 ! Holds the value of the H matrix
-      integer(int12) :: i, j, len, count, k ! i and j are the row and column of the H matrix
+      integer(int12) :: i, j, count, k, row ! i and j are the row and column of the H matrix
+      integer(int12) :: nnz_estimate
       ! Holds the values to add to the row to get the column
       integer(int12), allocatable, dimension(:) :: addit 
-      ! The number of non-zero elements in the H matrix to look for
-      len = 7*nx*ny*nz - 2*(nx*ny + ny*nz + nz*nx)
-      if (Periodicx) len = len + 2*ny*nz
-      if (Periodicy) len = len + 2*nz*nx
-      if (Periodicz) len = len + 2*nx*ny
+      ! Temporary arrays for building each row
+      real(real12), allocatable, dimension(:) :: row_vals
+      integer(int12), allocatable, dimension(:) :: row_cols
+      integer(int12) :: row_count, max_row_size
+      
       ra%n = NA ! The number of rows in the H matrix
-      ra%len = len ! The number of non-zero elements in the H matrix
-      ! Allocate the arrays to hold the H matrix in sparse storage
-      allocate(ra%val(len), ra%irow(len), ra%jcol(len))
-      ra%val(:)=0
-      ra%irow(:)=-2
-      ra%jcol(:)=-1
+      
+      ! Estimate nonzeros (7 per interior cell, less at boundaries)
+      nnz_estimate = 7*nx*ny*nz - 2*(nx*ny + ny*nz + nz*nx)
+      if (Periodicx) nnz_estimate = nnz_estimate + 2*ny*nz
+      if (Periodicy) nnz_estimate = nnz_estimate + 2*nz*nx
+      if (Periodicz) nnz_estimate = nnz_estimate + 2*nx*ny
+      
+      ! Allocate CSR arrays with initial estimate (will grow if needed)
+      ! For 451^3: ~640M entries = 10GB, so allocate conservatively
+      write(*,'(A,I12,A)') " Estimated nonzeros: ", nnz_estimate, ""
+      allocate(acsr(nnz_estimate), ja(nnz_estimate))
+      allocate(ia(NA+1))
+      
+      ! Setup neighbor offsets
+      addit = [1] 
+      if (Periodicx) addit = [addit, (nx-1)]
+      if (ny .gt. 1) addit = [addit, nx]
+      if ((Periodicy).and.(ny .gt. 1)) addit = [addit, (ny-1)*nx]
+      if (nz .gt. 1) addit = [addit, nx*ny]
+      if ((Periodicz).and.(nz .gt. 1)) addit = [addit, (nz-1)*ny*nx]
+      
+      ! Allocate temporary row storage (max ~13 entries per row for 3D)
+      max_row_size = 2*size(addit,1) + 1
+      allocate(row_vals(max_row_size), row_cols(max_row_size))
       addit = [1] ! The values to add to the row to get the column
       if (Periodicx) addit = [addit, (nx-1)]
       if (ny .gt. 1) addit = [addit, nx] ! Add the values to add to the row to get the column
@@ -124,41 +151,131 @@ subroutine sparse_Hmatrix()
       !write(6,*) NA
       !write(6,*) "========================================="
 
-      count = 0 ! The number of non-zero elements in the H matrix
-      parent_loop: do j = 1, NA ! Loop over the columns of the H matrix
-         i=j ! The row of the H matrix
-         count = count + 1 ! The number of non-zero elements in the H matrix
-         H0 = hmatrixfunc(i,j) ! The value of the H matrix
-         ra%val(count) = H0 ! The value of the H matrix
-         ra%irow(count) = i ! The row of the H matrix
-         ra%jcol(count) = j ! The column of the H matrix
-         ! Loop over the values to add to the row to get the column
-         neighbour_loop: do k = 1, size(addit,1)
-             i = j + addit(k) ! The row of the H matrix
-             ! If the row is greater than the number of rows ...
-             !...in the H matrix then go to the next column
-             if ((i.gt.NA)) cycle parent_loop 
-             H0=hmatrixfunc(i,j) ! The value of the H matrix
-             ! If the value of the H matrix is less than TINY then go to the next value ...
-             !...to add to the row to get the column
-             if (abs(H0).lt.TINY) cycle neighbour_loop 
-             count = count + 1 ! The number of non-zero elements in the H matrix
-             ra%val(count) = H0 ! The value of the H matrix
-             ra%irow(count) = i ! The row of the H matrix
-             ra%jcol(count) = j ! The column of the H matrix
-             count = count + 1 ! The number of non-zero elements in the H matrix
-             ra%val(count) = H0 ! The value of the H matrix
-             ra%irow(count) = j ! The row of the H matrix
-             ra%jcol(count) = i ! The column of the H matrix
-             !write(6,*) j,i, H0, count
-          end do neighbour_loop
-     end do parent_loop
-
-     !write(6,*) "========================================="
-     !write(6,*) 'c len',count, len
+      
+      count = 0 ! Total nonzeros counter
+      ia(1) = 1 ! CSR row pointer (1-based for Fortran)
+      
+      ! Build CSR format row-by-row
+      write(*,'(A)') " Building CSR matrix row-by-row..."
+      parent_loop: do row = 1, NA
+         ! Progress reporting every 10%
+         if (mod(row-1, max(1,NA/10)) == 0 .and. row > 1) then
+            write(*,'(A,I3,A,I12,A)') "   Progress: ", int(100.0*real(row)/real(NA)), &
+                 "%, nnz=", count, ""
+         end if
+         
+         row_count = 0
+         
+         ! Diagonal element
+         j = row
+         row_count = row_count + 1
+         H0 = hmatrixfunc(row, j)
+         row_vals(row_count) = H0
+         row_cols(row_count) = j
+         
+         ! Off-diagonal elements (process in column-sorted order for CSR)
+         ! First pass: collect all neighbors
+         do k = 1, size(addit,1)
+            j = row + addit(k)
+            if (j > NA) cycle ! Skip if out of bounds
+            
+            H0 = hmatrixfunc(row, j)
+            if (abs(H0) >= TINY) then
+               row_count = row_count + 1
+               row_vals(row_count) = H0
+               row_cols(row_count) = j
+            end if
+         end do
+         
+         ! Second pass: collect reverse neighbors (j < row)
+         do k = 1, size(addit,1)
+            j = row - addit(k)
+            if (j < 1) cycle ! Skip if out of bounds
+            
+            H0 = hmatrixfunc(row, j)
+            if (abs(H0) >= TINY) then
+               row_count = row_count + 1
+               row_vals(row_count) = H0
+               row_cols(row_count) = j
+            end if
+         end do
+         
+         ! Sort this row's entries by column index (required for CSR)
+         call sort_row(row_vals, row_cols, row_count)
+         
+         ! Copy row data to CSR arrays (no bounds checking - we pre-allocated correctly)
+         do i = 1, row_count
+            count = count + 1
+            acsr(count) = row_vals(i)
+            ja(count) = row_cols(i)
+         end do
+         
+         ! Update row pointer
+         ia(row+1) = count + 1
+      end do parent_loop
+      
+      ra%len = count
+      
+      ! Trim arrays to actual size if we over-estimated
+      if (count < size(acsr)) then
+         write(*,'(A,I12,A,I12)') " Trimming arrays from ", size(acsr), " to ", count
+         call trim_csr_arrays(acsr, ja, count)
+      end if
+      
+      deallocate(row_vals, row_cols)
+      write(*,'(A,I12,A)') " CSR matrix built successfully. Actual nonzeros: ", count, ""
    end subroutine sparse_Hmatrix
 !!!#################################################################################################
 
+!!!#################################################################################################
+!!! Sort a row's entries by column index (simple insertion sort, rows are small)
+!!!#################################################################################################
+   subroutine sort_row(vals, cols, n)
+      implicit none
+      integer(int12), intent(in) :: n
+      real(real12), dimension(n), intent(inout) :: vals
+      integer(int12), dimension(n), intent(inout) :: cols
+      integer(int12) :: i, j, temp_col
+      real(real12) :: temp_val
+      
+      do i = 2, n
+         temp_val = vals(i)
+         temp_col = cols(i)
+         j = i - 1
+         do while (j >= 1)
+            if (cols(j) <= temp_col) exit
+            vals(j+1) = vals(j)
+            cols(j+1) = cols(j)
+            j = j - 1
+         end do
+         vals(j+1) = temp_val
+         cols(j+1) = temp_col
+      end do
+   end subroutine sort_row
+!!!#################################################################################################
+
+!!!#################################################################################################
+!!! Trim CSR arrays to exact size
+!!!#################################################################################################
+   subroutine trim_csr_arrays(acsr_arr, ja_arr, final_size)
+      implicit none
+      integer(int12), intent(in) :: final_size
+      real(real12), allocatable, dimension(:), intent(inout) :: acsr_arr
+      integer(int12), allocatable, dimension(:), intent(inout) :: ja_arr
+      real(real12), allocatable, dimension(:) :: temp_vals
+      integer(int12), allocatable, dimension(:) :: temp_cols
+      
+      allocate(temp_vals(final_size), temp_cols(final_size))
+      temp_vals = acsr_arr(1:final_size)
+      temp_cols = ja_arr(1:final_size)
+      deallocate(acsr_arr, ja_arr)
+      allocate(acsr_arr(final_size), ja_arr(final_size))
+      acsr_arr = temp_vals
+      ja_arr = temp_cols
+      deallocate(temp_vals, temp_cols)
+   end subroutine trim_csr_arrays
+!!!#################################################################################################
+
 !!!#################################################################################################
 !!! Check if the simulation will be stable. NOT FULLY IMPLEMENTED   
 !!!#################################################################################################
diff --git a/src/heatflow/mod_sparse_solver.f90 b/src/heatflow/mod_sparse_solver.f90
deleted file mode 100644
index 8f7957c..0000000
--- a/src/heatflow/mod_sparse_solver.f90
+++ /dev/null
@@ -1,420 +0,0 @@
-module sparse_solver
-
-  use mkl_pardiso
-  implicit none
-
-  private
-
-  public :: coo2csr, &
-            bicgstab, &
-            solve_pardiso
-  
-contains
-
-  subroutine coo2csr( nrow, &
-                      nnz, &
-                      a, &
-                      ir, &
-                      jc, &
-                      acsr, &
-                      ja, &
-                      ia )
-    
-    !--------------------------------------------------------------------------!
-    !! coocsr converts coo to csr.
-    !
-    !  discussion:
-    !
-    !    this routine converts a matrix that is stored in coo coordinate format
-    !    a, ir, jc into a csr row general sparse acsr, ja, ia format.
-    !
-    !  parameters:
-    !
-    !    input, integer nrow, the row dimension of the matrix.
-    !
-    !    input, integer nnz, the number of nonzero elements in the matrix.
-    !
-    ! a,
-    ! ir,
-    ! jc    = matrix in coordinate format. a(k), ir(k), jc(k) store the nnz
-    !         nonzero elements of the matrix with a(k) = actual real value of
-    !         the elements, ir(k) = its row number and jc(k) = its column
-    !        number. the order of the elements is arbitrary.
-    !
-    ! on return:
-    !
-    ! ir       is destroyed
-    !
-    !    output, real acsr(nnz), ja(nnz), ia(nrow+1), the matrix in csr
-    !    compressed sparse row format.
-    !--------------------------------------------------------------------------!
-
-    ! Arguments.
-    integer (kind=8), intent(in) :: nrow
-    integer (kind=8), intent(in) :: nnz
-    real(8), dimension(nnz), intent(in) :: a
-    integer (kind=8), dimension(nnz), intent(in) :: ir
-    integer (kind=8), dimension(nnz), intent(in) :: jc   
-    real(8), dimension(nnz), intent(out) :: acsr
-    integer(kind=8), dimension(nnz), intent(out) :: ja
-    integer(kind=8), dimension(nrow+1), intent(out) :: ia
-
-    ! Local variables.
-    integer (kind=8) :: i, iad, j, k, k0, row_start
-    integer (kind=8) :: dup_count
-    real(kind=8) :: x
-    logical :: found_dup
-    integer(kind=8), dimension(nrow+1) :: ia_save  ! Save original row starts
-    
-    ia(1:nrow+1) = 0
-    dup_count = 0
-    
-    ! determine the row lengths.
-    
-    do k = 1, nnz
-       ia(ir(k)) = ia(ir(k)) + 1
-    end do
-    
-    ! the starting position of each row.
-    
-    k = 1
-
-    do j = 1, nrow + 1
-
-       k0 = ia(j)
-       ia(j) = k
-       k = k + k0
-
-    end do
-    
-    ! Save the original row pointers
-    ia_save = ia
-    
-    ! go through the structure once more. fill in output matrix.
-    ! This version handles duplicate (i,j) entries by summing them.
-    
-    do k = 1, nnz
-
-       i = ir(k)
-       j = jc(k)
-       x = a(k)
-       
-       ! Search for existing entry in this row with same column
-       found_dup = .false.
-       row_start = ia_save(i)
-       do iad = row_start, ia(i)-1
-          if (ja(iad) == j) then
-             ! Found duplicate - sum the values
-             acsr(iad) = acsr(iad) + x
-             found_dup = .true.
-             dup_count = dup_count + 1
-             exit
-          end if
-       end do
-       
-       if (.not. found_dup) then
-          ! New entry - insert it
-          iad = ia(i)
-          acsr(iad) = x
-          ja(iad) = j
-          ia(i) = iad + 1
-       end if
-
-    end do
-    
-    if (dup_count > 0) then
-       write(*,'(A,I0,A)') 'WARNING: COO->CSR found and summed ', dup_count, ' duplicate entries'
-    end if
-    
-    ! shift back ia.
-    
-    do j = nrow, 1, -1
-       ia(j+1) = ia(j)
-    end do
-
-    ia(1) = 1
-    
-    return
-    
-  end subroutine coo2csr
-  
-  !-------------------------------------------------------------------
-  !	BiConjugate Gradient (Stabilised) Method
-  !-------------------------------------------------------------------
-  
-  subroutine bicgstab( acsr, &
-                       ia, &
-                       ja, &
-                       b, &
-                       maxiter, &
-                       initGuess, &
-                       x, &
-                       iter )
-
-    ! Arguments
-    real(8), dimension(:), intent(in) :: acsr
-    integer (kind=8), dimension(:), intent(in) :: ia
-    integer (kind=8), dimension(:), intent(in) :: ja
-    real(8), dimension(:), intent(in) :: b
-    integer, intent(in) :: maxiter
-    real(8), dimension(:), intent(in) :: initGuess
-    real(8), dimension(:), allocatable, intent(out) :: x
-    integer, intent(out) :: iter
-
-    ! Local variables
-    integer :: i, j, k, n
-    real(8), parameter :: cc = 1.0e-9
-    real(8) :: alpha,beta,delta0,delta,delta_old,omega
-    real(8), dimension(:), allocatable :: r, p, s, rst, temp1, temp2
-    
-    n = size(b,1)
-    allocate(x(n))
-    allocate(r(n))
-    allocate(p(n))
-    allocate(s(n))
-    allocate(rst(n))
-    allocate(temp1(n))
-    allocate(temp2(n))
-    
-    call mkl_dcsrgemv("N",n,acsr,ia,ja,x,temp1)
-    
-    print *, "Initial residual norm: ", norm2(b - temp1)
-    r = b - temp1
-
-    call random_number(rst)
-    
-    p = r
-
-    delta = dot_product(rst,r)
-
-    write(*,'(a,1x,f15.3)') "Starting delta: ", delta
-    
-    delta0 = delta
-
-    do i = 1, maxiter
-       print *, "Iteration ", i
-       if ( norm2(r) /= norm2(r) ) then
-          write(*,'(a)') "Error in solver: residual NaN"
-          exit
-       end if
-
-       if(mod(i,1000).eq.0) then
-          write(*,'(a,1x,i6)') 'Iteration number: ',i
-          write(*,'(a,1x,f15.3)') "Residual ratio: ", norm2(r)/cc
-       end if
-       
-       call mkl_dcsrgemv("N",n,acsr,ia,ja,p,temp1)	! temp1=A*p
-       
-       alpha = delta/dot_product(rst,temp1)
-       s = r - alpha*temp1
-       
-       call mkl_dcsrgemv("N",n,acsr,ia,ja,s,temp2)	! temp2=A*s
-       
-       omega = dot_product(s,temp2)/dot_product(temp2,temp2)
-       x = x + alpha*p + omega*s
-       r = s - omega*temp2
-       delta_old = delta
-       delta = dot_product(rst,r)
-       beta = (delta/delta_old)*(alpha/omega)
-       p = r + beta*(p - omega*temp1)
-
-       if(norm2(r) .lt. cc) then
-          iter = i
-          return
-       end if
-       
-       if(i.eq.maxiter) then
-          write(*,'(a)') "Maximum iterations reached."
-          write(*,'(a)') "Convergence not achieved."
-          write(*,'(a,1x,f15.3)') "Norm of residual: ", norm2(r)
-          write(*,'(a,1x,f15.3)') "Convergence criterion: ", cc
-          if((norm2(r)/cc) .lt. 2.d0) then
-             write(*,'(a)') "The residual is within a small",&
-                  "range of the convergence criterion."
-             write(*,'(a)') "Perhaps increasing iteration ",	&
-                  "count may help."
-          end if
-       end if
-       
-    end do
-    
-  end subroutine bicgstab
-  
-  !-------------------------------------------------------------------
-  !	END BiConjugate Gradient (Stabilised) Method
-  !-------------------------------------------------------------------
-  
-  !-------------------------------------------------------------------
-  !	PARDISO Direct Solver
-  !-------------------------------------------------------------------
-  
-  subroutine solve_pardiso( acsr, &
-                            b, &
-                            ia, &
-                            ja, &
-                            x )
-    
-    use mkl_pardiso
-
-    ! Arguments
-    real(8), dimension(:), intent(in) :: acsr
-    real(8), dimension(:), intent(inout) :: b
-    integer,dimension(:), intent(in) :: ia
-    integer,dimension(:), intent(in) :: ja
-    real(8), dimension(:), allocatable, intent(out) :: x
-
-    ! Local variables
-    type(mkl_pardiso_handle), dimension(:), allocatable  :: pt
-    integer :: i,maxfct,mnum,mtype,phase,n,nrhs,error,msglvl,nnz,error1
-    integer, dimension(:), allocatable :: iparm  
-    integer,dimension(1) :: idum
-    real(8),dimension(1) :: ddum
-        integer :: badcol, missing_diag, k
-    logical :: found
-    n = size(b,1)
-    nnz = size(acsr,1)
-    nrhs = 1
-    maxfct = 1
-    mnum = 1
-    
-    if (.not.(allocated(x))) allocate(x(n))
-
-
-
-    allocate(iparm(64))		!set up pardiso control parameter
-    
-    do i=1,64
-       iparm(i) = 0
-    end do
-    
-    iparm(1) = 1 ! no solver default
-    iparm(2) = 2 ! fill-in reordering from metis
-    iparm(4) = 0 ! no iterative-direct algorithm
-    iparm(5) = 0 ! no user fill-in reducing permutation
-    iparm(6) = 0 ! =0 solution on the first n compoments of x
-    iparm(8) = 2 ! numbers of iterative refinement steps
-    iparm(10) = 13 ! perturbe the pivot elements with 1e-13
-    iparm(11) = 1 ! use nonsymmetric permutation and scaling mps
-    iparm(13) = 0 ! maximum weighted matching algorithm is
-    !switched-off (default for symmetric).
-    ! try iparm(13) = 1 in case of inaccuracy
-    iparm(14) = 0 ! output: number of perturbed pivots
-    iparm(18) = -1 ! output: number of nonzeros in the factor lu
-    iparm(19) = -1 ! output: mflops for lu factorization
-    iparm(20) = 0 ! output: numbers of cg iterations
-    
-    error  = 0 ! initialize error flag
-    msglvl = 0 ! 0=no output, 1=print statistical information
-    mtype  = 11 ! real and unsymmetric matrix
-    
-    ! Initiliaze the internal solver memory pointer.
-    ! This is only necessary for the first call of the solver.
-    
-    allocate (pt(64))
-    do i=1,64
-       pt(i)%dummy =  0
-    end do
-
-    !---------------- CSR integrity / diagnostic checks ----------------
-
-    badcol = 0
-
-    write(*,*) 'PARDISO debug:'
-    write(*,*) ' n       =', n
-    write(*,*) ' nnz     =', nnz
-    write(*,*) ' ia(1)   =', ia(1), ' ia(n+1)=', ia(n+1), ' ia(n+1)-1=', ia(n+1)-1
-
-    if (ia(1) /= 1) stop 'ERROR: ia(1) must be 1'
-    if (ia(n+1)-1 /= nnz) stop 'ERROR: ia end mismatch'
-
-    do i=1,n
-       if (ia(i) > ia(i+1)) then
-          write(*,*) 'Row pointer decreases at row', i
-          stop 'ERROR: ia not monotone'
-       end if
-    end do
-
-    do k=1,nnz
-       if (ja(k) < 1 .or. ja(k) > n) then
-          badcol = badcol + 1
-          if (badcol <= 10) write(*,*) 'Bad column index k=',k,' ja=',ja(k)
-       end if
-    end do
-    if (badcol > 0) then
-       write(*,*) 'Total bad columns =', badcol
-       stop 'ERROR: invalid ja entries'
-    end if
-
-    ! Check each row has a diagonal and (optionally) detect duplicates
-    missing_diag = 0
-    do i=1,n
-       found = .false.
-       if (ia(i) < ia(i+1)) then
-          ! simple duplicate check (requires row segment unsorted ascending to be meaningful)
-          do k = ia(i), ia(i+1)-1
-             if (ja(k) == i) then
-                if (acsr(k) == 0.0d0) then
-                   write(*,*) 'Zero diagonal at row', i
-                   stop 'ERROR: zero diagonal'
-                end if
-                found = .true.
-             end if
-          end do
-       end if
-       if (.not. found) then
-          missing_diag = missing_diag + 1
-          if (missing_diag <= 10) write(*,*) 'Missing diagonal at row', i
-       end if
-    end do
-    if (missing_diag > 0) stop 'ERROR: missing diagonals'
-    !-------------------------------------------------------------------
-    phase = 11 ! Only reordering and symbolic factorization
-    
-    call pardiso (pt,maxfct,mnum,mtype,phase,n,acsr,ia,ja, &
-         idum, nrhs, iparm, msglvl, ddum, ddum, error)
-    
-    if (error /= 0) then
-       write(*,*) 'the following error was detected: ', error
-       goto 1000
-    end if
-    
-    phase = 22 ! only factorization
-    call pardiso (pt,maxfct,mnum,mtype,phase,n,acsr,ia,ja, &
-         idum, nrhs, iparm, msglvl, ddum, ddum, error)
-    if (error /= 0) then
-       write(*,*) 'the following error was detected: ', error
-       goto 1000
-    endif
-    
-    ! back substitution and iterative refinement
-    iparm(8) = 2 ! max numbers of iterative refinement steps
-    phase = 33 ! only solving
-    call pardiso (pt,maxfct,mnum,mtype,phase,n,acsr,ia,ja, &
-         idum, nrhs, iparm, msglvl, b, x, error)
-    write(*,*) 'solve completed ... '
-    if (error /= 0) then
-       write(*,*) 'the following error was detected: ', error
-       goto 1000
-    endif
-    
-1000 continue
-    ! termination and release of memory
-    phase = -1 ! release internal memory
-    call pardiso (pt,maxfct,mnum,mtype,phase,n,ddum,idum,idum, 	&
-         idum, nrhs, iparm, msglvl, ddum, ddum, error1)
-    
-    if (error1 /= 0) then
-       write(*,*) 'the following release error was detected: ',	&
-            error1
-       stop 1
-    endif
-    
-    if ( error /= 0 ) stop 1
-    
-  end subroutine solve_pardiso
-  
-  !-------------------------------------------------------------------
-  !	END PARDISO Direct Solver
-  !-------------------------------------------------------------------
-  
-end module sparse_solver
diff --git a/src/heatflow/mod_tempdep.f90 b/src/heatflow/mod_tempdep.f90
old mode 100755
new mode 100644

From 83c6bfe6bceb20c7e0fbc6554e88cd52413ac419 Mon Sep 17 00:00:00 2001
From: Your Name <hm556@exeter.ac.uk>
Date: Tue, 2 Dec 2025 21:24:38 +0000
Subject: [PATCH 23/28] Change blas and lapack to work with threading

---
 Makefile | 45 +++++++++++++++------------------------------
 1 file changed, 15 insertions(+), 30 deletions(-)

diff --git a/Makefile b/Makefile
index 840a5a8..43c8a8b 100644
--- a/Makefile
+++ b/Makefile
@@ -9,34 +9,22 @@ SRC_DIR      := ./src
 BUILD_DIR    := ./obj
 BIN_DIR      := ./bin
 
-# Compiler
+# Compiler (gfortran with OpenMP for threading)
 FC           := gfortran
 
 # Core count
 NCORES       := $(shell nproc)
 
-# MKL
-MKLROOT      ?= /opt/intel/oneapi/mkl/latest
-MKL_LIB_DIR  := $(MKLROOT)/lib/intel64
-MKL_INCLUDE  := $(MKLROOT)/include
-MKL_FLAGS    := -L$(MKL_LIB_DIR) -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl
-
-# PETSc (manual fallback if petsc-config missing)
-PETSC_PREFIX    := /usr/lib/petscdir/petsc3.15/x86_64-linux-gnu-real
-PETSC_FINCLUDE  := /usr/share/petsc/3.15/include
-PETSC_AINCLUDE  := $(PETSC_PREFIX)/include
-PETSC_LIBDIR    := $(PETSC_PREFIX)/lib
-
-PETSC_CONFIG := $(shell command -v petsc-config 2>/dev/null)
-ifeq ($(PETSC_CONFIG),)
-  PETSC_INC  := -I$(PETSC_FINCLUDE) -I$(PETSC_AINCLUDE)
-  PETSC_LIB  := -L$(PETSC_LIBDIR) -lpetsc
-  PETSC_NOTE := (PETSc manual paths)
-else
-  PETSC_INC  := $(shell petsc-config --cflags)
-  PETSC_LIB  := $(shell petsc-config --libs)
-  PETSC_NOTE := (petsc-config)
-endif
+# Detect conda environment for BLAS/LAPACK (fallback if no system libs)
+CONDA_PREFIX ?= $(shell conda info --base 2>/dev/null || echo /home/hm556/miniforge3)
+
+# PETSc (system installation)
+PETSC_INC  := -I/usr/share/petsc/3.15/include -I/usr/lib/petscdir/petsc3.15/x86_64-linux-gnu-real/include
+PETSC_LIB  := -L/usr/lib/petscdir/petsc3.15/x86_64-linux-gnu-real/lib -lpetsc -Wl,-rpath,/usr/lib/petscdir/petsc3.15/x86_64-linux-gnu-real/lib
+PETSC_NOTE := (system PETSc 3.15)
+
+# Use OpenBLAS for multi-threaded BLAS/LAPACK (better than reference BLAS/ATLAS)
+BLAS_FLAGS := -lopenblas -lgomp -lpthread -lm
 
 # Flags
 OPTFLAGS    := -O3
@@ -44,8 +32,8 @@ OMPFLAGS    := -fopenmp
 WARNFLAGS   := -Wall
 MODDIR_FLAG := -J$(BUILD_DIR)
 
-FFLAGS      := -cpp $(OPTFLAGS) $(OMPFLAGS) $(WARNFLAGS) -I$(MKL_INCLUDE) $(PETSC_INC) $(MODDIR_FLAG)
-DEBUGFLAGS  := -cpp -O0 -g -fcheck=all -fbacktrace -ffpe-trap=invalid,zero,overflow,underflow -fbounds-check -I$(MKL_INCLUDE) $(PETSC_INC) $(MODDIR_FLAG)
+FFLAGS      := -cpp $(OPTFLAGS) $(OMPFLAGS) $(WARNFLAGS) $(PETSC_INC) $(MODDIR_FLAG)
+DEBUGFLAGS  := -cpp -O0 -g -fcheck=all -fbacktrace -ffpe-trap=invalid,zero,overflow,underflow -fbounds-check $(PETSC_INC) $(MODDIR_FLAG)
 
 # Program
 NAME    := ThermalFlow.x
@@ -62,8 +50,6 @@ SRCS := \
   heatflow/mod_material.f90 \
   heatflow/mod_hmatrix.f90 \
   heatflow/mod_init_evolve.f90 \
-  heatflow/mkl_pardiso.f90 \
-  heatflow/mod_sparse_solver.f90 \
   heatflow/mod_petsc_solver.f90 \
   heatflow/mod_boundary.f90 \
   heatflow/mod_heating.f90 \
@@ -96,15 +82,14 @@ $(BUILD_DIR)/heatflow.o: $(SRC_DIR)/heatflow.f90 | $(BUILD_DIR)
 
 # Link (single definition)
 $(TARGET): $(BIN_DIR) $(OBJS)
-	$(FC) $(OPTFLAGS) $(OMPFLAGS) $(OBJS) -o $@ $(MKL_FLAGS) $(PETSC_LIB) -Wl,-rpath,$(PETSC_LIBDIR)
+	$(FC) $(OPTFLAGS) $(OMPFLAGS) $(OBJS) -o $@ $(BLAS_FLAGS) $(PETSC_LIB)
 
 debug: FFLAGS = $(DEBUGFLAGS)
 debug: clean show $(TARGET)
 
 run: $(TARGET)
 	OMP_NUM_THREADS=$(NCORES) \
-	MKL_NUM_THREADS=$(NCORES) \
-	MKL_DYNAMIC=FALSE \
+	OPENBLAS_NUM_THREADS=$(NCORES) \
 	OMP_PROC_BIND=spread \
 	OMP_PLACES=cores \
 	$< $(RUN_ARGS)

From 2799327a49bb1a95ad9a8cfadb54b9ea75338794 Mon Sep 17 00:00:00 2001
From: Your Name <hm556@exeter.ac.uk>
Date: Tue, 2 Dec 2025 21:25:12 +0000
Subject: [PATCH 24/28] Add ability to change preconditioner and add AMG

---
 src/heatflow/mod_petsc_solver.f90 | 41 ++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90
index f5e37b7..54161af 100644
--- a/src/heatflow/mod_petsc_solver.f90
+++ b/src/heatflow/mod_petsc_solver.f90
@@ -7,6 +7,14 @@ module petsc_solver
   private
   public :: petsc_init, petsc_finalize, solve_petsc_csr, petsc_cleanup
 
+  ! ===== PRECONDITIONER SELECTION =====
+  ! Change this to switch between preconditioners:
+  ! 'GAMG' = Algebraic Multigrid (best for elliptic PDEs, 10-20x faster)
+  ! 'ILU'  = Incomplete LU (good general purpose, robust)
+  ! 'LU'   = Direct solver (most robust, uses more memory)
+  character(len=10), parameter :: PRECONDITIONER = 'GAMG'  ! <-- Change here!
+  ! ====================================
+
   ! Persistent PETSc objects (reused across timesteps for memory efficiency)
   Mat, save :: A_saved = PETSC_NULL_MAT
   Vec, save :: bb_saved = PETSC_NULL_VEC
@@ -116,10 +124,35 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
       call KSPCreate(PETSC_COMM_SELF, ksp_saved, ierr)
       call KSPSetOperators(ksp_saved, A_saved, A_saved, ierr)
       call KSPGetPC(ksp_saved, pc, ierr)
-      ! Use ILU preconditioner for better conditioning (especially for high conductivity materials)
-      call PCSetType(pc, PCILU, ierr)           ! ILU preconditioner (better than Jacobi)
-      ! call PCSetType(pc, PCJACOBI, ierr)        ! Jacobi preconditioner (simple) 
-      call KSPSetType(ksp_saved, KSPBCGS, ierr) ! BiCGSTAB solver
+      
+      ! Select preconditioner based on parameter at top of module
+      select case (trim(PRECONDITIONER))
+        case ('GAMG')
+          ! Algebraic Multigrid - Best for elliptic PDEs with varying coefficients
+          ! Optimal O(1) iterations, 10-20x faster than ILU for large problems
+          call PCSetType(pc, PCGAMG, ierr)
+          call KSPSetType(ksp_saved, KSPGMRES, ierr)  ! GMRES works well with AMG
+          write(*,'(A)') ' [Solver] Using GAMG (Algebraic Multigrid) preconditioner with GMRES'
+          
+        case ('ILU')
+          ! Incomplete LU - Good general purpose, robust
+          call PCSetType(pc, PCILU, ierr)
+          call KSPSetType(ksp_saved, KSPBCGS, ierr)   ! BiCGSTAB works well with ILU
+          write(*,'(A)') ' [Solver] Using ILU preconditioner with BiCGSTAB'
+          
+        case ('LU')
+          ! Direct LU - Most robust, more memory intensive
+          call PCSetType(pc, PCLU, ierr)
+          call KSPSetType(ksp_saved, KSPPREONLY, ierr) ! Direct solve
+          write(*,'(A)') ' [Solver] Using direct LU solver'
+          
+        case default
+          write(*,'(A,A)') ' [Warning] Unknown preconditioner: ', trim(PRECONDITIONER)
+          write(*,'(A)') '           Defaulting to ILU'
+          call PCSetType(pc, PCILU, ierr)
+          call KSPSetType(ksp_saved, KSPBCGS, ierr)
+      end select
+      
       call KSPSetTolerances(ksp_saved, rtol, PETSC_DEFAULT_REAL, &
                            PETSC_DEFAULT_REAL, maxit, ierr)
       call KSPSetNormType(ksp_saved, KSP_NORM_UNPRECONDITIONED, ierr)

From a1137b5ee630f4c7ae028f58680c8405026689f0 Mon Sep 17 00:00:00 2001
From: FHDavies <f.h.davies2@exeter.ac.uk>
Date: Thu, 15 Jan 2026 13:18:42 +0000
Subject: [PATCH 25/28] Code keeps filling up my computer! I've implemented a
 compressed binary output transiant output

Added a new input flag _CompressedOutput to param.in to enable binary 'stream' output in mod_output.f90. Also added a USER_MANUAL.md to docs/.
---
 docs/USER_MANUAL.md               | 140 ++++++++++++++++++++++++++++++
 src/heatflow/mod_inputs.f90       |  11 ++-
 src/heatflow/mod_output.f90       |  55 ++++++++----
 src/heatflow/mod_petsc_solver.f90 |   2 +-
 4 files changed, 186 insertions(+), 22 deletions(-)
 create mode 100644 docs/USER_MANUAL.md

diff --git a/docs/USER_MANUAL.md b/docs/USER_MANUAL.md
new file mode 100644
index 0000000..c622bc9
--- /dev/null
+++ b/docs/USER_MANUAL.md
@@ -0,0 +1,140 @@
+# HeatFlow User Manual
+
+This manual provides a concise guide to configuring and running simulations using the **HeatFlow** software. The software simulates heat transport using finite difference methods, primarily focusing on the Cattaneo (hyperbolic heat equation) and Fourier models.
+
+## Input Files
+
+The simulation is controlled by three main input files located in the `inputs/` directory:
+1.  **`param.in`**: Simulation parameters (time steps, flags, boundary conditions).
+2.  **`mat.in`**: Material properties.
+3.  **`system.in`**: Geometry and grid definition.
+
+### 1. `param.in` (Simulation Parameters)
+
+This file uses a `KEYWORD = VALUE` format. Comments can be added using `!`.
+
+#### General Settings
+| Keyword | Type | Default | Description |
+| :--- | :--- | :--- | :--- |
+| `_RunName` | String | `default` | Name of the simulation run. |
+| `IVERB` | Integer | `1` | Verbosity level (higher = more output). |
+| `ntime` | Integer | `10` | Total number of time steps. |
+| `time_step` | Double | `1.0` | Time step size. |
+| `freq` | Double | `1.0` | Frequency of the heater. |
+| `icattaneo` | Integer | `1` | Switch for Cattaneo term (`1` = On, `0` = Off/Fourier). |
+| `isteady` | Integer | `0` | Steady state switch (`1` = Steady state, `0` = Transient). |
+| `heattime` | Integer | `0` | Number of steps for which heating is applied (case 2). |
+| `TempDepProp`| Integer | `0` | Flag for temperature dependent properties. |
+
+#### Boundary & Conditions
+| Keyword | Type | Default | Description |
+| :--- | :--- | :--- | :--- |
+| `iboundary` | Integer | `1` | Boundary condition type. |
+| `Periodic` | String | `''` | Periodic boundaries. Contains 'x', 'y', or 'z' (e.g., `'xy'`). |
+| `kappaBound` | Double | `0.0` | Global boundary thermal conductivity (sets all planes). |
+| `kappaBoundx1`...`z2` | Double | `0.0` | Specific boundary conductivity (e.g., `kappaBoundx1` for x=1 plane). |
+| `T_System` | Double | `300.0` | Initial system temperature. |
+| `T_Bath` | Double | - | Global bath temperature (sets all boundaries boundaries). |
+| `T_Bathx1`...`z2` | Double | `T_Bath` | Specific boundary temperatures. |
+| `T_BathCG` | Double | `0.0` | Constant gradient bath temperature. |
+| `CG_dir` | String | `' '` | Direction for constant gradient (e.g., `'+x'`, `'-y'`). |
+| `T_BathCC` | Logical| `F` | Scale constant gradient with DeltaT. |
+| `BR` | Double | `1.0` | Bath Ratio (scaling factor). |
+
+#### Power
+| Keyword | Type | Default | Description |
+| :--- | :--- | :--- | :--- |
+| `power_in` | Double | `0.0` | Power input for the heater. |
+
+#### Flags (Logical)
+All flags default to `.False.`. Set to `.True.` (or `T`) to enable.
+- `_Check_Sparse_Full`: Check if simulation is sparse or full.
+- `_Check_Stability`: Perform stability check.
+- `_Check_Steady_State`: Check for steady state convergence.
+- `_WriteToTxt`: Enable writing output to text files.
+- `_Percentage_Completion`: Show progress % in output.
+- `_Test_Run`: Flag for test runs.
+- `_InputTempDis`: Load initial temperature distribution from file.
+- `_FullRestart`: Perform a full restart.
+
+#### Output Control
+Defines the region of the grid to write to output.
+| Keyword | Type | Default | Description |
+| :--- | :--- | :--- | :--- |
+| `write_every` | Integer | `1` | Write output every N steps. |
+| `start_ix`, `end_ix` | Integer | `1`..`Nx` | X-range for output. |
+| `start_iy`, `end_iy` | Integer | `1`..`Ny` | Y-range for output. |
+| `start_iz`, `end_iz` | Integer | `1`..`Nz` | Z-range for output. |
+
+---
+
+### 2. `mat.in` (Material Properties)
+
+Defines the physical properties for each material index used in the system. The file ends with a line containing `0`.
+
+**Format:**
+```
+<Material_Index>
+keyword = value
+...
+0
+```
+
+| Keyword | Description |
+| :--- | :--- |
+| `heat_capacity` | Specific heat capacity. |
+| `kappa` | Thermal conductivity. |
+| `rho` | Density. |
+| `tau` | Relaxation time (for Cattaneo). |
+| `em` | Emissivity / Parameter (usage depends on physics context). |
+| `vel` | Velocity vector (3 components, e.g., `vel = 1.0 0.0 0.0`). |
+
+**Example:**
+```
+1
+heat_capacity = 4200
+kappa = 0.541
+rho = 997
+tau = 1e-12
+vel = 0.0 0.0 0.0
+0
+```
+
+---
+
+### 3. `system.in` (Geometry/Mesh)
+
+Defines the simulation grid and the material distribution.
+
+**Structure:**
+1.  **Grid Dimensions**: `nx ny nz`
+2.  **Physical Dimensions**: `Lx Ly Lz`
+3.  **Grid Data**: A list of `MaterialID:HeaterID` for every cell.
+
+The file is read in the order: Z-planes, then Y-rows, then X-columns.
+Each line in the file (after header) corresponds to one row (X-direction).
+
+**Example:**
+```
+10 10 1
+0.01 0.01 0.001
+
+! Z=1, Y=1 Row
+1:0 1:0 1:0 1:0 1:0 1:0 1:0 1:0 1:0 1:0
+! Z=1, Y=2 Row
+1:0 1:0 ...
+```
+- `1:0` means Material ID 1, Heater ID 0 (no heater).
+- `1:1` means Material ID 1, Heater ID 1 (active heater).
+
+## Execution
+
+Ensure the `inputs/` directory exists with the three required files. Run the executable from the directory containing `inputs/`.
+
+```bash
+./ThermalFlow.x
+```
+or via `fpm`:
+```bash
+fpm run --profile release
+```
diff --git a/src/heatflow/mod_inputs.f90 b/src/heatflow/mod_inputs.f90
index 553e925..d26f629 100644
--- a/src/heatflow/mod_inputs.f90
+++ b/src/heatflow/mod_inputs.f90
@@ -91,7 +91,7 @@ module inputs
   integer(int12) :: start_ix, end_ix, start_iy, end_iy, start_iz, end_iz, TempDepProp, heated_steps
   ! flags
   logical :: Check_Sparse_Full, Check_Stability, Check_Steady_State
-  logical :: WriteToTxt, LPercentage, InputTempDis
+  logical :: WriteToTxt, LPercentage, InputTempDis, CompressedOutput
   logical ::  Test_Run = .FALSE., FullRestart = .FALSE.
 
   ! Name of simiulation run
@@ -204,7 +204,7 @@ end subroutine read_all_files
   subroutine read_param(unit)
     implicit none
     integer:: unit, Reason
-    integer,dimension(45)::readvar
+    integer,dimension(46)::readvar
     character(1024)::buffer
 
     readvar(:)=0
@@ -221,6 +221,7 @@ subroutine read_param(unit)
     RunName = 'default'
     RunName = trim(adjustl(RunName))
     WriteToTxt = .FALSE.
+    CompressedOutput = .FALSE.
     ntime = 10
     heated_steps = 0
     write_every = 1
@@ -325,6 +326,7 @@ subroutine read_param(unit)
        CALL assignD(buffer,"BR",BR,readvar(43))
        CALL assignL(buffer,"T_BathCC",T_BathCC,readvar(44))
        CALL assignS(buffer,"CG_dir",CG_dir,readvar(45))
+       CALL assignL(buffer,"_CompressedOutput",CompressedOutput,readvar(46))
        !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
     end do
@@ -520,6 +522,10 @@ subroutine check_param(readvar,n)
        readvar(39) = 1
     end if
 
+    if (readvar(46) .eq. 0) then
+       readvar(46) = 1
+    end if
+
     if (any(readvar.eq.0)) then
        write(6,*)
        write(6,'(A43)') '###############################'
@@ -554,6 +560,7 @@ subroutine check_param(readvar,n)
        write(6,'(A35,L1)')      '  _FullRestart       = ', FullRestart
        write(6,'(A35,A)')       '  _RunName           = ', trim(RunName)
        write(6,'(A35,L1)')      '  _WriteToTxt        = ', WriteToTxt
+       write(6,'(A35,L1)')      '  _CompressedOutput  = ', CompressedOutput
        write(6,'(A35,I12)')     '   ntime       = ', ntime
        write(6,'(A35,I12)')     '   heattime    = ', heated_steps
        write(6,'(A35,I12)')     '   write_every = ', write_every
diff --git a/src/heatflow/mod_output.f90 b/src/heatflow/mod_output.f90
index 0235e4e..c518761 100644
--- a/src/heatflow/mod_output.f90
+++ b/src/heatflow/mod_output.f90
@@ -44,7 +44,7 @@
 module output
   use constants, only: real12, int12, TINY, fields
   use inputs, only: nx,ny,nz, time_step, grid, NA, Check_Steady_State, ntime, WriteToTxt
-  use inputs, only: Test_Run, freq, RunName, FullRestart, IVERB, write_every
+  use inputs, only: Test_Run, freq, RunName, FullRestart, IVERB, write_every, CompressedOutput
   use inputs, only: start_ix, end_ix, start_iy, end_iy, start_iz, end_iz
   use globe_data, only: Temp_p,Temp_pp, heat, heated_volume, logname
   implicit none
@@ -92,11 +92,17 @@ subroutine data_write(itime)
           ! find most recent log file and open it
           !---------------------------------------
           CALL last_log(logname,outdir)
-          open(logunit,file=logname)
-         !  print*, logunit
-         !  print*, logname
-          write(logunit,*) real((itime-1)*(time_step)), &
-               (Temp_cur(start_ix:end_ix, start_iy:end_iy, start_iz:end_iz))
+          if (CompressedOutput) then
+             open(logunit,file=logname, status='unknown', access='stream', position='append')
+             write(logunit) real((itime-1)*(time_step), kind=real12)
+             write(logunit) (Temp_cur(start_ix:end_ix, start_iy:end_iy, start_iz:end_iz))
+          else
+             open(logunit,file=logname)
+            !  print*, logunit
+            !  print*, logname
+             write(logunit,*) real((itime-1)*(time_step)), &
+                  (Temp_cur(start_ix:end_ix, start_iy:end_iy, start_iz:end_iz))
+          end if
           close(logunit)
           !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        end if
@@ -110,17 +116,23 @@ subroutine data_write(itime)
     ! write out to log file
     !---------------------------------------
     if (.not. Test_run) then
-      if (WriteToTxt) then
-         if (mod(itime, write_every) .eq. 0) then
-            write(*, *) 'Writing Temperature difference to file'
-            ! print*, logunit
-            ! print*, logname
-            open(logunit,file=logname, status='old', position='append')
-            write(logunit,*) real((itime-1)*(time_step)), &
-               (Temp_cur(start_ix:end_ix, start_iy:end_iy, start_iz:end_iz))
-            close(logunit)
-         end if
-      endif
+       if (mod(itime, write_every) .eq. 0) then
+          if (CompressedOutput) then
+             write(*, *) 'Writing Temperature (Compressed) to file'
+             open(logunit,file=logname, status='unknown', access='stream', position='append')
+             write(logunit) real((itime-1)*(time_step), kind=real12)
+             write(logunit) (Temp_cur(start_ix:end_ix, start_iy:end_iy, start_iz:end_iz))
+             close(logunit)
+          elseif (WriteToTxt) then
+             write(*, *) 'Writing Temperature difference to file'
+             ! print*, logunit
+             ! print*, logname
+             open(logunit,file=logname, status='old', position='append')
+             write(logunit,*) real((itime-1)*(time_step)), &
+                (Temp_cur(start_ix:end_ix, start_iy:end_iy, start_iz:end_iz))
+             close(logunit)
+          end if
+       end if
     end if
     !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -202,8 +214,13 @@ subroutine last_log(logname,outdir)
    i = 0
    flag=.true.
    do while (flag)
-      write(logname, '(A,A,I2.2)') trim(adjustl(outdir)) // 'output_' // &
-           trim(adjustl(RunName)),'_',  i
+      if (CompressedOutput) then
+         write(logname, '(A,A,I2.2,A)') trim(adjustl(outdir)) // 'output_' // &
+              trim(adjustl(RunName)),'_',  i, '.bin'
+      else
+         write(logname, '(A,A,I2.2)') trim(adjustl(outdir)) // 'output_' // &
+              trim(adjustl(RunName)),'_',  i
+      endif
       inquire(file=logname, exist=flag)
       i = i+1
    end do
diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90
index 54161af..a65e01d 100644
--- a/src/heatflow/mod_petsc_solver.f90
+++ b/src/heatflow/mod_petsc_solver.f90
@@ -12,7 +12,7 @@ module petsc_solver
   ! 'GAMG' = Algebraic Multigrid (best for elliptic PDEs, 10-20x faster)
   ! 'ILU'  = Incomplete LU (good general purpose, robust)
   ! 'LU'   = Direct solver (most robust, uses more memory)
-  character(len=10), parameter :: PRECONDITIONER = 'GAMG'  ! <-- Change here!
+  character(len=10), parameter :: PRECONDITIONER = 'ILU'  ! <-- Change here!
   ! ====================================
 
   ! Persistent PETSc objects (reused across timesteps for memory efficiency)

From c6cf9297d21a1163eb47349e5fae93ec68aeccd3 Mon Sep 17 00:00:00 2001
From: HarryMclean <hm556@exeter.ac.uk>
Date: Thu, 26 Mar 2026 19:02:06 +0000
Subject: [PATCH 26/28] Make PETsc dynamically discovered

---
 Makefile | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 43c8a8b..6a0f200 100644
--- a/Makefile
+++ b/Makefile
@@ -18,10 +18,34 @@ NCORES       := $(shell nproc)
 # Detect conda environment for BLAS/LAPACK (fallback if no system libs)
 CONDA_PREFIX ?= $(shell conda info --base 2>/dev/null || echo /home/hm556/miniforge3)
 
-# PETSc (system installation)
+# PETSc (discover dynamically when possible)
+PKG_CONFIG ?= pkg-config
+PETSC_PKG_CFLAGS := $(shell $(PKG_CONFIG) --cflags petsc 2>/dev/null || $(PKG_CONFIG) --cflags PETSc 2>/dev/null)
+PETSC_PKG_LIBS   := $(shell $(PKG_CONFIG) --libs petsc 2>/dev/null || $(PKG_CONFIG) --libs PETSc 2>/dev/null)
+
+ifdef PETSC_DIR
+PETSC_DIR_INC := -I$(PETSC_DIR)/include
+ifdef PETSC_ARCH
+PETSC_DIR_INC += -I$(PETSC_DIR)/$(PETSC_ARCH)/include
+PETSC_DIR_LIB := -L$(PETSC_DIR)/$(PETSC_ARCH)/lib -Wl,-rpath,$(PETSC_DIR)/$(PETSC_ARCH)/lib
+endif
+endif
+
+ifeq ($(strip $(PETSC_PKG_CFLAGS)),)
+PETSC_INC  := $(PETSC_DIR_INC)
+PETSC_LIB  := $(PETSC_DIR_LIB) -lpetsc
+PETSC_NOTE := (PETSc from PETSC_DIR/PETSC_ARCH)
+else
+PETSC_INC  := $(PETSC_PKG_CFLAGS)
+PETSC_LIB  := $(PETSC_PKG_LIBS)
+PETSC_NOTE := (PETSc via pkg-config)
+endif
+
+ifeq ($(strip $(PETSC_INC)),)
 PETSC_INC  := -I/usr/share/petsc/3.15/include -I/usr/lib/petscdir/petsc3.15/x86_64-linux-gnu-real/include
 PETSC_LIB  := -L/usr/lib/petscdir/petsc3.15/x86_64-linux-gnu-real/lib -lpetsc -Wl,-rpath,/usr/lib/petscdir/petsc3.15/x86_64-linux-gnu-real/lib
-PETSC_NOTE := (system PETSc 3.15)
+PETSC_NOTE := (legacy PETSc 3.15 fallback)
+endif
 
 # Use OpenBLAS for multi-threaded BLAS/LAPACK (better than reference BLAS/ATLAS)
 BLAS_FLAGS := -lopenblas -lgomp -lpthread -lm

From d922e0eca25724e6f604fa3f6052d3d064a7cca1 Mon Sep 17 00:00:00 2001
From: HarryMclean <hm556@exeter.ac.uk>
Date: Thu, 26 Mar 2026 20:19:21 +0000
Subject: [PATCH 27/28] Implement MPI solver run with "OMP_NUM_THREADS=1
 OPENBLAS_NUM_THREADS=1 mpiexec -n 2 ../HeatFlow/bin/ThermalFlow.x"

---
 CHANGELOG                         |   9 +
 Makefile                          |  48 +++-
 src/heatflow.f90                  |  16 +-
 src/heatflow/mod_inputs.f90       | 198 ++++++++-------
 src/heatflow/mod_petsc_solver.f90 | 410 +++++++++++++++++-------------
 src/heatflow/mod_setup.f90        |  52 ++--
 6 files changed, 442 insertions(+), 291 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 794b181..35a47e5 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -12,6 +12,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixed
 ### Security
 
+##[2.0.0] - 2026-03-26
+## Added
+-Add Petsc solver
+-Add new constant gradient boundary conditions
+-Add new compressed output flag
+
+
+
+
 ##[1.0.0] - 2025-07-28
 ## Added
 -Additional boundary conditions
diff --git a/Makefile b/Makefile
index 6a0f200..d7715eb 100644
--- a/Makefile
+++ b/Makefile
@@ -9,8 +9,17 @@ SRC_DIR      := ./src
 BUILD_DIR    := ./obj
 BIN_DIR      := ./bin
 
-# Compiler (gfortran with OpenMP for threading)
-FC           := gfortran
+# Compiler (prefer system MPI wrapper for PETSc builds, allow user override)
+SYSTEM_PATH := PATH=/usr/bin:/bin
+ifeq ($(origin FC), default)
+ifneq ($(wildcard /usr/bin/mpifort),)
+FC := env $(SYSTEM_PATH) /usr/bin/mpifort
+else ifneq ($(wildcard /usr/bin/gfortran),)
+FC := env $(SYSTEM_PATH) /usr/bin/gfortran
+else
+FC := gfortran
+endif
+endif
 
 # Core count
 NCORES       := $(shell nproc)
@@ -19,7 +28,11 @@ NCORES       := $(shell nproc)
 CONDA_PREFIX ?= $(shell conda info --base 2>/dev/null || echo /home/hm556/miniforge3)
 
 # PETSc (discover dynamically when possible)
-PKG_CONFIG ?= pkg-config
+ifneq ($(wildcard /usr/bin/pkg-config),)
+PKG_CONFIG := env $(SYSTEM_PATH) /usr/bin/pkg-config
+else
+PKG_CONFIG := pkg-config
+endif
 PETSC_PKG_CFLAGS := $(shell $(PKG_CONFIG) --cflags petsc 2>/dev/null || $(PKG_CONFIG) --cflags PETSc 2>/dev/null)
 PETSC_PKG_LIBS   := $(shell $(PKG_CONFIG) --libs petsc 2>/dev/null || $(PKG_CONFIG) --libs PETSc 2>/dev/null)
 
@@ -47,8 +60,22 @@ PETSC_LIB  := -L/usr/lib/petscdir/petsc3.15/x86_64-linux-gnu-real/lib -lpetsc -W
 PETSC_NOTE := (legacy PETSc 3.15 fallback)
 endif
 
-# Use OpenBLAS for multi-threaded BLAS/LAPACK (better than reference BLAS/ATLAS)
-BLAS_FLAGS := -lopenblas -lgomp -lpthread -lm
+# BLAS/LAPACK backend
+OPENBLAS_LIBS := $(shell $(PKG_CONFIG) --libs openblas 2>/dev/null)
+CONDA_BLAS_LIB := $(firstword $(wildcard $(CONDA_PREFIX)/lib/libblas.so.3 $(CONDA_PREFIX)/lib/libblas.so))
+CONDA_LAPACK_LIB := $(firstword $(wildcard $(CONDA_PREFIX)/lib/liblapack.so.3 $(CONDA_PREFIX)/lib/liblapack.so))
+ifeq ($(strip $(OPENBLAS_LIBS)),)
+ifneq ($(strip $(CONDA_BLAS_LIB)$(CONDA_LAPACK_LIB)),)
+BLAS_FLAGS := -Wl,--disable-new-dtags -Wl,-rpath,$(CONDA_PREFIX)/lib -Wl,--no-as-needed $(CONDA_BLAS_LIB) $(CONDA_LAPACK_LIB) -Wl,--as-needed -lpthread -lm
+BLAS_NOTE := (OpenBLAS from CONDA_PREFIX)
+else
+BLAS_FLAGS := -lblas -llapack -lpthread -lm
+BLAS_NOTE := (system BLAS/LAPACK fallback)
+endif
+else
+BLAS_FLAGS := $(OPENBLAS_LIBS) -lpthread -lm
+BLAS_NOTE := (OpenBLAS via pkg-config)
+endif
 
 # Flags
 OPTFLAGS    := -O3
@@ -86,12 +113,14 @@ SRCS := \
 
 OBJS := $(addprefix $(BUILD_DIR)/,$(notdir $(SRCS:.f90=.o)))
 
+.NOTPARALLEL:
+
 .PHONY: all debug clean distclean run help show
 
 all: show $(TARGET)
 
 show:
-	@printf 'Building %s %s\n' '$(NAME)' '$(PETSC_NOTE)'
+	@printf 'Building %s %s %s\n' '$(NAME)' '$(PETSC_NOTE)' '$(BLAS_NOTE)'
 
 $(BIN_DIR) $(BUILD_DIR):
 	mkdir -p $@
@@ -112,8 +141,9 @@ debug: FFLAGS = $(DEBUGFLAGS)
 debug: clean show $(TARGET)
 
 run: $(TARGET)
-	OMP_NUM_THREADS=$(NCORES) \
-	OPENBLAS_NUM_THREADS=$(NCORES) \
+	mpiexec -n $(NCORES) \
+	OMP_NUM_THREADS=1 \
+	OPENBLAS_NUM_THREADS=1 \
 	OMP_PROC_BIND=spread \
 	OMP_PLACES=cores \
 	$< $(RUN_ARGS)
@@ -130,7 +160,7 @@ help:
 	@echo "Targets:"
 	@echo "  make / make all    - build optimized"
 	@echo "  make debug         - debug build"
-	@echo "  make run           - run with all cores"
+	@echo "  make run           - run distributed across all cores via MPI"
 	@echo "  make clean         - remove objects/modules"
 	@echo "  make distclean     - remove executable"
 	@echo "Variables:"
diff --git a/src/heatflow.f90 b/src/heatflow.f90
index 6817c67..cc16531 100644
--- a/src/heatflow.f90
+++ b/src/heatflow.f90
@@ -28,7 +28,7 @@ program HEATFLOW_V0_3
   use evolution, only: simulate
   use setup, only: set_global_variables
   use INITIAL, only: initial_evolve
-  use petsc_solver, only: petsc_init, petsc_finalize
+   use petsc_solver, only: petsc_init, petsc_finalize, petsc_is_root
 
   implicit none
    real(real12) :: cpustart, cpuend, cpustart2, progress
@@ -47,7 +47,7 @@ program HEATFLOW_V0_3
    !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^!
 
    ! give feedback to user that code has begun
-   write(*,*) 'Setup initialising' 
+   if (petsc_is_root()) write(*,*) 'Setup initialising'
    
    !-------------------------------------------------------------!
    ! Read parameters from input file and set global variables ...!
@@ -66,7 +66,7 @@ program HEATFLOW_V0_3
 
 
    ! give feedback to user that main simulation is begining
-   write(*,*) 'Setup complete, running simulation' 
+   if (petsc_is_root()) write(*,*) 'Setup complete, running simulation'
 
    !-------------------------------------------------------------!
    ! run simulation for 'ntime' time steps                       !
@@ -74,7 +74,7 @@ program HEATFLOW_V0_3
 
    do itime=1,ntime 
 
-      if (iverb.eq.0) then
+      if (petsc_is_root() .and. iverb.eq.0) then
          if (Lpercentage) then 
             progress = real(itime)/real(ntime)*100.0
             write(*,'(A,A,F12.4,A)', advance = 'no') achar(13)&
@@ -93,8 +93,8 @@ program HEATFLOW_V0_3
                                                 
                              
       ! Write results                           
-      CALL data_write(itime) 
-      if (IVERB.ge.3) CALL final_print                           
+      if (petsc_is_root()) CALL data_write(itime)
+      if (petsc_is_root() .and. IVERB.ge.3) CALL final_print
                                                                  
    end do  
    CALL petsc_finalize()
@@ -105,11 +105,11 @@ program HEATFLOW_V0_3
    ! calculate end time and print to user                        !
    !-------------------------------------------------------------!
    CALL cpu_time(cpuend)
-   write(*,'(A,F12.6)') ' time=', cpuend-cpustart                 
+   if (petsc_is_root()) write(*,'(A,F12.6)') ' time=', cpuend-cpustart
    !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^!
 
    ! give feedback to user that code has ended
-   write(*,*) 'all done'
+   if (petsc_is_root()) write(*,*) 'all done'
 
 end program HEATFLOW_V0_3
 
diff --git a/src/heatflow/mod_inputs.f90 b/src/heatflow/mod_inputs.f90
index d26f629..7f1fd9b 100644
--- a/src/heatflow/mod_inputs.f90
+++ b/src/heatflow/mod_inputs.f90
@@ -75,6 +75,7 @@
 module inputs
   use constants, only: real12, int12
   use constructions, only: heatblock, material
+  use mpi
   implicit none
 
   integer :: unit, newunit
@@ -367,18 +368,24 @@ end subroutine read_param
 !!!#################################################################################################
   subroutine check_param(readvar,n)
     implicit none
-    integer::n,i
+     integer::n,i,ierr,comm_rank
     integer,dimension(n)::readvar
+     logical :: is_root
     ! Not currently in use
 
+     call MPI_Comm_rank(MPI_COMM_WORLD, comm_rank, ierr)
+     is_root = (comm_rank == 0)
+
     if(any(readvar.gt.1)) then
-       write(6,*)
-       write(6,'(A43)') '###############################'
-       write(6,'(A43)') '##########   ERROR   ##########'
-       write(6,'(A43)') '###############################'
-       write(6,*)
-       write(6,'(A)') ' ---       Error in subroutine "check_param"      ---'
-       write(6,'(A)') ' --- ERROR: same KEYWORD apears more than once    ---'
+       if (is_root) then
+         write(6,*)
+         write(6,'(A43)') '###############################'
+         write(6,'(A43)') '##########   ERROR   ##########'
+         write(6,'(A43)') '###############################'
+         write(6,*)
+         write(6,'(A)') ' ---       Error in subroutine "check_param"      ---'
+         write(6,'(A)') ' --- ERROR: same KEYWORD apears more than once    ---'
+       end if
        stop
     end if
 
@@ -389,13 +396,15 @@ subroutine check_param(readvar,n)
        !------------------------------------------------------------------------------------
        ErrKB:if (((any(readvar(9:11).eq.0)) .or. any(readvar(29:31).eq.0)) &
             .and. (readvar(28) .eq. 0) )then
-          write(6,*)
-          write(6,'(A43)') '###############################'
-          write(6,'(A43)') '##########   ERORR   ##########'
-          write(6,'(A43)') '###############################'
-          write(6,*)
-          write(6,'(A)')   ' ---            Error in subroutine "check_param"            ---'
-          write(6,'(A)')   ' --- ERROR: KappaBoundx,y,z and KappaBound are not set       ---'
+         if (is_root) then
+           write(6,*)
+           write(6,'(A43)') '###############################'
+           write(6,'(A43)') '##########   ERORR   ##########'
+           write(6,'(A43)') '###############################'
+           write(6,*)
+           write(6,'(A)')   ' ---            Error in subroutine "check_param"            ---'
+           write(6,'(A)')   ' --- ERROR: KappaBoundx,y,z and KappaBound are not set       ---'
+         end if
           stop
        end if ErrKB
        !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -405,13 +414,15 @@ subroutine check_param(readvar,n)
 
         WarKBO:if (((all(readvar(9:11).eq.1)) .or. all(readvar(29:31).eq.1)) &
             .and. (readvar(28) .eq. 0) )then
-          write(6,*)
-          write(6,'(A43)') '###############################'
-          write(6,'(A43)') '##########   Warning   ##########'
-          write(6,'(A43)') '###############################'
-          write(6,*)
-          write(6,'(A)')   ' ---            Warning in subroutine "check_param"            ---'
-          write(6,'(A)')   ' --- Warning:  KappaBound is not set       ---'
+          if (is_root) then
+             write(6,*)
+             write(6,'(A43)') '###############################'
+             write(6,'(A43)') '##########   Warning   ##########'
+             write(6,'(A43)') '###############################'
+             write(6,*)
+             write(6,'(A)')   ' ---            Warning in subroutine "check_param"            ---'
+             write(6,'(A)')   ' --- Warning:  KappaBound is not set       ---'
+          end if
           readvar(28) = 1
           
        end if WarKBO
@@ -421,13 +432,15 @@ subroutine check_param(readvar,n)
        !------------------------------------------------------------------------------------
        WarKB:if (((any(readvar(9:11).eq.0)) .or. any(readvar(29:31).eq.0)) &
             .and. (readvar(28) .eq. 1) )then
-          write(6,*)
-          write(6,'(A43)') '###############################'
-          write(6,'(A43)') '##########  WARNING  ##########'
-          write(6,'(A43)') '###############################'
-          write(6,*)
-          write(6,'(A)')   ' ---           Warning in subroutine "check_param"           ---'
-          write(6,'(A)')   ' --- WARNING: KappaBoundx,y,z not set, using KappaBound      ---'
+         if (is_root) then
+           write(6,*)
+           write(6,'(A43)') '###############################'
+           write(6,'(A43)') '##########  WARNING  ##########'
+           write(6,'(A43)') '###############################'
+           write(6,*)
+           write(6,'(A)')   ' ---           Warning in subroutine "check_param"           ---'
+           write(6,'(A)')   ' --- WARNING: KappaBoundx,y,z not set, using KappaBound      ---'
+         end if
           kappaBoundx1 = KappaBound
           kappaBoundy1 = KappaBound
           kappaBoundz1 = KappaBound
@@ -438,39 +451,45 @@ subroutine check_param(readvar,n)
        !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
     elseif (((any(readvar(9:11).eq.0)) .or. any(readvar(29:31).eq.0)) &
          .or. (readvar(28) .gt. 0) ) then
-       write(6,*)
-       write(6,'(A43)') '###############################'
-       write(6,'(A43)') '##########  WARNING  ##########'
-       write(6,'(A43)') '###############################'
-       write(6,*)
-       write(6,'(A)')   ' ---            Warning in subroutine "check_param"             ---'
-       write(6,'(A)')   ' --- WARNING: Periodic Boundry set, set Kappa are ignored       ---'
+       if (is_root) then
+         write(6,*)
+         write(6,'(A43)') '###############################'
+         write(6,'(A43)') '##########  WARNING  ##########'
+         write(6,'(A43)') '###############################'
+         write(6,*)
+         write(6,'(A)')   ' ---            Warning in subroutine "check_param"             ---'
+         write(6,'(A)')   ' --- WARNING: Periodic Boundry set, set Kappa are ignored       ---'
+       end if
     end if PB
 
     !------------------------------------------------------------------------------------
     ! warning about missing bath temps. reassine to T_Bath
     !------------------------------------------------------------------------------------
     if ((readvar(42) .eq. 1) .and. (T_BathCG .gt. 0)) then
-      write(6,*)
-      write(6,'(A43)') '###############################'
-      write(6,'(A43)') '##########  WARNING  ##########'
-      write(6,'(A43)') '###############################'
-      write(6,*)
-      write(6,'(A)')   ' ---            Warning in subroutine "check_param"             ---'
-      write(6,'(A)')   ' --- WARNING: T_BathCG set T_Bath/ T_Bath x,y,z will not be used          ---'
+      if (is_root) then
+         write(6,*)
+         write(6,'(A43)') '###############################'
+         write(6,'(A43)') '##########  WARNING  ##########'
+         write(6,'(A43)') '###############################'
+         write(6,*)
+         write(6,'(A)')   ' ---            Warning in subroutine "check_param"             ---'
+         write(6,'(A)')   ' --- WARNING: T_BathCG set T_Bath/ T_Bath x,y,z will not be used          ---'
+      end if
       !set all T_Bath value checks to 1
       readvar(20:25) = 1 
       readvar(27) = 1
 
     end if
     WarBath:if ( any(readvar(20:25).eq.0) ) then
-       write(6,*)
-       write(6,'(A43)') '###############################'
-       write(6,'(A43)') '##########  WARNING  ##########'
-       write(6,'(A43)') '###############################'
-       write(6,*)
-       write(6,'(A)')   ' ---            Warning in subroutine "check_param"             ---'
-       write(6,'(A)')   ' --- WARNING: T_Bath x,y,z not set T_Bath will be used          ---'
+       if (is_root) then
+         write(6,*)
+         write(6,'(A43)') '###############################'
+         write(6,'(A43)') '##########  WARNING  ##########'
+         write(6,'(A43)') '###############################'
+         write(6,*)
+         write(6,'(A)')   ' ---            Warning in subroutine "check_param"             ---'
+         write(6,'(A)')   ' --- WARNING: T_Bath x,y,z not set T_Bath will be used          ---'
+       end if
        T_Bathx1 = T_Bath
        T_Bathx2 = T_Bath
        T_Bathy1 = T_Bath
@@ -481,13 +500,15 @@ subroutine check_param(readvar,n)
     
     !Check if T_BathCG less than 0
     if ((readvar(42) .eq. 1) .and. (T_BathCG .lt. 0.0_real12)) then
-      write(6,*)
-      write(6,'(A43)') '###############################'
-      write(6,'(A43)') '##########  WARNING  ##########'
-      write(6,'(A43)') '###############################'
-      write(6,*)
-      write(6,'(A)')   ' ---            Warning in subroutine "check_param"             ---'
-      write(6,'(A)')   ' --- WARNING: T_BathCG is negative, are you sure you?          ---'
+      if (is_root) then
+         write(6,*)
+         write(6,'(A43)') '###############################'
+         write(6,'(A43)') '##########  WARNING  ##########'
+         write(6,'(A43)') '###############################'
+         write(6,*)
+         write(6,'(A)')   ' ---            Warning in subroutine "check_param"             ---'
+         write(6,'(A)')   ' --- WARNING: T_BathCG is negative, are you sure you?          ---'
+      end if
     end if
     if (readvar(42) .eq. 0) then
       T_BathCG = 0
@@ -500,25 +521,29 @@ subroutine check_param(readvar,n)
     ! Further warnings
     !------------------------------------------------------------------------------------
     if (any(readvar(32:37).eq.0)) then
-       write(6,*)
-       write(6,'(A43)') '###############################'
-       write(6,'(A43)') '##########  WARNING  ##########'
-       write(6,'(A43)') '###############################'
-       write(6,*)
-       write(6,'(A)') ' ---       WARNING in subroutine "check_param"       ---'
-       write(6,'(A)') ' --- WARNING: Some or All output write cells paramters are not defined ---'
-       write(6,*) ' --- USING: ', 'Start_ix = ', start_ix, ', end_ix = ', end_ix, ', start_iy = ', &
+       if (is_root) then
+         write(6,*)
+         write(6,'(A43)') '###############################'
+         write(6,'(A43)') '##########  WARNING  ##########'
+         write(6,'(A43)') '###############################'
+         write(6,*)
+         write(6,'(A)') ' ---       WARNING in subroutine "check_param"       ---'
+         write(6,'(A)') ' --- WARNING: Some or All output write cells paramters are not defined ---'
+         write(6,*) ' --- USING: ', 'Start_ix = ', start_ix, ', end_ix = ', end_ix, ', start_iy = ', &
             start_iy,', end_iy = ', end_iy, ', start_iz = ', start_iz, ', end_iz = ', end_iz
+       end if
     end if
 
     if (readvar(39) .eq. 0) then
-       write(6,*)
-       write(6,'(A43)') '###############################'
-       write(6,'(A43)') '##########  WARNING  ##########'
-       write(6,'(A43)') '###############################'
-       write(6,*)
-       write(6,'(A)') ' ---       WARNING in subroutine "check_param"       ---'
-       write(6,'(A)') ' --- WARNING: TempDepProp not set, no action needed ---'
+       if (is_root) then
+         write(6,*)
+         write(6,'(A43)') '###############################'
+         write(6,'(A43)') '##########  WARNING  ##########'
+         write(6,'(A43)') '###############################'
+         write(6,*)
+         write(6,'(A)') ' ---       WARNING in subroutine "check_param"       ---'
+         write(6,'(A)') ' --- WARNING: TempDepProp not set, no action needed ---'
+       end if
        readvar(39) = 1
     end if
 
@@ -527,20 +552,21 @@ subroutine check_param(readvar,n)
     end if
 
     if (any(readvar.eq.0)) then
-       write(6,*)
-       write(6,'(A43)') '###############################'
-       write(6,'(A43)') '##########  WARNING  ##########'
-       write(6,'(A43)') '###############################'
-       write(6,*)
-       write(6,'(A)') ' ---       WARNING in subroutine "check_param"       ---'
-       write(6,'(A)') ' --- WARNING: Essential parameters missing    ---'
-       ! Print all indices of readvar that are 0
-       do i = 1, size(readvar)
-          if (readvar(i) == 0) then
+       if (is_root) then
+         write(6,*)
+         write(6,'(A43)') '###############################'
+         write(6,'(A43)') '##########  WARNING  ##########'
+         write(6,'(A43)') '###############################'
+         write(6,*)
+         write(6,'(A)') ' ---       WARNING in subroutine "check_param"       ---'
+         write(6,'(A)') ' --- WARNING: Essential parameters missing    ---'
+         do i = 1, size(readvar)
+           if (readvar(i) == 0) then
              write(6, '(A,I3)') 'Index ', i, ' of readvar is 0'
-          end if
-       end do
-       write(6,*)
+           end if
+         end do
+         write(6,*)
+       end if
     end if
     !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -548,7 +574,7 @@ subroutine check_param(readvar,n)
     !------------------------------------------------------------------------------------
     ! verbos to check for errors
     !------------------------------------------------------------------------------------
-    if(IVERB .ge. 1) then
+    if(is_root .and. IVERB .ge. 1) then
        write(6,'(A)')           ' vebose printing option'
        write(6,'(A)')           ' running calculation with :'
        write(6,'(A35,I6)')      '   IVERB             = ', IVERB
diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90
index a65e01d..be8871c 100644
--- a/src/heatflow/mod_petsc_solver.f90
+++ b/src/heatflow/mod_petsc_solver.f90
@@ -2,17 +2,17 @@ module petsc_solver
 #include "petsc/finclude/petscsys.h"
 #include "petsc/finclude/petscksp.h"
   use petscksp
-  use iso_c_binding
+  use mpi
   implicit none
   private
-  public :: petsc_init, petsc_finalize, solve_petsc_csr, petsc_cleanup
+  public :: petsc_init, petsc_finalize, solve_petsc_csr, petsc_cleanup, petsc_is_root, petsc_world_size
 
   ! ===== PRECONDITIONER SELECTION =====
   ! Change this to switch between preconditioners:
   ! 'GAMG' = Algebraic Multigrid (best for elliptic PDEs, 10-20x faster)
   ! 'ILU'  = Incomplete LU (good general purpose, robust)
   ! 'LU'   = Direct solver (most robust, uses more memory)
-  character(len=10), parameter :: PRECONDITIONER = 'ILU'  ! <-- Change here!
+  character(len=10), parameter :: PRECONDITIONER = 'GAMG'
   ! ====================================
 
   ! Persistent PETSc objects (reused across timesteps for memory efficiency)
@@ -22,14 +22,33 @@ module petsc_solver
   KSP, save :: ksp_saved = PETSC_NULL_KSP
   logical, save :: initialized = .false.
   integer, save :: n_saved = 0
+  integer, save :: comm_rank_saved = 0
+  integer, save :: comm_size_saved = 1
+  integer, save :: row_start_saved = 1
+  integer, save :: row_end_saved = 0
+  PetscInt, allocatable, save :: ia_saved(:), ja_saved(:)
+  PetscScalar, allocatable, save :: aval_saved(:)
+  PetscInt, allocatable, save :: diag_nnz_saved(:), offdiag_nnz_saved(:)
+  integer, allocatable, save :: recvcounts_saved(:), displs_saved(:)
+  real(8), allocatable, save :: b_local_saved(:), x_local_saved(:)
 
 contains
 
   subroutine petsc_init()
     integer :: ierr
     call PetscInitialize(PETSC_NULL_CHARACTER, ierr)
+    call MPI_Comm_rank(PETSC_COMM_WORLD, comm_rank_saved, ierr)
+    call MPI_Comm_size(PETSC_COMM_WORLD, comm_size_saved, ierr)
   end subroutine petsc_init
 
+  logical function petsc_is_root()
+    petsc_is_root = (comm_rank_saved == 0)
+  end function petsc_is_root
+
+  integer function petsc_world_size()
+    petsc_world_size = comm_size_saved
+  end function petsc_world_size
+
   subroutine petsc_finalize()
     integer :: ierr
     call petsc_cleanup()
@@ -37,226 +56,273 @@ subroutine petsc_finalize()
   end subroutine petsc_finalize
 
   subroutine petsc_cleanup()
-    ! Clean up persistent PETSc objects
     integer :: ierr
+
     if (A_saved /= PETSC_NULL_MAT) call MatDestroy(A_saved, ierr)
     if (bb_saved /= PETSC_NULL_VEC) call VecDestroy(bb_saved, ierr)
     if (xx_saved /= PETSC_NULL_VEC) call VecDestroy(xx_saved, ierr)
     if (ksp_saved /= PETSC_NULL_KSP) call KSPDestroy(ksp_saved, ierr)
+    if (allocated(ia_saved)) deallocate(ia_saved)
+    if (allocated(ja_saved)) deallocate(ja_saved)
+    if (allocated(aval_saved)) deallocate(aval_saved)
+    if (allocated(diag_nnz_saved)) deallocate(diag_nnz_saved)
+    if (allocated(offdiag_nnz_saved)) deallocate(offdiag_nnz_saved)
+    if (allocated(recvcounts_saved)) deallocate(recvcounts_saved)
+    if (allocated(displs_saved)) deallocate(displs_saved)
+    if (allocated(b_local_saved)) deallocate(b_local_saved)
+    if (allocated(x_local_saved)) deallocate(x_local_saved)
+
     A_saved = PETSC_NULL_MAT
     bb_saved = PETSC_NULL_VEC
     xx_saved = PETSC_NULL_VEC
     ksp_saved = PETSC_NULL_KSP
     initialized = .false.
     n_saved = 0
+    row_start_saved = 1
+    row_end_saved = 0
   end subroutine petsc_cleanup
 
+  subroutine compute_partition(n, rank, nproc, row_start, row_end)
+    integer, intent(in) :: n, rank, nproc
+    integer, intent(out) :: row_start, row_end
+    integer :: base_rows, remainder_rows, local_rows
+
+    base_rows = n / nproc
+    remainder_rows = mod(n, nproc)
+    local_rows = base_rows
+    if (rank < remainder_rows) local_rows = local_rows + 1
+
+    row_start = rank * base_rows + min(rank, remainder_rows) + 1
+    row_end = row_start + local_rows - 1
+  end subroutine compute_partition
+
+  subroutine preallocate_local_rows(n, ia, ja)
+    integer, intent(in) :: n
+    integer, intent(in) :: ia(:), ja(:)
+    integer :: local_row, global_row, entry_idx, nlocal
+    integer :: diag_begin, diag_end
+
+    nlocal = max(0, row_end_saved - row_start_saved + 1)
+    allocate(diag_nnz_saved(nlocal), offdiag_nnz_saved(nlocal))
+    diag_nnz_saved = 0
+    offdiag_nnz_saved = 0
+
+    diag_begin = row_start_saved
+    diag_end = row_end_saved
+    do local_row = 1, nlocal
+      global_row = row_start_saved + local_row - 1
+      do entry_idx = ia(global_row), ia(global_row + 1) - 1
+        if (ja(entry_idx) >= diag_begin .and. ja(entry_idx) <= diag_end) then
+          diag_nnz_saved(local_row) = diag_nnz_saved(local_row) + 1
+        else
+          offdiag_nnz_saved(local_row) = offdiag_nnz_saved(local_row) + 1
+        end if
+      end do
+    end do
+  end subroutine preallocate_local_rows
+
+  subroutine build_distributed_matrix(n, ia, ja, aval)
+    integer, intent(in) :: n
+    integer, intent(in) :: ia(:), ja(:)
+    real(8), intent(in) :: aval(:)
+
+    PetscInt :: row_idx(1)
+    PetscInt, allocatable :: cols0(:)
+    PetscScalar, allocatable :: vals0(:)
+    integer :: ierr, global_row, local_row, nlocal, row_nnz, max_row_nnz
+
+    call preallocate_local_rows(n, ia, ja)
+    nlocal = max(0, row_end_saved - row_start_saved + 1)
+    max_row_nnz = max(1, maxval(ia(2:n + 1) - ia(1:n)))
+
+    call MatCreate(PETSC_COMM_WORLD, A_saved, ierr)
+    call MatSetSizes(A_saved, nlocal, nlocal, n, n, ierr)
+    call MatSetType(A_saved, MATAIJ, ierr)
+    call MatSeqAIJSetPreallocation(A_saved, 0, diag_nnz_saved, ierr)
+    call MatMPIAIJSetPreallocation(A_saved, 0, diag_nnz_saved, 0, offdiag_nnz_saved, ierr)
+
+    allocate(cols0(max_row_nnz), vals0(max_row_nnz))
+    do local_row = 1, nlocal
+      global_row = row_start_saved + local_row - 1
+      row_nnz = ia(global_row + 1) - ia(global_row)
+      if (row_nnz <= 0) cycle
+
+      row_idx(1) = global_row - 1
+      cols0(1:row_nnz) = ja(ia(global_row):ia(global_row + 1) - 1) - 1
+      vals0(1:row_nnz) = aval(ia(global_row):ia(global_row + 1) - 1)
+      call MatSetValues(A_saved, 1, row_idx, row_nnz, cols0, vals0, INSERT_VALUES, ierr)
+    end do
+    deallocate(cols0, vals0)
+
+    call MatAssemblyBegin(A_saved, MAT_FINAL_ASSEMBLY, ierr)
+    call MatAssemblyEnd(A_saved, MAT_FINAL_ASSEMBLY, ierr)
+  end subroutine build_distributed_matrix
+
+  subroutine update_distributed_matrix(n, ia, ja, aval)
+    integer, intent(in) :: n
+    integer, intent(in) :: ia(:), ja(:)
+    real(8), intent(in) :: aval(:)
+
+    PetscInt :: row_idx(1)
+    PetscInt, allocatable :: cols0(:)
+    PetscScalar, allocatable :: vals0(:)
+    integer :: ierr, global_row, local_row, nlocal, row_nnz, max_row_nnz
+
+    nlocal = max(0, row_end_saved - row_start_saved + 1)
+    max_row_nnz = max(1, maxval(ia(2:n + 1) - ia(1:n)))
+    allocate(cols0(max_row_nnz), vals0(max_row_nnz))
+
+    call MatZeroEntries(A_saved, ierr)
+    do local_row = 1, nlocal
+      global_row = row_start_saved + local_row - 1
+      row_nnz = ia(global_row + 1) - ia(global_row)
+      if (row_nnz <= 0) cycle
+
+      row_idx(1) = global_row - 1
+      cols0(1:row_nnz) = ja(ia(global_row):ia(global_row + 1) - 1) - 1
+      vals0(1:row_nnz) = aval(ia(global_row):ia(global_row + 1) - 1)
+      call MatSetValues(A_saved, 1, row_idx, row_nnz, cols0, vals0, INSERT_VALUES, ierr)
+    end do
+    deallocate(cols0, vals0)
+
+    call MatAssemblyBegin(A_saved, MAT_FINAL_ASSEMBLY, ierr)
+    call MatAssemblyEnd(A_saved, MAT_FINAL_ASSEMBLY, ierr)
+  end subroutine update_distributed_matrix
+
   subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
-    integer,  intent(in) :: n
-    integer,  intent(in) :: ia(:), ja(:)
-    real(8),  intent(in) :: aval(:), b(:)
-    real(8),  intent(inout) :: x(:)
-    real(8),  intent(in) :: rtol
-    integer,  intent(in) :: maxit
-    
-    PC  :: pc
-    integer :: ierr, i, row_nz, start_k, its
-    integer, allocatable :: cols0(:), idx(:), d_nnz(:)
-    PetscInt :: idx_array(1)
-    PetscScalar :: val_array(1)
-    real(8), allocatable :: vals(:)
+    integer, intent(in) :: n
+    integer, intent(in) :: ia(:), ja(:)
+    real(8), intent(in) :: aval(:), b(:)
+    real(8), intent(inout) :: x(:)
+    real(8), intent(in) :: rtol
+    integer, intent(in) :: maxit
+
+    PC :: pc
+    integer :: ierr, its
     real(8) :: rnorm
     logical :: rebuild_needed
+    PetscScalar, pointer :: vec_array(:)
+    integer :: nlocal, rank_idx
 
-    ! write(*,'(A,I0)') ' [DEBUG] Entered solve_petsc_csr, n=', n
-    ! call flush(6)
-    
-    if (size(ia) /= n+1) stop 'solve_petsc_csr: ia size mismatch'
+    if (size(ia) /= n + 1) stop 'solve_petsc_csr: ia size mismatch'
     if (size(b) /= n .or. size(x) /= n) stop 'solve_petsc_csr: vector size mismatch'
 
-    ! write(*,'(A)') ' [DEBUG] Size checks passed'
-    ! call flush(6)
-
-    ! Determine if we need to rebuild the matrix structure
     rebuild_needed = .false.
     if (.not. initialized) rebuild_needed = .true.
     if (n /= n_saved) rebuild_needed = .true.
-    
-    ! write(*,'(A,L1)') ' [DEBUG] rebuild_needed=', rebuild_needed
-    ! call flush(6)
-    
-    ! Create PETSc objects on first call or if size changed
+
+    if (.not. rebuild_needed .and. allocated(ia_saved)) then
+      if (size(ia_saved) /= size(ia) .or. size(ja_saved) /= size(ja) .or. size(aval_saved) /= size(aval)) then
+        rebuild_needed = .true.
+      else if (.not. all(ia_saved == ia - 1) .or. .not. all(ja_saved == ja - 1)) then
+        rebuild_needed = .true.
+      end if
+    end if
+
     if (rebuild_needed) then
-      ! write(*,'(A)') ' [DEBUG] Starting PETSc object creation...'
-      ! call flush(6)
-      
-      ! Clean up old objects if they exist
       if (initialized) call petsc_cleanup()
-      
-      ! write(*,'(A)') ' [DEBUG] Preallocating matrix...'
-      ! call flush(6)
-      
-      ! Preallocate matrix with exact nonzeros per row (saves memory)
-      allocate(d_nnz(n))
-      do i = 1, n
-        d_nnz(i) = ia(i+1) - ia(i)
+
+      call compute_partition(n, comm_rank_saved, comm_size_saved, row_start_saved, row_end_saved)
+      nlocal = max(0, row_end_saved - row_start_saved + 1)
+
+      allocate(ia_saved(size(ia)), ja_saved(size(ja)), aval_saved(size(aval)))
+      ia_saved = ia - 1
+      ja_saved = ja - 1
+      aval_saved = aval
+
+      call build_distributed_matrix(n, ia, ja, aval)
+
+      allocate(recvcounts_saved(comm_size_saved), displs_saved(comm_size_saved))
+      do rank_idx = 0, comm_size_saved - 1
+        call compute_partition(n, rank_idx, comm_size_saved, ierr, its)
+        recvcounts_saved(rank_idx + 1) = max(0, its - ierr + 1)
+        displs_saved(rank_idx + 1) = ierr - 1
       end do
-      
-      ! write(*,'(A,I0,A,I0)') ' [DEBUG] Creating matrix: n=', n, ', max_nnz/row=', maxval(d_nnz)
-      ! call flush(6)
-      
-      ! Create matrix with exact preallocation (most memory-efficient)
-      call MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 0, d_nnz, A_saved, ierr)
-      if (ierr /= 0) then
-        write(0,*) "ERROR: MatCreateSeqAIJ failed with ierr=", ierr
-        write(0,*) "  Matrix size may exceed system limits"
-        write(0,*) "  n=", n, ", nnz=", sum(int(d_nnz,8)), ", max_nnz/row=", maxval(d_nnz)
-        stop
-      end if
-      
-      deallocate(d_nnz)
-      
-      ! Create persistent vectors
-      call VecCreateSeq(PETSC_COMM_SELF, n, bb_saved, ierr)
-      call VecCreateSeq(PETSC_COMM_SELF, n, xx_saved, ierr)
-      
-      ! Create and configure KSP solver (persistent across timesteps)
-      call KSPCreate(PETSC_COMM_SELF, ksp_saved, ierr)
+
+      allocate(b_local_saved(max(1, nlocal)), x_local_saved(max(1, nlocal)))
+
+      call VecCreateMPI(PETSC_COMM_WORLD, nlocal, n, bb_saved, ierr)
+      call VecDuplicate(bb_saved, xx_saved, ierr)
+
+      call KSPCreate(PETSC_COMM_WORLD, ksp_saved, ierr)
       call KSPSetOperators(ksp_saved, A_saved, A_saved, ierr)
+      call KSPSetInitialGuessNonzero(ksp_saved, PETSC_TRUE, ierr)
       call KSPGetPC(ksp_saved, pc, ierr)
-      
-      ! Select preconditioner based on parameter at top of module
+
       select case (trim(PRECONDITIONER))
         case ('GAMG')
-          ! Algebraic Multigrid - Best for elliptic PDEs with varying coefficients
-          ! Optimal O(1) iterations, 10-20x faster than ILU for large problems
           call PCSetType(pc, PCGAMG, ierr)
-          call KSPSetType(ksp_saved, KSPGMRES, ierr)  ! GMRES works well with AMG
-          write(*,'(A)') ' [Solver] Using GAMG (Algebraic Multigrid) preconditioner with GMRES'
-          
+          call KSPSetType(ksp_saved, KSPGMRES, ierr)
+          if (petsc_is_root()) then
+            write(*,'(A,I0,A)') ' [Solver] Using GAMG (Algebraic Multigrid) with GMRES across ', &
+                 comm_size_saved, ' MPI ranks'
+          end if
+
         case ('ILU')
-          ! Incomplete LU - Good general purpose, robust
           call PCSetType(pc, PCILU, ierr)
-          call KSPSetType(ksp_saved, KSPBCGS, ierr)   ! BiCGSTAB works well with ILU
-          write(*,'(A)') ' [Solver] Using ILU preconditioner with BiCGSTAB'
-          
+          call KSPSetType(ksp_saved, KSPBCGS, ierr)
+          if (petsc_is_root()) then
+            write(*,'(A,I0,A)') ' [Solver] Using ILU preconditioner with BiCGSTAB across ', &
+                 comm_size_saved, ' MPI ranks'
+          end if
+
         case ('LU')
-          ! Direct LU - Most robust, more memory intensive
           call PCSetType(pc, PCLU, ierr)
-          call KSPSetType(ksp_saved, KSPPREONLY, ierr) ! Direct solve
-          write(*,'(A)') ' [Solver] Using direct LU solver'
-          
+          call KSPSetType(ksp_saved, KSPPREONLY, ierr)
+          if (petsc_is_root()) then
+            write(*,'(A,I0,A)') ' [Solver] Using direct LU solver across ', comm_size_saved, ' MPI ranks'
+          end if
+
         case default
-          write(*,'(A,A)') ' [Warning] Unknown preconditioner: ', trim(PRECONDITIONER)
-          write(*,'(A)') '           Defaulting to ILU'
+          if (petsc_is_root()) then
+            write(*,'(A,A)') ' [Warning] Unknown preconditioner: ', trim(PRECONDITIONER)
+            write(*,'(A)') '           Defaulting to ILU'
+          end if
           call PCSetType(pc, PCILU, ierr)
           call KSPSetType(ksp_saved, KSPBCGS, ierr)
       end select
-      
-      call KSPSetTolerances(ksp_saved, rtol, PETSC_DEFAULT_REAL, &
-                           PETSC_DEFAULT_REAL, maxit, ierr)
+
+      call KSPSetTolerances(ksp_saved, rtol, PETSC_DEFAULT_REAL, PETSC_DEFAULT_REAL, maxit, ierr)
       call KSPSetNormType(ksp_saved, KSP_NORM_UNPRECONDITIONED, ierr)
       call KSPSetFromOptions(ksp_saved, ierr)
-      
+
       initialized = .true.
       n_saved = n
+    else
+      if (any(aval_saved /= aval)) then
+        aval_saved = aval
+        call update_distributed_matrix(n, ia, ja, aval)
+      end if
     end if
 
-    ! Update matrix values (always needed each timestep)
-    call MatZeroEntries(A_saved, ierr)
-    do i = 1, n
-       row_nz = ia(i+1) - ia(i)
-       if (row_nz > 0) then
-          start_k = ia(i)
-          allocate(cols0(row_nz), vals(row_nz))
-          ! Convert column indices from 1-based to 0-based for PETSc
-          cols0 = ja(start_k:start_k+row_nz-1) - 1
-          vals  = aval(start_k:start_k+row_nz-1)
-          
-          ! Set row i-1 (0-based) with column indices cols0 (0-based)
-          call MatSetValues(A_saved, 1, (/i-1/), row_nz, cols0, vals, INSERT_VALUES, ierr)
-          deallocate(cols0, vals)
-       end if
-    end do
-    call MatAssemblyBegin(A_saved, MAT_FINAL_ASSEMBLY, ierr)
-    call MatAssemblyEnd(A_saved, MAT_FINAL_ASSEMBLY, ierr)
-    
-    ! Optional: Verify matrix assembly (uncomment for debugging)
-    ! call MatView(A_saved, PETSC_VIEWER_STDOUT_SELF, ierr)
-
-    ! Update RHS vector in batches to avoid memory issues with huge systems
-    block
-      integer, parameter :: VEC_CHUNK = 1000000
-      integer :: vec_start, vec_end, vec_len, k
-      integer, allocatable :: idx_vec(:)
-      
-      do vec_start = 1, n, VEC_CHUNK
-        vec_end = min(vec_start + VEC_CHUNK - 1, n)
-        vec_len = vec_end - vec_start + 1
-        
-        allocate(idx_vec(vec_len))
-        idx_vec = [(vec_start + k - 2, k=1,vec_len)]  ! 0-based indices
-        
-        call VecSetValues(bb_saved, vec_len, idx_vec, b(vec_start:vec_end), INSERT_VALUES, ierr)
-        deallocate(idx_vec)
-      end do
-    end block
-    call VecAssemblyBegin(bb_saved,ierr); call VecAssemblyEnd(bb_saved,ierr)
-
-    ! Update initial guess in batches
-    block
-      integer, parameter :: VEC_CHUNK = 1000000
-      integer :: vec_start, vec_end, vec_len, k
-      integer, allocatable :: idx_vec(:)
-      
-      do vec_start = 1, n, VEC_CHUNK
-        vec_end = min(vec_start + VEC_CHUNK - 1, n)
-        vec_len = vec_end - vec_start + 1
-        
-        allocate(idx_vec(vec_len))
-        idx_vec = [(vec_start + k - 2, k=1,vec_len)]  ! 0-based indices
-        
-        call VecSetValues(xx_saved, vec_len, idx_vec, x(vec_start:vec_end), INSERT_VALUES, ierr)
-        deallocate(idx_vec)
-      end do
-    end block
-    call VecAssemblyBegin(xx_saved,ierr); call VecAssemblyEnd(xx_saved,ierr)
+    nlocal = max(0, row_end_saved - row_start_saved + 1)
+    if (nlocal > 0) then
+      b_local_saved(1:nlocal) = b(row_start_saved:row_end_saved)
+      x_local_saved(1:nlocal) = x(row_start_saved:row_end_saved)
+    end if
+
+    call VecGetArrayF90(bb_saved, vec_array, ierr)
+    if (nlocal > 0) vec_array(1:nlocal) = b_local_saved(1:nlocal)
+    call VecRestoreArrayF90(bb_saved, vec_array, ierr)
+
+    call VecGetArrayF90(xx_saved, vec_array, ierr)
+    if (nlocal > 0) vec_array(1:nlocal) = x_local_saved(1:nlocal)
+    call VecRestoreArrayF90(xx_saved, vec_array, ierr)
 
-    ! Solve the system
     call KSPSolve(ksp_saved, bb_saved, xx_saved, ierr)
-    
     if (ierr /= 0) then
-       write(0,*) "ERROR: KSPSolve failed with error code:", ierr
-       stop
+      write(0,*) 'ERROR: KSPSolve failed with error code:', ierr
+      stop
     end if
-    
+
     call KSPGetIterationNumber(ksp_saved, its, ierr)
     call KSPGetResidualNorm(ksp_saved, rnorm, ierr)
 
-    ! Extract solution vector using batched VecGetValues
-    block
-      integer, parameter :: CHUNK_SIZE = 100000
-      PetscInt, allocatable :: idx_batch(:)
-      PetscScalar, allocatable :: val_batch(:)
-      integer :: i_start, i_end, chunk_len, j
-      
-      do i_start = 1, n, CHUNK_SIZE
-        i_end = min(i_start + CHUNK_SIZE - 1, n)
-        chunk_len = i_end - i_start + 1
-        
-        allocate(idx_batch(chunk_len), val_batch(chunk_len))
-        
-        do j = 1, chunk_len
-          idx_batch(j) = i_start + j - 2
-        end do
-        
-        call VecGetValues(xx_saved, chunk_len, idx_batch, val_batch, ierr)
-        x(i_start:i_end) = val_batch(1:chunk_len)
-        
-        deallocate(idx_batch, val_batch)
-      end do
-    end block
+    call VecGetArrayF90(xx_saved, vec_array, ierr)
+    if (nlocal > 0) x_local_saved(1:nlocal) = vec_array(1:nlocal)
+    call VecRestoreArrayF90(xx_saved, vec_array, ierr)
 
+    call MPI_Allgatherv(x_local_saved, nlocal, MPI_DOUBLE_PRECISION, x, recvcounts_saved, displs_saved, &
+         MPI_DOUBLE_PRECISION, PETSC_COMM_WORLD, ierr)
   end subroutine solve_petsc_csr
-
 end module petsc_solver
\ No newline at end of file
diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90
index 2b3ebe6..57cd2f7 100644
--- a/src/heatflow/mod_setup.f90
+++ b/src/heatflow/mod_setup.f90
@@ -16,6 +16,7 @@
 !!!#################################################################################################
 module setup
   use constants, only: real12, int12, TINY
+   use mpi
   use inputs, only: nx, ny, nz, NA, grid, time_step, kappaBoundx1, kappaBoundy1, kappaBoundz1 
   use inputs, only: Check_Sparse_Full, Check_Stability, ntime,IVERB, Periodicx, Periodicy
   use inputs, only: Periodicz ! 
@@ -36,9 +37,14 @@ module setup
 !!!#################################################################################################
    subroutine set_global_variables()
       integer(int12) :: ix,iy,iz,index
+      integer :: ierr, comm_rank
+      logical :: is_root
       real(real12) :: kappa,kappa3D,h_conv,heat_capacity,rho,sound_speed,tau, em
       real(real12), dimension(3) :: vel
 
+      call MPI_Comm_rank(MPI_COMM_WORLD, comm_rank, ierr)
+      is_root = (comm_rank == 0)
+
       allocate(Temp_cur(nx, ny, nz))
       allocate(Temp_p(NA))
       allocate(Temp_pp(NA))
@@ -52,12 +58,14 @@ subroutine set_global_variables()
       ! can be expanded to include more properties at a 
       ! later date
       !---------------------------------------------------
-      write(*,*) "Setting up material properties"
-      write(*,'(A,I10,A)') " Processing ", NA, " grid cells..."
+      if (is_root) then
+         write(*,*) "Setting up material properties"
+         write(*,'(A,I10,A)') " Processing ", NA, " grid cells..."
+      end if
       index = 0
       do iz = 1, nz
          ! Progress reporting every 10% for large grids
-         if (mod(iz-1, max(1,nz/10)) == 0 .and. iz > 1) then
+         if (is_root .and. mod(iz-1, max(1,nz/10)) == 0 .and. iz > 1) then
             write(*,'(A,I3,A)') "   Progress: ", int(100.0*real(iz)/real(nz)), "%"
          end if
          do iy = 1, ny
@@ -81,7 +89,7 @@ subroutine set_global_variables()
       !---------------------------------------------------
       ! Check if the sparse matrix matches the full matrix
       !---------------------------------------------------
-      write(*,*) "Building sparse H matrix..."
+      if (is_root) write(*,*) "Building sparse H matrix..."
       if (Check_Sparse_Full) then
          print*, "CHECK SPARSE FULL"
          CALL build_Hmatrix()
@@ -89,7 +97,7 @@ subroutine set_global_variables()
          ! Build CSR format directly (acsr, ja, ia are allocated inside sparse_Hmatrix)
          CALL sparse_Hmatrix()
          ! No need for COO->CSR conversion anymore, it's already in CSR format!
-         write(*,*) "Sparse matrix setup complete."
+         if (is_root) write(*,*) "Sparse matrix setup complete."
       end if
       !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -103,6 +111,8 @@ end subroutine set_global_variables
 !!!#################################################################################################
    subroutine sparse_Hmatrix()
      implicit none
+      integer :: ierr, comm_rank
+      logical :: is_root
       real(real12) :: H0 ! Holds the value of the H matrix
       integer(int12) :: i, j, count, k, row ! i and j are the row and column of the H matrix
       integer(int12) :: nnz_estimate
@@ -112,14 +122,12 @@ subroutine sparse_Hmatrix()
       real(real12), allocatable, dimension(:) :: row_vals
       integer(int12), allocatable, dimension(:) :: row_cols
       integer(int12) :: row_count, max_row_size
+
+      call MPI_Comm_rank(MPI_COMM_WORLD, comm_rank, ierr)
+      is_root = (comm_rank == 0)
       
       ra%n = NA ! The number of rows in the H matrix
-      ra%len = len ! The number of non-zero elements in the H matrix
-      ! Allocate the arrays to hold the H matrix in sparse storage
-      allocate(ra%val(len), ra%irow(len), ra%jcol(len))
-      ra%val(:)=0
-      ra%irow(:)=-2
-      ra%jcol(:)=-1
+      ra%len = 0
       addit = [1] ! The values to add to the row to get the column
       if (Periodicx) addit = [addit, (nx-1)]
       if (ny .gt. 1) addit = [addit, nx] ! Add the values to add to the row to get the column
@@ -127,6 +135,18 @@ subroutine sparse_Hmatrix()
       if (nz .gt. 1) addit = [addit, nx*ny]  ! Add the values to add to the row to get the column
       if ((Periodicz).and.(nz .gt. 1)) addit = [addit, (nz-1)*ny*nx]
 
+      max_row_size = 1 + 2*size(addit,1)
+      nnz_estimate = NA * max_row_size
+
+      if (allocated(acsr)) deallocate(acsr)
+      if (allocated(ja)) deallocate(ja)
+      if (allocated(ia)) deallocate(ia)
+      allocate(acsr(nnz_estimate), ja(nnz_estimate), ia(NA+1))
+      allocate(row_vals(max_row_size), row_cols(max_row_size))
+      acsr = 0.0_real12
+      ja = 0
+      ia = 0
+
       
       !write(6,*) NA, nx,ny,nz
       !write(6,*) addit,size(addit,1)
@@ -138,10 +158,10 @@ subroutine sparse_Hmatrix()
       ia(1) = 1 ! CSR row pointer (1-based for Fortran)
       
       ! Build CSR format row-by-row
-      write(*,'(A)') " Building CSR matrix row-by-row..."
+      if (is_root) write(*,'(A)') " Building CSR matrix row-by-row..."
       parent_loop: do row = 1, NA
          ! Progress reporting every 10%
-         if (mod(row-1, max(1,NA/10)) == 0 .and. row > 1) then
+         if (is_root .and. mod(row-1, max(1,NA/10)) == 0 .and. row > 1) then
             write(*,'(A,I3,A,I12,A)') "   Progress: ", int(100.0*real(row)/real(NA)), &
                  "%, nnz=", count, ""
          end if
@@ -200,12 +220,12 @@ subroutine sparse_Hmatrix()
       
       ! Trim arrays to actual size if we over-estimated
       if (count < size(acsr)) then
-         write(*,'(A,I12,A,I12)') " Trimming arrays from ", size(acsr), " to ", count
+         if (is_root) write(*,'(A,I12,A,I12)') " Trimming arrays from ", size(acsr), " to ", count
          call trim_csr_arrays(acsr, ja, count)
       end if
       
-      deallocate(row_vals, row_cols)
-      write(*,'(A,I12,A)') " CSR matrix built successfully. Actual nonzeros: ", count, ""
+      deallocate(addit, row_vals, row_cols)
+      if (is_root) write(*,'(A,I12,A)') " CSR matrix built successfully. Actual nonzeros: ", count, ""
    end subroutine sparse_Hmatrix
 !!!#################################################################################################
 

From 5fe5f036a1985576bb0ba25d7afad8ed0f55bd88 Mon Sep 17 00:00:00 2001
From: Ned Taylor <71959356+nedtaylor@users.noreply.github.com>
Date: Mon, 30 Mar 2026 11:57:46 +0100
Subject: [PATCH 28/28] Improve fpm compilation

---
 fpm.toml                          | 16 +++++++++++++--
 src/heatflow/mod_petsc_solver.f90 | 33 ++++++++++++++++++++-----------
 2 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/fpm.toml b/fpm.toml
index a1f0b5d..fcc5f3f 100644
--- a/fpm.toml
+++ b/fpm.toml
@@ -9,12 +9,24 @@ description = "A Fortran executable for modelling Cattaneo heat flow"
 [preprocess.cpp]
 suffixes = ["F90", "f90"]
 
+[dependencies]
+mpi = "*"
+
+[features]
+petsc.build.external-modules = ["petscksp"]
+petsc.build.link = ["petsc"]
+petsc.flags = "-I../petsc/include -I../petsc/arch-darwin-c-debug/include"
+petsc.link-time-flags = "-L../petsc/arch-darwin-c-debug/lib -Wl,-rpath,../petsc/arch-darwin-c-debug/lib"
+
+[profiles]
+default = ["petsc"]
+
 [library]
 source-dir="src"
 
 [fortran]
-implicit-typing = false
-implicit-external = false
+implicit-typing = true
+implicit-external = true
 source-form = "free"
 
 [[executable]]
diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90
index be8871c..65bbf45 100644
--- a/src/heatflow/mod_petsc_solver.f90
+++ b/src/heatflow/mod_petsc_solver.f90
@@ -27,6 +27,7 @@ module petsc_solver
   integer, save :: row_start_saved = 1
   integer, save :: row_end_saved = 0
   PetscInt, allocatable, save :: ia_saved(:), ja_saved(:)
+  PetscInt, allocatable, save :: local_indices_saved(:)
   PetscScalar, allocatable, save :: aval_saved(:)
   PetscInt, allocatable, save :: diag_nnz_saved(:), offdiag_nnz_saved(:)
   integer, allocatable, save :: recvcounts_saved(:), displs_saved(:)
@@ -64,6 +65,7 @@ subroutine petsc_cleanup()
     if (ksp_saved /= PETSC_NULL_KSP) call KSPDestroy(ksp_saved, ierr)
     if (allocated(ia_saved)) deallocate(ia_saved)
     if (allocated(ja_saved)) deallocate(ja_saved)
+    if (allocated(local_indices_saved)) deallocate(local_indices_saved)
     if (allocated(aval_saved)) deallocate(aval_saved)
     if (allocated(diag_nnz_saved)) deallocate(diag_nnz_saved)
     if (allocated(offdiag_nnz_saved)) deallocate(offdiag_nnz_saved)
@@ -201,8 +203,7 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
     integer :: ierr, its
     real(8) :: rnorm
     logical :: rebuild_needed
-    PetscScalar, pointer :: vec_array(:)
-    integer :: nlocal, rank_idx
+    integer :: nlocal, rank_idx, local_row
 
     if (size(ia) /= n + 1) stop 'solve_petsc_csr: ia size mismatch'
     if (size(b) /= n .or. size(x) /= n) stop 'solve_petsc_csr: vector size mismatch'
@@ -230,6 +231,13 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
       ja_saved = ja - 1
       aval_saved = aval
 
+      allocate(local_indices_saved(max(1, nlocal)))
+      if (nlocal > 0) then
+        do local_row = 1, nlocal
+          local_indices_saved(local_row) = row_start_saved + local_row - 2
+        end do
+      end if
+
       call build_distributed_matrix(n, ia, ja, aval)
 
       allocate(recvcounts_saved(comm_size_saved), displs_saved(comm_size_saved))
@@ -301,13 +309,16 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
       x_local_saved(1:nlocal) = x(row_start_saved:row_end_saved)
     end if
 
-    call VecGetArrayF90(bb_saved, vec_array, ierr)
-    if (nlocal > 0) vec_array(1:nlocal) = b_local_saved(1:nlocal)
-    call VecRestoreArrayF90(bb_saved, vec_array, ierr)
-
-    call VecGetArrayF90(xx_saved, vec_array, ierr)
-    if (nlocal > 0) vec_array(1:nlocal) = x_local_saved(1:nlocal)
-    call VecRestoreArrayF90(xx_saved, vec_array, ierr)
+    call VecSet(bb_saved, 0.0d0, ierr)
+    call VecSet(xx_saved, 0.0d0, ierr)
+    if (nlocal > 0) then
+      call VecSetValues(bb_saved, nlocal, local_indices_saved, b_local_saved, INSERT_VALUES, ierr)
+      call VecSetValues(xx_saved, nlocal, local_indices_saved, x_local_saved, INSERT_VALUES, ierr)
+    end if
+    call VecAssemblyBegin(bb_saved, ierr)
+    call VecAssemblyEnd(bb_saved, ierr)
+    call VecAssemblyBegin(xx_saved, ierr)
+    call VecAssemblyEnd(xx_saved, ierr)
 
     call KSPSolve(ksp_saved, bb_saved, xx_saved, ierr)
     if (ierr /= 0) then
@@ -318,9 +329,7 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit)
     call KSPGetIterationNumber(ksp_saved, its, ierr)
     call KSPGetResidualNorm(ksp_saved, rnorm, ierr)
 
-    call VecGetArrayF90(xx_saved, vec_array, ierr)
-    if (nlocal > 0) x_local_saved(1:nlocal) = vec_array(1:nlocal)
-    call VecRestoreArrayF90(xx_saved, vec_array, ierr)
+    if (nlocal > 0) call VecGetValues(xx_saved, nlocal, local_indices_saved, x_local_saved, ierr)
 
     call MPI_Allgatherv(x_local_saved, nlocal, MPI_DOUBLE_PRECISION, x, recvcounts_saved, displs_saved, &
          MPI_DOUBLE_PRECISION, PETSC_COMM_WORLD, ierr)