From 51a5af4031365af07b3e26d4a502f72f926023db Mon Sep 17 00:00:00 2001 From: hm556 Date: Tue, 2 Sep 2025 13:05:51 +0100 Subject: [PATCH 01/28] Add ability to read velocity vector from mat.in --- src/heatflow/mod_constructions.f90 | 2 ++ src/heatflow/mod_inputs.f90 | 23 +++++++++++++++++++++-- src/heatflow/mod_material.f90 | 6 +++++- src/heatflow/mod_setup.f90 | 6 ++++-- 4 files changed, 32 insertions(+), 5 deletions(-) diff --git a/src/heatflow/mod_constructions.f90 b/src/heatflow/mod_constructions.f90 index b233f7c..384ce09 100644 --- a/src/heatflow/mod_constructions.f90 +++ b/src/heatflow/mod_constructions.f90 @@ -26,6 +26,7 @@ module constructions integer(int12) :: imaterial_type !what type of material it is in the mat.in real(real12) :: volume !volume of the block real(real12), dimension(3) :: Length !length of the block in 3 dimensions + real(real12), dimension(3) :: vel !center of the block in 3 dimensions integer(int12) :: iheater !whether the block is a heater real(real12) :: kappa, rho, heat_capacity, tau, em !physical properties of the material end type heatblock @@ -39,6 +40,7 @@ module constructions real(real12) :: rho !density of the material real(real12) :: sound_speed !speed of sound in the material real(real12) :: tau !relaxation time + real(real12), dimension(3) :: vel !velocity of the material real(real12) :: em !emisstivity logical :: source !whether the material is a source of heat ?? end type material diff --git a/src/heatflow/mod_inputs.f90 b/src/heatflow/mod_inputs.f90 index 4d29245..e8b3025 100644 --- a/src/heatflow/mod_inputs.f90 +++ b/src/heatflow/mod_inputs.f90 @@ -657,7 +657,7 @@ subroutine read_mat(unit) type(material), dimension(100) :: dum_mat character(1024) :: buffer integer :: reason, j - integer, dimension(8) :: readvarmat + integer, dimension(9) :: readvarmat integer :: i, index i=0 @@ -703,7 +703,8 @@ subroutine read_mat(unit) CALL assignD(buffer,"rho" ,dum_mat(i)%rho ,readvarmat(5))! assign rho CALL assignD(buffer,"sound_speed" ,dum_mat(i)%sound_speed ,readvarmat(6))! assign sound_speed CALL assignD(buffer,"tau" ,dum_mat(i)%tau ,readvarmat(7))! assign tau - CALL assignD(buffer,"em" ,dum_mat(i)%em ,readvarmat(8))! assign e + CALL assignD(buffer,"em" ,dum_mat(i)%em ,readvarmat(8))! assign e + CALL assignV(buffer,"vel" ,dum_mat(i)%vel ,readvarmat(9)) ! assign velocity end do read ! Check for duplicate indices @@ -827,5 +828,23 @@ function val(buffer) end function val !!!################################################################################################# +!!!################################################################################################# +!!! assign velocity +!!!################################################################################################# + subroutine assignV(buffer, keyword, variable, found) + implicit none + integer::found + character(1024)::buffer1,buffer2 + character(*)::buffer,keyword + real(real12), dimension(3)::variable + buffer1=buffer(:scan(buffer,"=")-1) + if(scan("=",buffer).ne.0) buffer2=val(buffer) + if(trim(adjustl(buffer1)).eq.trim(adjustl(keyword))& + .and.trim(adjustl(buffer2)).ne.'') then + found=found+1 + read(buffer2,*) variable(1), variable(2), variable(3) + end if + end subroutine assignV + end module inputs diff --git a/src/heatflow/mod_material.f90 b/src/heatflow/mod_material.f90 index 6ca7192..9dfcc8c 100644 --- a/src/heatflow/mod_material.f90 +++ b/src/heatflow/mod_material.f90 @@ -51,12 +51,13 @@ module materials !!! - em, the emissivity of the material. !!!######################################################################### -subroutine material(imaterial_type,kappa,kappa3D,h_conv,heat_capacity,rho,sound_speed,tau, em) +subroutine material(imaterial_type,kappa,kappa3D,h_conv,heat_capacity,rho,sound_speed,tau, em, vel) integer(int12), intent(in) ::imaterial_type integer(int12) :: i, tmp real(real12), intent(inout) :: kappa3D, kappa, h_conv, heat_capacity, sound_speed, rho, tau real(real12), intent(inout) :: em + real(real12), dimension(3), intent(inout) :: vel logical :: found !!!============================================= @@ -96,6 +97,9 @@ subroutine material(imaterial_type,kappa,kappa3D,h_conv,heat_capacity,rho,sound_ rho = input_materials(i)%rho sound_speed = input_materials(i)%sound_speed tau = input_materials(i)%tau + vel(1) = input_materials(i)%vel(1) + vel(2) = input_materials(i)%vel(2) + vel(3) = input_materials(i)%vel(3) em = input_materials(i)%em exit mat_loop diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90 index 797fa35..b03906f 100644 --- a/src/heatflow/mod_setup.f90 +++ b/src/heatflow/mod_setup.f90 @@ -34,7 +34,7 @@ module setup subroutine set_global_variables() integer(int12) :: ix,iy,iz,index real(real12) :: kappa,kappa3D,h_conv,heat_capacity,rho,sound_speed,tau, em - + real(real12), dimension(3) :: vel allocate(Temp_cur(nx, ny, nz)) allocate(Temp_p(NA)) allocate(Temp_pp(NA)) @@ -55,13 +55,15 @@ subroutine set_global_variables() do ix = 1, nx index = index + 1 CALL material(grid(ix,iy,iz)%imaterial_type,& - kappa,kappa3D,h_conv,heat_capacity,rho,sound_speed,tau, em) + kappa,kappa3D,h_conv,heat_capacity,rho,sound_speed,tau, em, vel) grid(ix,iy,iz)%kappa = kappa grid(ix,iy,iz)%rho = rho grid(ix,iy,iz)%heat_capacity = heat_capacity grid(ix,iy,iz)%tau = tau*inverse_time*inverse_time grid(ix,iy,iz)%em = em + grid(ix,iy,iz)%vel(:) = vel(:) lin_rhoc(index) = rho*heat_capacity + print*, grid(ix,iy,iz)%vel(:) if (Check_Stability) CALL stability(kappa, rho, heat_capacity, ix, iy, iz) end do end do From 3bfe37a626be810799a55ce2f666a90615d11dd4 Mon Sep 17 00:00:00 2001 From: hm556 Date: Tue, 2 Sep 2025 14:25:49 +0100 Subject: [PATCH 02/28] Add convection to hmatrix and boundary vector --- src/heatflow/mod_boundary.f90 | 56 ++++++++++++++-- src/heatflow/mod_hmatrix.f90 | 121 +++++++++++++++++++++++++++++++++- src/heatflow/mod_setup.f90 | 1 - 3 files changed, 170 insertions(+), 8 deletions(-) diff --git a/src/heatflow/mod_boundary.f90 b/src/heatflow/mod_boundary.f90 index d94a0bf..88ad7ba 100644 --- a/src/heatflow/mod_boundary.f90 +++ b/src/heatflow/mod_boundary.f90 @@ -84,7 +84,7 @@ subroutine boundary(B) kappaHarm = (2*kappa*kappaBoundx1/(kappa+kappaBoundx1)) / & (grid(ix, iy, iz)%Length(1)**2) if (kappa .ne. kappaBoundx1) kappaHarm = kappaHarm*BR - B(I) = B(I) + (kappaHarm) * T_Bathx1 + B(I) = B(I) + (kappaHarm) * T_Bathx1 !+ boundray_term_vel(1_int12,iy,iz,T_Bathx1) end if end if if (ix .eq. nx) then @@ -94,7 +94,7 @@ subroutine boundary(B) kappaHarm = (2*kappa*kappaBoundNx/(kappa+kappaBoundNx)) / & (grid(ix, iy, iz)%Length(1)**2) if (kappa .ne. kappaBoundNx) kappaHarm = kappaHarm*BR - B(I) = B(I) + (kappaHarm) * T_Bathx2 + B(I) = B(I) + (kappaHarm) * T_Bathx2 !+ boundray_term_vel(nx,iy,iz,T_Bathx2) end if end if end if @@ -107,7 +107,7 @@ subroutine boundary(B) kappaHarm = (2*kappa*kappaBoundy1/(kappa+kappaBoundy1)) / & (grid(ix, iy, iz)%Length(2)**2) if (kappa .ne. kappaBoundy1) kappaHarm = kappaHarm*BR - B(I) = B(I) + (kappaHarm) * T_Bathy1 + B(I) = B(I) + (kappaHarm) * T_Bathy1 !+ boundray_term_vel(ix,1_int12,iz,T_Bathy1) end if end if if (iy .eq. ny) then @@ -117,7 +117,7 @@ subroutine boundary(B) kappaHarm = (2*kappa*kappaBoundNy/(kappa+kappaBoundNy)) / & (grid(ix, iy, iz)%Length(2)**2) if (kappa .ne. kappaBoundNy) kappaHarm = kappaHarm*BR - B(I) = B(I) + (kappaHarm) * T_Bathy2 + B(I) = B(I) + (kappaHarm) * T_Bathy2 !+ boundray_term_vel(ix,ny,iz,T_Bathy2) end if end if end if @@ -130,7 +130,7 @@ subroutine boundary(B) kappaHarm = (2*kappa*kappaBoundz1/(kappa+kappaBoundz1)) / & (grid(ix, iy, iz)%Length(3)**2) if (kappa .ne. kappaBoundz1) kappaHarm = kappaHarm*BR - B(I) = B(I) + (kappaHarm) * T_Bathz1 + B(I) = B(I) + (kappaHarm) * T_Bathz1 !+ boundray_term_vel(ix,iy,1_int12,T_Bathz1) end if end if if (iz .eq. nz) then @@ -140,7 +140,7 @@ subroutine boundary(B) kappaHarm = (2*kappa*kappaBoundNz/(kappa+kappaBoundNz)) / & (grid(ix, iy, iz)%Length(3)**2) if (kappa .ne. kappaBoundNz) kappaHarm = kappaHarm*BR - B(I) = B(I) + (kappaHarm) * T_Bathz2 + B(I) = B(I) + (kappaHarm) * T_Bathz2 !+ boundray_term_vel(ix,iy,nz,T_Bathz2) end if end if end if @@ -180,5 +180,49 @@ function constantboundarytempgrad(I) result(temp) ! so we need to add T_BathCG to the temperature at the boundary end function constantboundarytempgrad !!!############################################################################################### + + !!!######################################################################## + !!! This subroutine calculates the value of the convective term of the H matrix... + !!! ...at the boundary. + !!!######################################################################## + function boundray_term_vel(x_b, y_b, z_b, TB) result(vel_conv) + integer(int12), intent(in) :: x_b, y_b, z_b + real(real12) :: vel_conv + real(real12) :: rho, CV, TB + real(real12), dimension(3) :: vel_in, vel_out + + !------------------------------------------------------------ + ! The boundary term is calculated of the boundary grid point. + !------------------------------------------------------------ + + + rho = grid(x_b,y_b,z_b)%rho + CV = grid(x_b,y_b,z_b)%heat_capacity + vel_in = grid(x_b,y_b,z_b)%vel + vel_out = grid(x_b,y_b,z_b)%vel + + if (x_b .eq. 1_int12) then + vel_conv = vel_in(1)*rho*CV*(1.0_real12/(2.0_real12*grid(x_b,y_b,z_b)%Length(1))) + vel_conv = -vel_conv + else if (x_b .eq. nx) then + vel_conv = vel_in(1)*rho*CV*(1.0_real12/(2.0_real12*grid(x_b,y_b,z_b)%Length(1))) + + else if (y_b .eq. 1_int12) then + vel_conv = vel_in(2)*rho*CV*(1.0_real12/(2.0_real12*grid(x_b,y_b,z_b)%Length(2))) + vel_conv = -vel_conv + else if (y_b .eq. ny) then + vel_conv = vel_in(2)*rho*CV*(1.0_real12/(2.0_real12*grid(x_b,y_b,z_b)%Length(2))) + else if (z_b .eq. 1_int12) then + vel_conv = vel_in(3)*rho*CV*(1.0_real12/(2.0_real12*grid(x_b,y_b,z_b)%Length(3))) + vel_conv = -vel_conv + else if (z_b .eq. nz) then + vel_conv = vel_in(3)*rho*CV*(1.0_real12/(2.0_real12*grid(x_b,y_b,z_b)%Length(3))) + else + vel_conv = 0.0_real12 + end if + vel_conv = vel_conv*TB + !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + end function boundray_term_vel + !!!######################################################################## end module boundary_vector diff --git a/src/heatflow/mod_hmatrix.f90 b/src/heatflow/mod_hmatrix.f90 index 9e0b219..e1d268d 100644 --- a/src/heatflow/mod_hmatrix.f90 +++ b/src/heatflow/mod_hmatrix.f90 @@ -148,7 +148,8 @@ function hmatrixfunc(i, j) result(H) if (x .eq. 1) then H=0.0_real12 else - H = A ! X left neighbor (left cell interaction) + H = A ! X left neighbor (left cell interaction) + H = H + calculate_convective_conductivity(xm, y, z, x, y, z) end if end if @@ -157,6 +158,7 @@ function hmatrixfunc(i, j) result(H) H=0.0_real12 else H = B ! X right neighbor (right cell interaction) + H = H + calculate_convective_conductivity(xp, y, z, x, y, z) end if end if @@ -165,6 +167,7 @@ function hmatrixfunc(i, j) result(H) H=0.0_real12 else H = D ! Y down neighbor (down cell interaction) + H = H + calculate_convective_conductivity(x, ym, z, x, y, z) end if end if if ((i-j) .eq. -nx) then @@ -172,6 +175,7 @@ function hmatrixfunc(i, j) result(H) H=0.0_real12 else H = E ! Y up neighbor (up cell interaction) + H = H + calculate_convective_conductivity(x, yp, z, x, y, z) end if end if @@ -181,6 +185,7 @@ function hmatrixfunc(i, j) result(H) else !write(*,*) 'F this is forward (in) z',F H = F ! Z in neighbor (forward cell interaction) !!!Frank had this as G during testing + H = H + calculate_convective_conductivity(x, y, zm, x, y, z) end if end if @@ -190,6 +195,7 @@ function hmatrixfunc(i, j) result(H) else !write(*,*) 'G this is backward (out) z?',G H = G ! Z out neighbor (backward cell interaction) !!!Frank Had this as F during testing + H = H + calculate_convective_conductivity(x, y, zp, x, y, z) end if end if @@ -391,5 +397,118 @@ subroutine boundry_diag_term(x_b, y_b, z_b, x, y, z, kappa_ab) end subroutine boundry_diag_term !!!######################################################################## + !!!######################################################################## + !!! subroutine to calculate the convective conductivity between two points + !!!######################################################################## + function calculate_convective_conductivity(x_in, y_in, z_in, x_out, y_out, z_out) result(vel_conv) + integer(int12), intent(in) :: x_in, y_in, z_in, x_out, y_out, z_out + real(real12) :: vel_conv + real(real12) :: rho, CV + real(real12), dimension(3) :: vel_in, vel_out + + vel_conv = 0.0_real12 + + ! if not an edge element + if ((x_in .ge. 1) .and. (x_in .le. nx) .and. (y_in .ge. 1) .and. & + (y_in .le. ny) .and. (z_in .ge. 1) .and. (z_in .le. nz)) then + + rho = grid(x_out,y_out,z_out)%rho + CV = grid(x_out,y_out,z_out)%heat_capacity + vel_in = grid(x_in,y_in,z_in)%vel + vel_out = grid(x_out,y_out,z_out)%vel + + if (x_in .ne. x_out) then + if (vel_in(1) .ne. vel_out(1)) then + vel_conv = 0.0_real12 + else + vel_conv = vel_in(1)*rho*CV*(1.0_real12/(2.0_real12*grid(x_out,y_out,z_out)%Length(1))) + if (x_in .lt. x_out) then + vel_conv = -vel_conv + end if + end if + + else if (y_in .ne. y_out) then + if (vel_in(2) .ne. vel_out(2)) then + vel_conv = 0.0_real12 + else + vel_conv = vel_in(2)*rho*CV*(1.0_real12/(2.0_real12*grid(x_out,y_out,z_out)%Length(2))) + if (y_in .lt. y_out) then + vel_conv = -vel_conv + end if + end if + + else if (z_in .ne. z_out) then + if (vel_in(3) .ne. vel_out(3)) then + vel_conv = 0.0_real12 + else + vel_conv = vel_in(3)*rho*CV*(1.0_real12/(2.0_real12*grid(x_out,y_out,z_out)%Length(3))) + if (z_in .lt. z_out) then + vel_conv = -vel_conv + end if + end if + end if + + else + vel_conv = 0.0_real12 + end if + vel_conv = vel_conv + end function calculate_convective_conductivity + + !!!######################################################################## + + !!!######################################################################## + !!! This subroutine calculates the value of the convective term of the H matrix... + !!! ...at the boundary. + !!!######################################################################## + subroutine boundry_diag_term_vel(x_b, y_b, z_b, x, y, z, vel_conv) + integer(int12), intent(in) :: x_b, y_b, z_b, x, y, z + real(real12), intent(out) :: vel_conv + real(real12) :: rho, CV + real(real12), dimension(3) :: vel_in, vel_out + + !------------------------------------------------------------ + ! The boundary term is calculated of the boundary grid point. + !------------------------------------------------------------ + + + rho = grid(x,y,z)%rho + CV = grid(x,y,z)%heat_capacity + vel_in = grid(x_b,y_b,z_b)%vel + vel_out = grid(x,y,z)%vel + + if (x_b .eq. 1) then + if (vel_in(1) .ne. vel_out(1)) then + vel_conv = 0.0_real12 + else + vel_conv = vel_in(1)*rho*CV*(1.0_real12/(2.0_real12*grid(x,y,z)%Length(1))) + if (x_b .lt. x) then + vel_conv = -vel_conv + end if + end if + + else if (y_b .ne. y) then + if (vel_in(2) .ne. vel_out(2)) then + vel_conv = 0.0_real12 + else + vel_conv = vel_in(2)*rho*CV*(1.0_real12/(2.0_real12*grid(x,y,z)%Length(2))) + if (y_b .lt. y) then + vel_conv = -vel_conv + end if + end if + + else if (z_b .ne. z) then + if (vel_in(3) .ne. vel_out(3)) then + vel_conv = 0.0_real12 + else + vel_conv = vel_in(3)*rho*CV*(1.0_real12/(2.0_real12*grid(x,y,z)%Length(3))) + if (z_b .lt. z) then + vel_conv = -vel_conv + end if + end if + end if + !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + end subroutine boundry_diag_term_vel + !!!######################################################################## + end module hmatrixmod diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90 index b03906f..accbaaf 100644 --- a/src/heatflow/mod_setup.f90 +++ b/src/heatflow/mod_setup.f90 @@ -63,7 +63,6 @@ subroutine set_global_variables() grid(ix,iy,iz)%em = em grid(ix,iy,iz)%vel(:) = vel(:) lin_rhoc(index) = rho*heat_capacity - print*, grid(ix,iy,iz)%vel(:) if (Check_Stability) CALL stability(kappa, rho, heat_capacity, ix, iy, iz) end do end do From 09face91e46ae9bdd5baf84d1ec684a501bee505 Mon Sep 17 00:00:00 2001 From: hm556 Date: Tue, 2 Sep 2025 14:44:16 +0100 Subject: [PATCH 03/28] Make fix bug, needs knew solver to work, current implementation of solver only works for symmetric Hmatrix --- src/heatflow/mod_boundary.f90 | 12 ++++++------ src/heatflow/mod_setup.f90 | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/heatflow/mod_boundary.f90 b/src/heatflow/mod_boundary.f90 index 88ad7ba..cb8ab30 100644 --- a/src/heatflow/mod_boundary.f90 +++ b/src/heatflow/mod_boundary.f90 @@ -84,7 +84,7 @@ subroutine boundary(B) kappaHarm = (2*kappa*kappaBoundx1/(kappa+kappaBoundx1)) / & (grid(ix, iy, iz)%Length(1)**2) if (kappa .ne. kappaBoundx1) kappaHarm = kappaHarm*BR - B(I) = B(I) + (kappaHarm) * T_Bathx1 !+ boundray_term_vel(1_int12,iy,iz,T_Bathx1) + B(I) = B(I) + (kappaHarm) * T_Bathx1 + boundray_term_vel(1_int12,iy,iz,T_Bathx1) end if end if if (ix .eq. nx) then @@ -94,7 +94,7 @@ subroutine boundary(B) kappaHarm = (2*kappa*kappaBoundNx/(kappa+kappaBoundNx)) / & (grid(ix, iy, iz)%Length(1)**2) if (kappa .ne. kappaBoundNx) kappaHarm = kappaHarm*BR - B(I) = B(I) + (kappaHarm) * T_Bathx2 !+ boundray_term_vel(nx,iy,iz,T_Bathx2) + B(I) = B(I) + (kappaHarm) * T_Bathx2 + boundray_term_vel(nx,iy,iz,T_Bathx2) end if end if end if @@ -107,7 +107,7 @@ subroutine boundary(B) kappaHarm = (2*kappa*kappaBoundy1/(kappa+kappaBoundy1)) / & (grid(ix, iy, iz)%Length(2)**2) if (kappa .ne. kappaBoundy1) kappaHarm = kappaHarm*BR - B(I) = B(I) + (kappaHarm) * T_Bathy1 !+ boundray_term_vel(ix,1_int12,iz,T_Bathy1) + B(I) = B(I) + (kappaHarm) * T_Bathy1 + boundray_term_vel(ix,1_int12,iz,T_Bathy1) end if end if if (iy .eq. ny) then @@ -117,7 +117,7 @@ subroutine boundary(B) kappaHarm = (2*kappa*kappaBoundNy/(kappa+kappaBoundNy)) / & (grid(ix, iy, iz)%Length(2)**2) if (kappa .ne. kappaBoundNy) kappaHarm = kappaHarm*BR - B(I) = B(I) + (kappaHarm) * T_Bathy2 !+ boundray_term_vel(ix,ny,iz,T_Bathy2) + B(I) = B(I) + (kappaHarm) * T_Bathy2 + boundray_term_vel(ix,ny,iz,T_Bathy2) end if end if end if @@ -130,7 +130,7 @@ subroutine boundary(B) kappaHarm = (2*kappa*kappaBoundz1/(kappa+kappaBoundz1)) / & (grid(ix, iy, iz)%Length(3)**2) if (kappa .ne. kappaBoundz1) kappaHarm = kappaHarm*BR - B(I) = B(I) + (kappaHarm) * T_Bathz1 !+ boundray_term_vel(ix,iy,1_int12,T_Bathz1) + B(I) = B(I) + (kappaHarm) * T_Bathz1 + boundray_term_vel(ix,iy,1_int12,T_Bathz1) end if end if if (iz .eq. nz) then @@ -140,7 +140,7 @@ subroutine boundary(B) kappaHarm = (2*kappa*kappaBoundNz/(kappa+kappaBoundNz)) / & (grid(ix, iy, iz)%Length(3)**2) if (kappa .ne. kappaBoundNz) kappaHarm = kappaHarm*BR - B(I) = B(I) + (kappaHarm) * T_Bathz2 !+ boundray_term_vel(ix,iy,nz,T_Bathz2) + B(I) = B(I) + (kappaHarm) * T_Bathz2 + boundray_term_vel(ix,iy,nz,T_Bathz2) end if end if end if diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90 index accbaaf..11dc0bf 100644 --- a/src/heatflow/mod_setup.f90 +++ b/src/heatflow/mod_setup.f90 @@ -141,6 +141,7 @@ subroutine sparse_Hmatrix() ra%irow(count) = i ! The row of the H matrix ra%jcol(count) = j ! The column of the H matrix count = count + 1 ! The number of non-zero elements in the H matrix + H0=hmatrixfunc(j,i) ! The value of the H matrix ra%val(count) = H0 ! The value of the H matrix ra%irow(count) = j ! The row of the H matrix ra%jcol(count) = i ! The column of the H matrix From 247f862b80448d0775ba0a33cbab42928d1b44ee Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 2 Sep 2025 22:38:08 +0100 Subject: [PATCH 04/28] Add non symmatrix sparse matrix solver from the intel library mkl --- Makefile | 80 ++--- src/heatflow/mkl_pardiso.f90 | 452 +++++++++++++++++++++++++++++ src/heatflow/mod_constructions.f90 | 4 +- src/heatflow/mod_evolve.f90 | 21 +- src/heatflow/mod_global.f90 | 5 +- src/heatflow/mod_setup.f90 | 5 + src/heatflow/mod_sparse_solver.f90 | 334 +++++++++++++++++++++ src/heatflow/mod_tempdep.f90 | 58 ++-- 8 files changed, 885 insertions(+), 74 deletions(-) create mode 100644 src/heatflow/mkl_pardiso.f90 create mode 100644 src/heatflow/mod_sparse_solver.f90 diff --git a/Makefile b/Makefile index 57e89be..af38c3a 100644 --- a/Makefile +++ b/Makefile @@ -1,15 +1,8 @@ #################################################################### # 11 Jun 2024 # #################################################################### -# SHELL = /bin/sh -# -# The machine (platform) identifier to append to the library names -# PLAT = _linux -# -# - ########################################## # CODE DIRECTORIES AND FILES @@ -20,38 +13,46 @@ BIN_DIR := ./bin SRC_DIR := ./src BUILD_DIR = ./obj -SRCS := /heatflow/mod_constants.f90 \ - /heatflow/mod_constructions.f90 \ - /heatflow/mod_SPtype.f90 \ - /heatflow/mod_global.f90 \ - /heatflow/mod_Sparse.f90 \ - /heatflow/mod_inputs.f90 \ - /heatflow/mod_material.f90 \ - /heatflow/mod_hmatrix.f90 \ - /heatflow/mod_init_evolve.f90 \ - /heatflow/mod_setup.f90 \ - /heatflow/mod_boundary.f90 \ - /heatflow/mod_heating.f90 \ - /heatflow/mod_cattaneo.f90 \ - /heatflow/mod_tempdep.f90 \ - /heatflow/mod_evolve.f90 \ - /heatflow/mod_output.f90 \ +SRCS := heatflow/mod_constants.f90 \ + heatflow/mod_constructions.f90 \ + heatflow/mod_SPtype.f90 \ + heatflow/mod_global.f90 \ + heatflow/mod_Sparse.f90 \ + heatflow/mod_inputs.f90 \ + heatflow/mod_material.f90 \ + heatflow/mod_hmatrix.f90 \ + heatflow/mod_init_evolve.f90 \ + heatflow/mkl_pardiso.f90 \ + heatflow/mod_sparse_solver.f90 \ + heatflow/mod_setup.f90 \ + heatflow/mod_boundary.f90 \ + heatflow/mod_heating.f90 \ + heatflow/mod_cattaneo.f90 \ + heatflow/mod_tempdep.f90 \ + heatflow/mod_evolve.f90 \ + heatflow/mod_output.f90 \ heatflow.f90 -OBJS := $(addprefix $(SRC_DIR)/,$(SRCS)) +OBJS := $(addprefix $(BUILD_DIR)/,$(notdir $(SRCS:.f90=.o))) + +# MKL configuration +MKLROOT ?= /opt/intel/oneapi/mkl/latest +MKL_LIB_DIR = $(MKLROOT)/lib/intel64 +MKL_INCLUDE_DIR = $(MKLROOT)/include +MKL_FLAGS = -L$(MKL_LIB_DIR) -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl -FFLAGS = -O3 -MODULEFLAGS = -J +FFLAGS = -O3 -I$(MKL_INCLUDE_DIR) +MODULEFLAGS = -J$(BUILD_DIR) FC = gfortran ########################################## -# LIBRARY SECTION +# TARGETS ########################################## -MKLROOT?="/usr/local/intel/parallel_studio_xe_2017/compilers_and_libraries_2017/linux/mkl/lib/intel64_lin" - - NAME = ThermalFlow.x programs = $(BIN_DIR)/$(NAME) + +.PHONY: all debug clean OMP + all: $(programs) $(BIN_DIR): @@ -60,11 +61,22 @@ $(BIN_DIR): $(BUILD_DIR): mkdir -p $@ -$(programs) : $(OBJS) | $(BIN_DIR) $(BUILD_DIR) - $(FC) -O3 -fopenmp $(MODULEFLAGS) $(BUILD_DIR) $(OBJS) -o $@ +# Pattern rule for compiling Fortran files +$(BUILD_DIR)/%.o: $(SRC_DIR)/heatflow/%.f90 | $(BUILD_DIR) + $(FC) $(FFLAGS) $(MODULEFLAGS) -c $< -o $@ + +$(BUILD_DIR)/%.o: $(SRC_DIR)/%.f90 | $(BUILD_DIR) + $(FC) $(FFLAGS) $(MODULEFLAGS) -c $< -o $@ -debug : $(OBJS) - $(FC) -O0 -Wall -g -ffpe-trap=invalid,zero,overflow,underflow -fbacktrace -fcheck=all -fbounds-check $(MODULEFLAGS) $(BUILD_DIR) $(OBJS) -o $(programs) +$(programs): $(OBJS) | $(BIN_DIR) + $(FC) -O3 -fopenmp $(OBJS) -o $@ $(MKL_FLAGS) + +debug: FFLAGS = -O0 -Wall -g -ffpe-trap=invalid,zero,overflow,underflow -fbacktrace -fcheck=all -fbounds-check -I$(MKL_INCLUDE_DIR) +debug: $(OBJS) | $(BIN_DIR) + $(FC) $(FFLAGS) $(OBJS) -o $(programs) $(MKL_FLAGS) OMP: $(programs) ./util/DShell/omp_exec.sh + +clean: + rm -f $(BUILD_DIR)/*.o $(BUILD_DIR)/*.mod $(programs) \ No newline at end of file diff --git a/src/heatflow/mkl_pardiso.f90 b/src/heatflow/mkl_pardiso.f90 new file mode 100644 index 0000000..0ff7527 --- /dev/null +++ b/src/heatflow/mkl_pardiso.f90 @@ -0,0 +1,452 @@ +!******************************************************************************* +! Copyright(C) 2004-2013 Intel Corporation. All Rights Reserved. +! +! The source code, information and material ("Material") contained herein is +! owned by Intel Corporation or its suppliers or licensors, and title to such +! Material remains with Intel Corporation or its suppliers or licensors. The +! Material contains proprietary information of Intel or its suppliers and +! licensors. The Material is protected by worldwide copyright laws and treaty +! provisions. No part of the Material may be used, copied, reproduced, +! modified, published, uploaded, posted, transmitted, distributed or disclosed +! in any way without Intel's prior express written permission. No license +! under any patent, copyright or other intellectual property rights in the +! Material is granted to or conferred upon you, either expressly, by +! implication, inducement, estoppel or otherwise. Any license under such +! intellectual property rights must be express and approved by Intel in +! writing. +! +! *Third Party trademarks are the property of their respective owners. +! +! Unless otherwise agreed by Intel in writing, you may not remove or alter +! this notice or any other notice embedded in Materials by Intel or Intel's +! suppliers or licensors in any way. +! +!******************************************************************************* +! Content : MKL PARDISO Fortran-90 header file +! +! Contains PARDISO routine definition. +! For CDECL use only. +! +!******************************************************************************* +!DEC$ IF .NOT. DEFINED( __MKL_PARDISO_F90 ) + +!DEC$ DEFINE __MKL_PARDISO_F90 + + MODULE MKL_PARDISO_PRIVATE + + TYPE MKL_PARDISO_HANDLE; INTEGER(KIND=8) DUMMY; END TYPE + + INTEGER, PARAMETER :: PARDISO_OOC_FILE_NAME = 1 + + END MODULE MKL_PARDISO_PRIVATE + + MODULE MKL_PARDISO + USE MKL_PARDISO_PRIVATE + +! +! Subroutine prototype for PARDISO +! + + INTERFACE PARDISO + SUBROUTINE PARDISO_S( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) + USE MKL_PARDISO_PRIVATE + TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) + INTEGER, INTENT(IN) :: MAXFCT + INTEGER, INTENT(IN) :: MNUM + INTEGER, INTENT(IN) :: MTYPE + INTEGER, INTENT(IN) :: PHASE + INTEGER, INTENT(IN) :: N + INTEGER, INTENT(IN) :: IA(*) + INTEGER, INTENT(IN) :: JA(*) + INTEGER, INTENT(INOUT) :: PERM(*) + INTEGER, INTENT(IN) :: NRHS + INTEGER, INTENT(INOUT) :: IPARM(*) + INTEGER, INTENT(IN) :: MSGLVL + INTEGER, INTENT(OUT) :: ERROR + + REAL(KIND=4), INTENT(IN) :: A(*) + REAL(KIND=4), INTENT(INOUT) :: B(*) + REAL(KIND=4), INTENT(OUT) :: X(*) + END SUBROUTINE PARDISO_S + + SUBROUTINE PARDISO_D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) + USE MKL_PARDISO_PRIVATE + TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) + INTEGER, INTENT(IN) :: MAXFCT + INTEGER, INTENT(IN) :: MNUM + INTEGER, INTENT(IN) :: MTYPE + INTEGER, INTENT(IN) :: PHASE + INTEGER, INTENT(IN) :: N + INTEGER, INTENT(IN) :: IA(*) + INTEGER, INTENT(IN) :: JA(*) + INTEGER, INTENT(INOUT) :: PERM(*) + INTEGER, INTENT(IN) :: NRHS + INTEGER, INTENT(INOUT) :: IPARM(*) + INTEGER, INTENT(IN) :: MSGLVL + INTEGER, INTENT(OUT) :: ERROR + + REAL(KIND=8), INTENT(IN) :: A(*) + REAL(KIND=8), INTENT(INOUT) :: B(*) + REAL(KIND=8), INTENT(OUT) :: X(*) + END SUBROUTINE PARDISO_D + + SUBROUTINE PARDISO_SC( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) + USE MKL_PARDISO_PRIVATE + TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) + INTEGER, INTENT(IN) :: MAXFCT + INTEGER, INTENT(IN) :: MNUM + INTEGER, INTENT(IN) :: MTYPE + INTEGER, INTENT(IN) :: PHASE + INTEGER, INTENT(IN) :: N + INTEGER, INTENT(IN) :: IA(*) + INTEGER, INTENT(IN) :: JA(*) + INTEGER, INTENT(INOUT) :: PERM(*) + INTEGER, INTENT(IN) :: NRHS + INTEGER, INTENT(INOUT) :: IPARM(*) + INTEGER, INTENT(IN) :: MSGLVL + INTEGER, INTENT(OUT) :: ERROR + + COMPLEX(KIND=4), INTENT(IN) :: A(*) + COMPLEX(KIND=4), INTENT(INOUT) :: B(*) + COMPLEX(KIND=4), INTENT(OUT) :: X(*) + END SUBROUTINE PARDISO_SC + + SUBROUTINE PARDISO_DC( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) + USE MKL_PARDISO_PRIVATE + TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) + INTEGER, INTENT(IN) :: MAXFCT + INTEGER, INTENT(IN) :: MNUM + INTEGER, INTENT(IN) :: MTYPE + INTEGER, INTENT(IN) :: PHASE + INTEGER, INTENT(IN) :: N + INTEGER, INTENT(IN) :: IA(*) + INTEGER, INTENT(IN) :: JA(*) + INTEGER, INTENT(INOUT) :: PERM(*) + INTEGER, INTENT(IN) :: NRHS + INTEGER, INTENT(INOUT) :: IPARM(*) + INTEGER, INTENT(IN) :: MSGLVL + INTEGER, INTENT(OUT) :: ERROR + COMPLEX(KIND=8), INTENT(IN) :: A(*) + COMPLEX(KIND=8), INTENT(INOUT) :: B(*) + COMPLEX(KIND=8), INTENT(OUT) :: X(*) + END SUBROUTINE PARDISO_DC + + + SUBROUTINE PARDISO_S_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) + USE MKL_PARDISO_PRIVATE + TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) + INTEGER, INTENT(IN) :: MAXFCT + INTEGER, INTENT(IN) :: MNUM + INTEGER, INTENT(IN) :: MTYPE + INTEGER, INTENT(IN) :: PHASE + INTEGER, INTENT(IN) :: N + INTEGER, INTENT(IN) :: IA(*) + INTEGER, INTENT(IN) :: JA(*) + INTEGER, INTENT(INOUT) :: PERM(*) + INTEGER, INTENT(IN) :: NRHS + INTEGER, INTENT(INOUT) :: IPARM(*) + INTEGER, INTENT(IN) :: MSGLVL + INTEGER, INTENT(OUT) :: ERROR + REAL(KIND=4), INTENT(IN) :: A(*) + REAL(KIND=4), INTENT(INOUT) :: B(N,*) + REAL(KIND=4), INTENT(OUT) :: X(N,*) + END SUBROUTINE PARDISO_S_2D + + SUBROUTINE PARDISO_D_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) + USE MKL_PARDISO_PRIVATE + TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) + INTEGER, INTENT(IN) :: MAXFCT + INTEGER, INTENT(IN) :: MNUM + INTEGER, INTENT(IN) :: MTYPE + INTEGER, INTENT(IN) :: PHASE + INTEGER, INTENT(IN) :: N + INTEGER, INTENT(IN) :: IA(*) + INTEGER, INTENT(IN) :: JA(*) + INTEGER, INTENT(INOUT) :: PERM(*) + INTEGER, INTENT(IN) :: NRHS + INTEGER, INTENT(INOUT) :: IPARM(*) + INTEGER, INTENT(IN) :: MSGLVL + INTEGER, INTENT(OUT) :: ERROR + REAL(KIND=8), INTENT(IN) :: A(*) + REAL(KIND=8), INTENT(INOUT) :: B(N,*) + REAL(KIND=8), INTENT(OUT) :: X(N,*) + END SUBROUTINE PARDISO_D_2D + + SUBROUTINE PARDISO_SC_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) + USE MKL_PARDISO_PRIVATE + TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) + INTEGER, INTENT(IN) :: MAXFCT + INTEGER, INTENT(IN) :: MNUM + INTEGER, INTENT(IN) :: MTYPE + INTEGER, INTENT(IN) :: PHASE + INTEGER, INTENT(IN) :: N + INTEGER, INTENT(IN) :: IA(*) + INTEGER, INTENT(IN) :: JA(*) + INTEGER, INTENT(INOUT) :: PERM(*) + INTEGER, INTENT(IN) :: NRHS + INTEGER, INTENT(INOUT) :: IPARM(*) + INTEGER, INTENT(IN) :: MSGLVL + INTEGER, INTENT(OUT) :: ERROR + COMPLEX(KIND=4), INTENT(IN) :: A(*) + COMPLEX(KIND=4), INTENT(INOUT) :: B(N,*) + COMPLEX(KIND=4), INTENT(OUT) :: X(N,*) + END SUBROUTINE PARDISO_SC_2D + + SUBROUTINE PARDISO_DC_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) + USE MKL_PARDISO_PRIVATE + TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) + INTEGER, INTENT(IN) :: MAXFCT + INTEGER, INTENT(IN) :: MNUM + INTEGER, INTENT(IN) :: MTYPE + INTEGER, INTENT(IN) :: PHASE + INTEGER, INTENT(IN) :: N + INTEGER, INTENT(IN) :: IA(*) + INTEGER, INTENT(IN) :: JA(*) + INTEGER, INTENT(INOUT) :: PERM(*) + INTEGER, INTENT(IN) :: NRHS + INTEGER, INTENT(INOUT) :: IPARM(*) + INTEGER, INTENT(IN) :: MSGLVL + INTEGER, INTENT(OUT) :: ERROR + COMPLEX(KIND=8), INTENT(IN) :: A(*) + COMPLEX(KIND=8), INTENT(INOUT) :: B(N,*) + COMPLEX(KIND=8), INTENT(OUT) :: X(N,*) + END SUBROUTINE PARDISO_DC_2D + END INTERFACE +! +! Subroutine prototype for PARDISO_64 +! +! Note: The pardiso_64 interface is not supported on IA-32 architecture. +! If called on IA-32, error = -12 is returned. +! + INTERFACE PARDISO_64 + SUBROUTINE PARDISO_S_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) + USE MKL_PARDISO_PRIVATE + TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) + INTEGER(KIND=8), INTENT(IN) :: MAXFCT + INTEGER(KIND=8), INTENT(IN) :: MNUM + INTEGER(KIND=8), INTENT(IN) :: MTYPE + INTEGER(KIND=8), INTENT(IN) :: PHASE + INTEGER(KIND=8), INTENT(IN) :: N + INTEGER(KIND=8), INTENT(IN) :: IA(*) + INTEGER(KIND=8), INTENT(IN) :: JA(*) + INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) + INTEGER(KIND=8), INTENT(IN) :: NRHS + INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) + INTEGER(KIND=8), INTENT(IN) :: MSGLVL + INTEGER(KIND=8), INTENT(OUT) :: ERROR + REAL(KIND=4), INTENT(IN) :: A(*) + REAL(KIND=4), INTENT(INOUT) :: B(*) + REAL(KIND=4), INTENT(OUT) :: X(*) + END SUBROUTINE PARDISO_S_64 + + SUBROUTINE PARDISO_D_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) + USE MKL_PARDISO_PRIVATE + TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) + INTEGER(KIND=8), INTENT(IN) :: MAXFCT + INTEGER(KIND=8), INTENT(IN) :: MNUM + INTEGER(KIND=8), INTENT(IN) :: MTYPE + INTEGER(KIND=8), INTENT(IN) :: PHASE + INTEGER(KIND=8), INTENT(IN) :: N + INTEGER(KIND=8), INTENT(IN) :: IA(*) + INTEGER(KIND=8), INTENT(IN) :: JA(*) + INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) + INTEGER(KIND=8), INTENT(IN) :: NRHS + INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) + INTEGER(KIND=8), INTENT(IN) :: MSGLVL + INTEGER(KIND=8), INTENT(OUT) :: ERROR + REAL(KIND=8), INTENT(IN) :: A(*) + REAL(KIND=8), INTENT(INOUT) :: B(*) + REAL(KIND=8), INTENT(OUT) :: X(*) + END SUBROUTINE PARDISO_D_64 + + SUBROUTINE PARDISO_SC_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) + USE MKL_PARDISO_PRIVATE + TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) + INTEGER(KIND=8), INTENT(IN) :: MAXFCT + INTEGER(KIND=8), INTENT(IN) :: MNUM + INTEGER(KIND=8), INTENT(IN) :: MTYPE + INTEGER(KIND=8), INTENT(IN) :: PHASE + INTEGER(KIND=8), INTENT(IN) :: N + INTEGER(KIND=8), INTENT(IN) :: IA(*) + INTEGER(KIND=8), INTENT(IN) :: JA(*) + INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) + INTEGER(KIND=8), INTENT(IN) :: NRHS + INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) + INTEGER(KIND=8), INTENT(IN) :: MSGLVL + INTEGER(KIND=8), INTENT(OUT) :: ERROR + COMPLEX(KIND=4), INTENT(IN) :: A(*) + COMPLEX(KIND=4), INTENT(INOUT) :: B(*) + COMPLEX(KIND=4), INTENT(OUT) :: X(*) + END SUBROUTINE PARDISO_SC_64 + + SUBROUTINE PARDISO_DC_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) + USE MKL_PARDISO_PRIVATE + TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) + INTEGER(KIND=8), INTENT(IN) :: MAXFCT + INTEGER(KIND=8), INTENT(IN) :: MNUM + INTEGER(KIND=8), INTENT(IN) :: MTYPE + INTEGER(KIND=8), INTENT(IN) :: PHASE + INTEGER(KIND=8), INTENT(IN) :: N + INTEGER(KIND=8), INTENT(IN) :: IA(*) + INTEGER(KIND=8), INTENT(IN) :: JA(*) + INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) + INTEGER(KIND=8), INTENT(IN) :: NRHS + INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) + INTEGER(KIND=8), INTENT(IN) :: MSGLVL + INTEGER(KIND=8), INTENT(OUT) :: ERROR + COMPLEX(KIND=8), INTENT(IN) :: A(*) + COMPLEX(KIND=8), INTENT(INOUT) :: B(*) + COMPLEX(KIND=8), INTENT(OUT) :: X(*) + END SUBROUTINE PARDISO_DC_64 + + SUBROUTINE PARDISO_S_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) + USE MKL_PARDISO_PRIVATE + TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) + INTEGER(KIND=8), INTENT(IN) :: MAXFCT + INTEGER(KIND=8), INTENT(IN) :: MNUM + INTEGER(KIND=8), INTENT(IN) :: MTYPE + INTEGER(KIND=8), INTENT(IN) :: PHASE + INTEGER(KIND=8), INTENT(IN) :: N + INTEGER(KIND=8), INTENT(IN) :: IA(*) + INTEGER(KIND=8), INTENT(IN) :: JA(*) + INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) + INTEGER(KIND=8), INTENT(IN) :: NRHS + INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) + INTEGER(KIND=8), INTENT(IN) :: MSGLVL + INTEGER(KIND=8), INTENT(OUT) :: ERROR + REAL(KIND=4), INTENT(IN) :: A(*) + REAL(KIND=4), INTENT(INOUT) :: B(N,*) + REAL(KIND=4), INTENT(OUT) :: X(N,*) + END SUBROUTINE PARDISO_S_64_2D + + SUBROUTINE PARDISO_D_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) + USE MKL_PARDISO_PRIVATE + TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) + INTEGER(KIND=8), INTENT(IN) :: MAXFCT + INTEGER(KIND=8), INTENT(IN) :: MNUM + INTEGER(KIND=8), INTENT(IN) :: MTYPE + INTEGER(KIND=8), INTENT(IN) :: PHASE + INTEGER(KIND=8), INTENT(IN) :: N + INTEGER(KIND=8), INTENT(IN) :: IA(*) + INTEGER(KIND=8), INTENT(IN) :: JA(*) + INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) + INTEGER(KIND=8), INTENT(IN) :: NRHS + INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) + INTEGER(KIND=8), INTENT(IN) :: MSGLVL + INTEGER(KIND=8), INTENT(OUT) :: ERROR + REAL(KIND=8), INTENT(IN) :: A(*) + REAL(KIND=8), INTENT(INOUT) :: B(N,*) + REAL(KIND=8), INTENT(OUT) :: X(N,*) + END SUBROUTINE PARDISO_D_64_2D + + SUBROUTINE PARDISO_SC_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) + USE MKL_PARDISO_PRIVATE + TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) + INTEGER(KIND=8), INTENT(IN) :: MAXFCT + INTEGER(KIND=8), INTENT(IN) :: MNUM + INTEGER(KIND=8), INTENT(IN) :: MTYPE + INTEGER(KIND=8), INTENT(IN) :: PHASE + INTEGER(KIND=8), INTENT(IN) :: N + INTEGER(KIND=8), INTENT(IN) :: IA(*) + INTEGER(KIND=8), INTENT(IN) :: JA(*) + INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) + INTEGER(KIND=8), INTENT(IN) :: NRHS + INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) + INTEGER(KIND=8), INTENT(IN) :: MSGLVL + INTEGER(KIND=8), INTENT(OUT) :: ERROR + COMPLEX(KIND=4), INTENT(IN) :: A(*) + COMPLEX(KIND=4), INTENT(INOUT) :: B(N,*) + COMPLEX(KIND=4), INTENT(OUT) :: X(N,*) + END SUBROUTINE PARDISO_SC_64_2D + + SUBROUTINE PARDISO_DC_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) + USE MKL_PARDISO_PRIVATE + TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) + INTEGER(KIND=8), INTENT(IN) :: MAXFCT + INTEGER(KIND=8), INTENT(IN) :: MNUM + INTEGER(KIND=8), INTENT(IN) :: MTYPE + INTEGER(KIND=8), INTENT(IN) :: PHASE + INTEGER(KIND=8), INTENT(IN) :: N + INTEGER(KIND=8), INTENT(IN) :: IA(*) + INTEGER(KIND=8), INTENT(IN) :: JA(*) + INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) + INTEGER(KIND=8), INTENT(IN) :: NRHS + INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) + INTEGER(KIND=8), INTENT(IN) :: MSGLVL + INTEGER(KIND=8), INTENT(OUT) :: ERROR + COMPLEX(KIND=8), INTENT(IN) :: A(*) + COMPLEX(KIND=8), INTENT(INOUT) :: B(N,*) + COMPLEX(KIND=8), INTENT(OUT) :: X(N,*) + END SUBROUTINE PARDISO_DC_64_2D + + END INTERFACE + + INTERFACE + + SUBROUTINE PARDISOINIT(PT, MTYPE, IPARM) + USE MKL_PARDISO_PRIVATE + TYPE(MKL_PARDISO_HANDLE), INTENT(OUT) :: PT(*) + INTEGER, INTENT(IN) :: MTYPE + INTEGER, INTENT(OUT) :: IPARM(*) + END SUBROUTINE PARDISOINIT + + END INTERFACE + + INTERFACE PARDISO_GET + + FUNCTION PARDISO_GETENV(PT, OptName, StrVal) + USE MKL_PARDISO_PRIVATE + INTEGER PARDISO_GETENV + TYPE(MKL_PARDISO_HANDLE), INTENT(IN) :: PT(*) + INTEGER, INTENT(IN) :: OptName + CHARACTER(*), INTENT(OUT) :: StrVal + END FUNCTION PARDISO_GETENV + + END INTERFACE + + INTERFACE PARDISO_SET + + FUNCTION PARDISO_SETENV(PT, OptName, StrVal) + USE MKL_PARDISO_PRIVATE + INTEGER PARDISO_SETENV + TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) + INTEGER, INTENT(IN) :: OptName + INTEGER, INTENT(IN) :: StrVal(*) + END FUNCTION PARDISO_SETENV + + END INTERFACE + + INTERFACE PARDISO_PIV + + FUNCTION MKL_PARDISO_PIVOT( AII, BII, EPS) + REAL(KIND=8) :: AII, BII, EPS + INTEGER MKL_PARDISO_PIVOT + END + END INTERFACE PARDISO_PIV + + INTERFACE PARDISO_GETDIAG + + SUBROUTINE PARDISO_GETDIAG_D(PT, DIAG_FACT, DIAG_A, MNUM, EPS) + USE MKL_PARDISO_PRIVATE + TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) + REAL(KIND=8), INTENT(INOUT) :: DIAG_FACT, DIAG_A + INTEGER, INTENT(IN) :: MNUM + INTEGER, INTENT(INOUT) :: EPS + END + + SUBROUTINE PARDISO_GETDIAG_Z(PT, DIAG_FACT, DIAG_A, MNUM, EPS) + USE MKL_PARDISO_PRIVATE + TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) + COMPLEX(KIND=8), INTENT(INOUT) :: DIAG_FACT, DIAG_A + INTEGER, INTENT(IN) :: MNUM + INTEGER, INTENT(INOUT) :: EPS + END + + END INTERFACE PARDISO_GETDIAG + + + + + END MODULE MKL_PARDISO + +!DEC$ ENDIF diff --git a/src/heatflow/mod_constructions.f90 b/src/heatflow/mod_constructions.f90 index 384ce09..b0de444 100644 --- a/src/heatflow/mod_constructions.f90 +++ b/src/heatflow/mod_constructions.f90 @@ -26,9 +26,9 @@ module constructions integer(int12) :: imaterial_type !what type of material it is in the mat.in real(real12) :: volume !volume of the block real(real12), dimension(3) :: Length !length of the block in 3 dimensions - real(real12), dimension(3) :: vel !center of the block in 3 dimensions + real(real12), dimension(3) :: vel !vel of the block in 3 dimensions integer(int12) :: iheater !whether the block is a heater - real(real12) :: kappa, rho, heat_capacity, tau, em !physical properties of the material + real(real12) :: kappa, rho, heat_capacity, tau, em !physical properties of the material end type heatblock !!Defines the material to be used in the simulation type material diff --git a/src/heatflow/mod_evolve.f90 b/src/heatflow/mod_evolve.f90 index 1209b25..74f3951 100644 --- a/src/heatflow/mod_evolve.f90 +++ b/src/heatflow/mod_evolve.f90 @@ -26,10 +26,12 @@ module evolution use sptype, only: I4B use solver, only: linbcg use globe_data, only: Temp_p, Temp_pp, inverse_time, heat, lin_rhoc + use globe_data, only: acsr, ja, ia use heating, only: heater use boundary_vector, only: boundary use cattaneo, only: S_catS - use tempdep, only: ChangeProp +! use tempdep, only: ChangeProp + use sparse_solver, only: bicgstab implicit none private @@ -45,8 +47,9 @@ module evolution subroutine simulate(itime) integer(int12), intent(in) :: itime real(real12), dimension(NA) :: S, x, Q, Qdens, S_CAT, B - integer(int12) :: ncg, itol, itmax !, iss - integer(I4B) :: iter + real(real12), dimension(:), allocatable :: x0 + integer:: ncg, itol, itmax !, iss + integer :: iter real(real12) :: e, err, tol !---------------------- @@ -147,9 +150,11 @@ subroutine simulate(itime) err=E - CALL linbcg(S,x,itol=int(itol,I4B),tol=tol, itmax=int(itmax,I4B), iter=iter, & - err=E) + CALL bicgstab(acsr, ia, ja, S, itmax, x, x0, iter) + ! CALL linbcg(S,x,itol=int(itol,I4B),tol=tol, itmax=int(itmax,I4B), iter=iter, & + ! err=E) + x=x0 if (any(isnan(x(:)))) then write(0,*) "fatal error: NAN in x tempurature vector" write(0,*) 'time step ', itime, " T ", sum(Temp_p)/size(Temp_p), E ,iter @@ -169,9 +174,9 @@ subroutine simulate(itime) Temp_pp = Temp_p Temp_p = x - if (TempDepProp .eq. 1) then - CALL ChangeProp() - end if + ! if (TempDepProp .eq. 1) then + ! CALL ChangeProp() + ! end if !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ end subroutine simulate diff --git a/src/heatflow/mod_global.f90 b/src/heatflow/mod_global.f90 index 5207578..94aeb26 100644 --- a/src/heatflow/mod_global.f90 +++ b/src/heatflow/mod_global.f90 @@ -4,7 +4,7 @@ !!!################################################################################################# module globe_data use sptype, only: sprs2_dp, diag_sprs_dp - use constants, only: real12 + use constants, only: real12, int12 implicit none real(real12), dimension(:,:,:), allocatable :: Temp_cur ! Current temperature real(real12), dimension(:), allocatable :: Temp_p, Temp_pp ! Previous and previous previous temperature @@ -13,6 +13,9 @@ module globe_data real(real12), dimension(:), allocatable :: lin_rhoc ! 1D array for HeatCapacity*Rho TYPE(sprs2_dp) :: ra !Techniqually rH TYPE(diag_sprs_dp) :: da !Techniqually dH + real(real12), dimension(:), allocatable :: acsr + integer (kind=8), dimension(:), allocatable :: ja + integer (kind=8), dimension(:), allocatable :: ia end module globe_data diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90 index 11dc0bf..d0011ab 100644 --- a/src/heatflow/mod_setup.f90 +++ b/src/heatflow/mod_setup.f90 @@ -21,8 +21,10 @@ module setup use inputs, only: Periodicz ! use hmatrixmod, only: hmatrixfunc use globe_data, only: ra, Temp_cur, Temp_p, Temp_pp,inverse_time, heat, lin_rhoc, Q_P + use globe_data, only: acsr, ja, ia use solver, only: SRSin use materials, only: material + use sparse_solver, only: coo2csr implicit none contains @@ -77,6 +79,9 @@ subroutine set_global_variables() CALL build_Hmatrix() else CALL sparse_Hmatrix() + ! Allocate the arrays to hold the H matrix in CSR format + allocate(acsr(ra%len), ja(ra%len), ia(ra%n+1)) + CALL coo2csr(ra%n, ra%len, ra%val, ra%irow, ra%jcol, acsr, ja, ia) end if !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/src/heatflow/mod_sparse_solver.f90 b/src/heatflow/mod_sparse_solver.f90 new file mode 100644 index 0000000..95a925d --- /dev/null +++ b/src/heatflow/mod_sparse_solver.f90 @@ -0,0 +1,334 @@ +module sparse_solver + + use mkl_pardiso + implicit none + + private + + public :: coo2csr, & + bicgstab, & + solve_pardiso + +contains + + subroutine coo2csr( nrow, & + nnz, & + a, & + ir, & + jc, & + acsr, & + ja, & + ia ) + + !--------------------------------------------------------------------------! + !! coocsr converts coo to csr. + ! + ! discussion: + ! + ! this routine converts a matrix that is stored in coo coordinate format + ! a, ir, jc into a csr row general sparse acsr, ja, ia format. + ! + ! parameters: + ! + ! input, integer nrow, the row dimension of the matrix. + ! + ! input, integer nnz, the number of nonzero elements in the matrix. + ! + ! a, + ! ir, + ! jc = matrix in coordinate format. a(k), ir(k), jc(k) store the nnz + ! nonzero elements of the matrix with a(k) = actual real value of + ! the elements, ir(k) = its row number and jc(k) = its column + ! number. the order of the elements is arbitrary. + ! + ! on return: + ! + ! ir is destroyed + ! + ! output, real acsr(nnz), ja(nnz), ia(nrow+1), the matrix in csr + ! compressed sparse row format. + !--------------------------------------------------------------------------! + + ! Arguments. + integer (kind=8), intent(in) :: nrow + integer (kind=8), intent(in) :: nnz + real(8), dimension(nnz), intent(in) :: a + integer (kind=8), dimension(nnz), intent(in) :: ir + integer (kind=8), dimension(nnz), intent(in) :: jc + real(8), dimension(nnz), intent(out) :: acsr + integer (kind=8), dimension(nnz), intent(out) :: ja + integer (kind=8), dimension(nrow+1), intent(out) :: ia + + ! Local variables. + integer (kind=8) :: i, iad, j, k, k0 + real(kind=8) :: x + + ia(1:nrow+1) = 0 + + ! determine the row lengths. + + do k = 1, nnz + ia(ir(k)) = ia(ir(k)) + 1 + end do + + ! the starting position of each row. + + k = 1 + + do j = 1, nrow + 1 + + k0 = ia(j) + ia(j) = k + k = k + k0 + + end do + + ! go through the structure once more. fill in output matrix. + + do k = 1, nnz + + i = ir(k) + j = jc(k) + x = a(k) + iad = ia(i) + acsr(iad) = x + ja(iad) = j + ia(i) = iad + 1 + + end do + + ! shift back ia. + + do j = nrow, 1, -1 + ia(j+1) = ia(j) + end do + + ia(1) = 1 + + return + + end subroutine coo2csr + + !------------------------------------------------------------------- + ! BiConjugate Gradient (Stabilised) Method + !------------------------------------------------------------------- + + subroutine bicgstab( acsr, & + ia, & + ja, & + b, & + maxiter, & + initGuess, & + x, & + iter ) + + ! Arguments + real(8), dimension(:), intent(in) :: acsr + integer (kind=8), dimension(:), intent(in) :: ia + integer (kind=8), dimension(:), intent(in) :: ja + real(8), dimension(:), intent(in) :: b + integer, intent(in) :: maxiter + real(8), dimension(:), intent(in) :: initGuess + real(8), dimension(:), allocatable, intent(out) :: x + integer, intent(out) :: iter + + ! Local variables + integer :: i, j, k, n + real(8), parameter :: cc = 1.0e-9 + real(8) :: alpha,beta,delta0,delta,delta_old,omega + real(8), dimension(:), allocatable :: r, p, s, rst, temp1, temp2 + + n = size(b,1) + + allocate(x(n)) + allocate(r(n)) + allocate(p(n)) + allocate(s(n)) + allocate(rst(n)) + allocate(temp1(n)) + allocate(temp2(n)) + + call mkl_dcsrgemv("N",n,acsr,ia,ja,x,temp1) + + r = b - temp1 + + call random_number(rst) + + p = r + delta = dot_product(rst,r) + + write(*,'(a,1x,f15.3)') "Starting delta: ", delta + + delta0 = delta + + do i = 1, maxiter + + if ( norm2(r) /= norm2(r) ) then + write(*,'(a)') "Error in solver: residual NaN" + exit + end if + + if(mod(i,1000).eq.0) then + write(*,'(a,1x,i6)') 'Iteration number: ',i + write(*,'(a,1x,f15.3)') "Residual ratio: ", norm2(r)/cc + end if + + call mkl_dcsrgemv("N",n,acsr,ia,ja,p,temp1) ! temp1=A*p + + alpha = delta/dot_product(rst,temp1) + s = r - alpha*temp1 + + call mkl_dcsrgemv("N",n,acsr,ia,ja,s,temp2) ! temp2=A*s + + omega = dot_product(s,temp2)/dot_product(temp2,temp2) + x = x + alpha*p + omega*s + r = s - omega*temp2 + delta_old = delta + delta = dot_product(rst,r) + beta = (delta/delta_old)*(alpha/omega) + p = r + beta*(p - omega*temp1) + + if(norm2(r) .lt. cc) then + iter = i + return + end if + + if(i.eq.maxiter) then + write(*,'(a)') "Maximum iterations reached." + write(*,'(a)') "Convergence not achieved." + write(*,'(a,1x,f15.3)') "Norm of residual: ", norm2(r) + write(*,'(a,1x,f15.3)') "Convergence criterion: ", cc + if((norm2(r)/cc) .lt. 2.d0) then + write(*,'(a)') "The residual is within a small",& + "range of the convergence criterion." + write(*,'(a)') "Perhaps increasing iteration ", & + "count may help." + end if + end if + + end do + + end subroutine bicgstab + + !------------------------------------------------------------------- + ! END BiConjugate Gradient (Stabilised) Method + !------------------------------------------------------------------- + + !------------------------------------------------------------------- + ! PARDISO Direct Solver + !------------------------------------------------------------------- + + subroutine solve_pardiso( acsr, & + b, & + ia, & + ja, & + x ) + + use mkl_pardiso + + ! Arguments + real(8), dimension(:), intent(in) :: acsr + real(8), dimension(:), intent(inout) :: b + integer,dimension(:), intent(in) :: ia + integer,dimension(:), intent(in) :: ja + real(8), dimension(:), allocatable, intent(out) :: x + + ! Local variables + type(mkl_pardiso_handle), dimension(:), allocatable :: pt + integer :: i,maxfct,mnum,mtype,phase,n,nrhs,error,msglvl,nnz,error1 + integer, dimension(:), allocatable :: iparm + integer,dimension(1) :: idum + real(8),dimension(1) :: ddum + + n = size(b,1) + nnz = size(acsr,1) + nrhs = 1 + maxfct = 1 + mnum = 1 + + if (.not.(allocated(x))) allocate(x(n)) + + allocate(iparm(64)) !set up pardiso control parameter + + do i=1,64 + iparm(i) = 0 + end do + + iparm(1) = 1 ! no solver default + iparm(2) = 2 ! fill-in reordering from metis + iparm(4) = 0 ! no iterative-direct algorithm + iparm(5) = 0 ! no user fill-in reducing permutation + iparm(6) = 0 ! =0 solution on the first n compoments of x + iparm(8) = 2 ! numbers of iterative refinement steps + iparm(10) = 13 ! perturbe the pivot elements with 1e-13 + iparm(11) = 1 ! use nonsymmetric permutation and scaling mps + iparm(13) = 0 ! maximum weighted matching algorithm is + !switched-off (default for symmetric). + ! try iparm(13) = 1 in case of inaccuracy + iparm(14) = 0 ! output: number of perturbed pivots + iparm(18) = -1 ! output: number of nonzeros in the factor lu + iparm(19) = -1 ! output: mflops for lu factorization + iparm(20) = 0 ! output: numbers of cg iterations + + error = 0 ! initialize error flag + msglvl = 0 ! 0=no output, 1=print statistical information + mtype = 11 ! real and unsymmetric matrix + + ! Initiliaze the internal solver memory pointer. + ! This is only necessary for the first call of the solver. + + allocate (pt(64)) + do i=1,64 + pt(i)%dummy = 0 + end do + + phase = 11 ! Only reordering and symbolic factorization + + call pardiso (pt,maxfct,mnum,mtype,phase,n,acsr,ia,ja, & + idum, nrhs, iparm, msglvl, ddum, ddum, error) + + if (error /= 0) then + write(*,*) 'the following error was detected: ', error + goto 1000 + end if + + phase = 22 ! only factorization + call pardiso (pt,maxfct,mnum,mtype,phase,n,acsr,ia,ja, & + idum, nrhs, iparm, msglvl, ddum, ddum, error) + if (error /= 0) then + write(*,*) 'the following error was detected: ', error + goto 1000 + endif + + ! back substitution and iterative refinement + iparm(8) = 2 ! max numbers of iterative refinement steps + phase = 33 ! only solving + call pardiso (pt,maxfct,mnum,mtype,phase,n,acsr,ia,ja, & + idum, nrhs, iparm, msglvl, b, x, error) + write(*,*) 'solve completed ... ' + if (error /= 0) then + write(*,*) 'the following error was detected: ', error + goto 1000 + endif + +1000 continue + ! termination and release of memory + phase = -1 ! release internal memory + call pardiso (pt,maxfct,mnum,mtype,phase,n,ddum,idum,idum, & + idum, nrhs, iparm, msglvl, ddum, ddum, error1) + + if (error1 /= 0) then + write(*,*) 'the following release error was detected: ', & + error1 + stop 1 + endif + + if ( error /= 0 ) stop 1 + + end subroutine solve_pardiso + + !------------------------------------------------------------------- + ! END PARDISO Direct Solver + !------------------------------------------------------------------- + +end module sparse_solver diff --git a/src/heatflow/mod_tempdep.f90 b/src/heatflow/mod_tempdep.f90 index 475617c..b8b936a 100755 --- a/src/heatflow/mod_tempdep.f90 +++ b/src/heatflow/mod_tempdep.f90 @@ -27,7 +27,7 @@ module TempDep use inputs, only: Grid, TempDepProp, Nz, Ny, Nx - use setup, only: sparse_Hmatrix + ! use setup, only: sparse_Hmatrix use globe_data, only: Temp_p, lin_rhoc use constants, only: real12, int12 @@ -35,34 +35,34 @@ module TempDep contains - subroutine ChangeProp() - character(len=100) :: filename - integer(int12) :: ix,iy,iz, index - logical :: res - index = 1 - !Loop over all the grid points - do iz = 1, Nz - do iy = 1, Ny - do ix = 1, Nx - ! Construct the filename for the Material table asscoiated with the grid point - filename = trim('./inputs/MatTable' // & - trim(adjustl(char(Grid(ix, iy, iz)%imaterial_type)))) - inquire(file=filename, exist=res) - if (res) then - ! Read the temperature dependent properties from the file - ! CALL ReadTempDepTable(filename, ix, iy, iz, index) - else - ! File does not exist, continue to the next grid point - continue - end if - ! Read the temperature dependent properties from the file - index = index + 1 - end do - end do - end do - ! Construct the sparse matrix - CALL sparse_Hmatrix() - end subroutine ChangeProp + ! subroutine ChangeProp() + ! character(len=100) :: filename + ! integer(int12) :: ix,iy,iz, index + ! logical :: res + ! index = 1 + ! !Loop over all the grid points + ! do iz = 1, Nz + ! do iy = 1, Ny + ! do ix = 1, Nx + ! ! Construct the filename for the Material table asscoiated with the grid point + ! filename = trim('./inputs/MatTable' // & + ! trim(adjustl(char(Grid(ix, iy, iz)%imaterial_type)))) + ! inquire(file=filename, exist=res) + ! if (res) then + ! ! Read the temperature dependent properties from the file + ! ! CALL ReadTempDepTable(filename, ix, iy, iz, index) + ! else + ! ! File does not exist, continue to the next grid point + ! continue + ! end if + ! ! Read the temperature dependent properties from the file + ! index = index + 1 + ! end do + ! end do + ! end do + ! ! Construct the sparse matrix + ! CALL sparse_Hmatrix() + ! end subroutine ChangeProp ! subroutine ReadTempDepTable(filename, ix, iy, iz, index) ! character(len=*), intent(in) :: filename From 2fe28bc7c551e8ab498f42d77923dbd0a45f1661 Mon Sep 17 00:00:00 2001 From: Harry Mclean <130392970+HarryMclean@users.noreply.github.com> Date: Tue, 2 Sep 2025 22:45:01 +0100 Subject: [PATCH 05/28] Fix typo --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 0c78934..1dcc697 100644 --- a/README.md +++ b/README.md @@ -77,15 +77,15 @@ make install This will build and install the executable in the following directory: ``` -${HOME}/.local/HeatFLow/bin/HeatFlow +${HOME}/.local/HeatFlow/bin/HeatFlow ``` This executable can now be called to run the HeatFlow software package and simulate heat transport. -If the `${HOME}/.local/HeatFLow/bin` is added to your `PATH` environment variable, then the program can be called as a terminal command. +If the `${HOME}/.local/HeatFlow/bin` is added to your `PATH` environment variable, then the program can be called as a terminal command. This can be done with the following command (works on a per-terminal basis, if you want to update it for all, include this in your source shell file): ``` -export PATH="${PATH}:${HOME}/.local/HeatFLow/bin" +export PATH="${PATH}:${HOME}/.local/HeatFlow/bin" ``` To execute the program, use the following command: From 0f1ddc0e1fc0c690338a02ac01e460a404370680 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 2 Sep 2025 23:11:02 +0100 Subject: [PATCH 06/28] Remove not used mat.in variables --- src/heatflow/mod_constructions.f90 | 6 +++--- src/heatflow/mod_inputs.f90 | 15 ++++++--------- src/heatflow/mod_material.f90 | 6 +++--- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/src/heatflow/mod_constructions.f90 b/src/heatflow/mod_constructions.f90 index b0de444..6e8afc0 100644 --- a/src/heatflow/mod_constructions.f90 +++ b/src/heatflow/mod_constructions.f90 @@ -34,11 +34,11 @@ module constructions type material integer(int12) :: index !identifier for the material real(real12) :: heat_capacity !heat capacity of the material - real(real12) :: h_conv !convective heat transfer coefficient + ! real(real12) :: h_conv !convective heat transfer coefficient real(real12) :: kappa !thermal conductivity - real(real12) :: kappa3D !three-dimensional thermal conductivity + ! real(real12) :: kappa3D !three-dimensional thermal conductivity real(real12) :: rho !density of the material - real(real12) :: sound_speed !speed of sound in the material + ! real(real12) :: sound_speed !speed of sound in the material real(real12) :: tau !relaxation time real(real12), dimension(3) :: vel !velocity of the material real(real12) :: em !emisstivity diff --git a/src/heatflow/mod_inputs.f90 b/src/heatflow/mod_inputs.f90 index e8b3025..553e925 100644 --- a/src/heatflow/mod_inputs.f90 +++ b/src/heatflow/mod_inputs.f90 @@ -657,7 +657,7 @@ subroutine read_mat(unit) type(material), dimension(100) :: dum_mat character(1024) :: buffer integer :: reason, j - integer, dimension(9) :: readvarmat + integer, dimension(6) :: readvarmat integer :: i, index i=0 @@ -697,14 +697,11 @@ subroutine read_mat(unit) CALL assignD(buffer,"heat_capacity",dum_mat(i)%heat_capacity,readvarmat(1))! assign heatCapacity - CALL assignD(buffer,"h_conv" ,dum_mat(i)%h_conv ,readvarmat(2))! assign h_conv - CALL assignD(buffer,"kappa" ,dum_mat(i)%kappa ,readvarmat(3))! assign kappa - CALL assignD(buffer,"kappa3D" ,dum_mat(i)%kappa3D ,readvarmat(4))! assign kappa3D - CALL assignD(buffer,"rho" ,dum_mat(i)%rho ,readvarmat(5))! assign rho - CALL assignD(buffer,"sound_speed" ,dum_mat(i)%sound_speed ,readvarmat(6))! assign sound_speed - CALL assignD(buffer,"tau" ,dum_mat(i)%tau ,readvarmat(7))! assign tau - CALL assignD(buffer,"em" ,dum_mat(i)%em ,readvarmat(8))! assign e - CALL assignV(buffer,"vel" ,dum_mat(i)%vel ,readvarmat(9)) ! assign velocity + CALL assignD(buffer,"kappa" ,dum_mat(i)%kappa ,readvarmat(2))! assign kappa + CALL assignD(buffer,"rho" ,dum_mat(i)%rho ,readvarmat(3))! assign rho + CALL assignD(buffer,"tau" ,dum_mat(i)%tau ,readvarmat(4))! assign tau + CALL assignD(buffer,"em" ,dum_mat(i)%em ,readvarmat(5))! assign e + CALL assignV(buffer,"vel" ,dum_mat(i)%vel ,readvarmat(6)) ! assign velocity end do read ! Check for duplicate indices diff --git a/src/heatflow/mod_material.f90 b/src/heatflow/mod_material.f90 index 9dfcc8c..65b9fdf 100644 --- a/src/heatflow/mod_material.f90 +++ b/src/heatflow/mod_material.f90 @@ -91,11 +91,11 @@ subroutine material(imaterial_type,kappa,kappa3D,h_conv,heat_capacity,rho,sound_ if (tmp .eq. imaterial_type) then found = .true. heat_capacity = input_materials(i)%heat_capacity - h_conv = input_materials(i)%h_conv + ! h_conv = input_materials(i)%h_conv kappa = input_materials(i)%kappa - kappa3D = input_materials(i)%kappa3D + ! kappa3D = input_materials(i)%kappa3D rho = input_materials(i)%rho - sound_speed = input_materials(i)%sound_speed + ! sound_speed = input_materials(i)%sound_speed tau = input_materials(i)%tau vel(1) = input_materials(i)%vel(1) vel(2) = input_materials(i)%vel(2) From 31cf45dec51bbcba482d2656735bdec01a9627c9 Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 3 Sep 2025 09:19:43 +0100 Subject: [PATCH 07/28] Make with the parallel libraries --- Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index af38c3a..a4750e5 100644 --- a/Makefile +++ b/Makefile @@ -39,7 +39,8 @@ OBJS := $(addprefix $(BUILD_DIR)/,$(notdir $(SRCS:.f90=.o))) MKLROOT ?= /opt/intel/oneapi/mkl/latest MKL_LIB_DIR = $(MKLROOT)/lib/intel64 MKL_INCLUDE_DIR = $(MKLROOT)/include -MKL_FLAGS = -L$(MKL_LIB_DIR) -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl +#MKL_FLAGS = -L$(MKL_LIB_DIR) -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl +MKL_FLAGS = -L$(MKL_LIB_DIR) -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl FFLAGS = -O3 -I$(MKL_INCLUDE_DIR) MODULEFLAGS = -J$(BUILD_DIR) @@ -79,4 +80,4 @@ OMP: $(programs) ./util/DShell/omp_exec.sh clean: - rm -f $(BUILD_DIR)/*.o $(BUILD_DIR)/*.mod $(programs) \ No newline at end of file + rm -f $(BUILD_DIR)/*.o $(BUILD_DIR)/*.mod $(programs) From 440e16840eb13c4a9bed2754809afe6b5131cad8 Mon Sep 17 00:00:00 2001 From: hm556 Date: Thu, 4 Sep 2025 11:56:54 +0100 Subject: [PATCH 08/28] Switch to pardiso solver --- src/heatflow/mod_evolve.f90 | 20 +++++++++++--------- src/heatflow/mod_global.f90 | 4 ++-- src/heatflow/mod_setup.f90 | 3 ++- src/heatflow/mod_sparse_solver.f90 | 17 ++++++++++------- 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/heatflow/mod_evolve.f90 b/src/heatflow/mod_evolve.f90 index 74f3951..ada2569 100644 --- a/src/heatflow/mod_evolve.f90 +++ b/src/heatflow/mod_evolve.f90 @@ -31,7 +31,7 @@ module evolution use boundary_vector, only: boundary use cattaneo, only: S_catS ! use tempdep, only: ChangeProp - use sparse_solver, only: bicgstab + use sparse_solver, only: bicgstab, solve_pardiso implicit none private @@ -46,8 +46,8 @@ module evolution !!!################################################################################################# subroutine simulate(itime) integer(int12), intent(in) :: itime - real(real12), dimension(NA) :: S, x, Q, Qdens, S_CAT, B - real(real12), dimension(:), allocatable :: x0 + real(real12), dimension(NA) :: S, Q, Qdens, S_CAT, B + real(real12), dimension(:), allocatable :: x integer:: ncg, itol, itmax !, iss integer :: iter real(real12) :: e, err, tol @@ -140,21 +140,23 @@ subroutine simulate(itime) ! iter: Output - gives the number of the final iteration. ! err: Output - records the error of the final iteration. ! iss: Input - sets the Sparse Storage type (1=SRS, 2=SDS). - x=Temp_p+(Temp_p-Temp_pp) - if (any(x-Temp_p .lt. TINY)) x=x+TINY !avoid nan solver issue + ! x=Temp_p+(Temp_p-Temp_pp) + ! if (any(x-Temp_p .lt. TINY)) x=x+TINY !avoid nan solver issue itol=1 tol=1.e-32_real12 itmax=50000 ncg = 0 - iter=ncg + iter= 0 err=E - CALL bicgstab(acsr, ia, ja, S, itmax, x, x0, iter) + ! CALL bicgstab(acsr, ia, ja, S, itmax, x, x0, iter) + + CALL solve_pardiso(acsr, S, ia, ja, x) ! CALL linbcg(S,x,itol=int(itol,I4B),tol=tol, itmax=int(itmax,I4B), iter=iter, & - ! err=E) + ! err=E) - x=x0 + ! if (any(isnan(x(:)))) then write(0,*) "fatal error: NAN in x tempurature vector" write(0,*) 'time step ', itime, " T ", sum(Temp_p)/size(Temp_p), E ,iter diff --git a/src/heatflow/mod_global.f90 b/src/heatflow/mod_global.f90 index 94aeb26..f31ce2e 100644 --- a/src/heatflow/mod_global.f90 +++ b/src/heatflow/mod_global.f90 @@ -14,8 +14,8 @@ module globe_data TYPE(sprs2_dp) :: ra !Techniqually rH TYPE(diag_sprs_dp) :: da !Techniqually dH real(real12), dimension(:), allocatable :: acsr - integer (kind=8), dimension(:), allocatable :: ja - integer (kind=8), dimension(:), allocatable :: ia + integer, dimension(:), allocatable :: ja + integer, dimension(:), allocatable :: ia end module globe_data diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90 index d0011ab..40c53bd 100644 --- a/src/heatflow/mod_setup.f90 +++ b/src/heatflow/mod_setup.f90 @@ -81,7 +81,8 @@ subroutine set_global_variables() CALL sparse_Hmatrix() ! Allocate the arrays to hold the H matrix in CSR format allocate(acsr(ra%len), ja(ra%len), ia(ra%n+1)) - CALL coo2csr(ra%n, ra%len, ra%val, ra%irow, ra%jcol, acsr, ja, ia) + CALL coo2csr(ra%n, ra%len, ra%val, ra%irow, ra%jcol, acsr, ja, ia) + ! print*, ra%val end if !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/src/heatflow/mod_sparse_solver.f90 b/src/heatflow/mod_sparse_solver.f90 index 95a925d..7242657 100644 --- a/src/heatflow/mod_sparse_solver.f90 +++ b/src/heatflow/mod_sparse_solver.f90 @@ -56,8 +56,8 @@ subroutine coo2csr( nrow, & integer (kind=8), dimension(nnz), intent(in) :: ir integer (kind=8), dimension(nnz), intent(in) :: jc real(8), dimension(nnz), intent(out) :: acsr - integer (kind=8), dimension(nnz), intent(out) :: ja - integer (kind=8), dimension(nrow+1), intent(out) :: ia + integer, dimension(nnz), intent(out) :: ja + integer, dimension(nrow+1), intent(out) :: ia ! Local variables. integer (kind=8) :: i, iad, j, k, k0 @@ -151,16 +151,17 @@ subroutine bicgstab( acsr, & call mkl_dcsrgemv("N",n,acsr,ia,ja,x,temp1) r = b - temp1 - + call random_number(rst) p = r + delta = dot_product(rst,r) - + write(*,'(a,1x,f15.3)') "Starting delta: ", delta delta0 = delta - + do i = 1, maxiter if ( norm2(r) /= norm2(r) ) then @@ -187,7 +188,7 @@ subroutine bicgstab( acsr, & delta = dot_product(rst,r) beta = (delta/delta_old)*(alpha/omega) p = r + beta*(p - omega*temp1) - + if(norm2(r) .lt. cc) then iter = i return @@ -247,7 +248,9 @@ subroutine solve_pardiso( acsr, & mnum = 1 if (.not.(allocated(x))) allocate(x(n)) - + + + allocate(iparm(64)) !set up pardiso control parameter do i=1,64 From 96c2ee3732b0711f5f3ff6b25ae9bac86fd3e4ae Mon Sep 17 00:00:00 2001 From: Your Name Date: Thu, 4 Sep 2025 12:36:18 +0100 Subject: [PATCH 09/28] Fix bug with output file --- src/heatflow/mod_global.f90 | 1 + src/heatflow/mod_output.f90 | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/heatflow/mod_global.f90 b/src/heatflow/mod_global.f90 index f31ce2e..0fb61d6 100644 --- a/src/heatflow/mod_global.f90 +++ b/src/heatflow/mod_global.f90 @@ -16,6 +16,7 @@ module globe_data real(real12), dimension(:), allocatable :: acsr integer, dimension(:), allocatable :: ja integer, dimension(:), allocatable :: ia + character(len=1024) :: logname ! Log file name end module globe_data diff --git a/src/heatflow/mod_output.f90 b/src/heatflow/mod_output.f90 index 5a024b1..8482ef7 100644 --- a/src/heatflow/mod_output.f90 +++ b/src/heatflow/mod_output.f90 @@ -46,7 +46,7 @@ module output use inputs, only: nx,ny,nz, time_step, grid, NA, Check_Steady_State, ntime, WriteToTxt use inputs, only: Test_Run, freq, RunName, FullRestart, IVERB, write_every use inputs, only: start_ix, end_ix, start_iy, end_iy, start_iz, end_iz - use globe_data, only: Temp_p,Temp_pp, heat, heated_volume + use globe_data, only: Temp_p,Temp_pp, heat, heated_volume, logname implicit none contains @@ -54,9 +54,11 @@ subroutine data_write(itime) implicit none integer(int12), intent(in) :: itime real(real12), dimension(nx,ny,nz) :: CT, Temp_cur - integer(int12) :: ix, iy, iz, indexA, logunit - character(len=1024) :: file_prefix, file_extension, outdir, logname + integer(int12) :: ix, iy, iz, indexA + character(len=1024) :: file_prefix, file_extension, outdir + integer :: logunit + logunit = 20 file_prefix = 'Temperture_' outdir='./outputs/' file_extension = '.out' @@ -90,9 +92,12 @@ subroutine data_write(itime) ! find most recent log file and open it !--------------------------------------- CALL last_log(logname,outdir) - open(newunit=logunit,file=logname) + open(logunit,file=logname) + ! print*, logunit + ! print*, logname write(logunit,*) real((itime-1)*(time_step)), & (Temp_cur(start_ix:end_ix, start_iy:end_iy, start_iz:end_iz)) + close(logunit) !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ end if end if @@ -108,8 +113,12 @@ subroutine data_write(itime) if (WriteToTxt) then if (mod(itime, write_every) .eq. 0) then write(*, *) 'Writing Temperature difference to file' + ! print*, logunit + ! print*, logname + open(logunit,file=logname, status='old', position='append') write(logunit,*) real((itime-1)*(time_step)), & (Temp_cur(start_ix:end_ix, start_iy:end_iy, start_iz:end_iz)) + close(logunit) end if endif end if From 95f9009b0162e7b88df9e97aa781217a8afa5b70 Mon Sep 17 00:00:00 2001 From: Your Name Date: Thu, 11 Sep 2025 13:58:09 +0100 Subject: [PATCH 10/28] Fix typo --- src/heatflow/mod_heating.f90 | 33 +++++++++++++++++++++++---------- src/heatflow/mod_output.f90 | 2 +- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/heatflow/mod_heating.f90 b/src/heatflow/mod_heating.f90 index 831396f..bb4a1d3 100644 --- a/src/heatflow/mod_heating.f90 +++ b/src/heatflow/mod_heating.f90 @@ -30,7 +30,7 @@ subroutine heater(itime, Q, Qdens) integer(int12) :: ix, iy, iz, IA ,heated_num real(real12) :: time, POWER, time_pulse, x, x2 real(real12) :: rho, volume, heat_capacity, area, tau, sum_temp - + logical :: shared_power ! Initialize variables IA = 0 Q = 0._real12 @@ -53,7 +53,7 @@ subroutine heater(itime, Q, Qdens) heat_capacity = grid(ix,iy,iz)%heat_capacity area = grid(ix,iy,iz)%Length(1)*grid(ix,iy,iz)%Length(2) !??? !tau divided by time_step squared in setup.f90 - tau = grid(ix,iy,iz)%tau*(time_step**2.0_real12) + tau = grid(ix,iy,iz)%tau*(time_step) ! select heater case select case(grid(ix,iy,iz)%iheater) @@ -114,13 +114,22 @@ subroutine heater(itime, Q, Qdens) Q(IA) = 0.0 end if !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + case(10) + Q(IA) = POWER - (tau*(POWER)) + + case(11) + Q(IA) = POWER + + case(12) + Q(IA) = POWER + (tau*(POWER)) end select !------------------------------ ! If emissitivity is not zero, then calculate the radiative heating !------------------------------ - Q(IA) = Q(IA) - grid(ix,iy,iz)%em * grid(ix,iy,iz)%length(1)*& - grid(ix,iy,iz)%length(2)*StefBoltz & - * ((Temp_p(IA)**4.0_real12) - (T_Bath**4.0_real12)) + ! Q(IA) = Q(IA) - grid(ix,iy,iz)%em * grid(ix,iy,iz)%length(1)*& + ! grid(ix,iy,iz)%length(2)*StefBoltz & + ! * ((Temp_p(IA)**4.0_real12) - (T_Bath**4.0_real12)) !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -161,11 +170,15 @@ subroutine heater(itime, Q, Qdens) ! Normalize all heat sources by the heated volume - if (heated_volume .gt. 0.0) then - Qdens(:) = Q(:) / heated_volume - heated_temp = sum_temp / heated_volume - end if - + shared_power = .False. + if (shared_power) then + if (heated_volume .gt. 0.0) then + Qdens(:) = Q(:) / heated_volume + heated_temp = sum_temp / heated_volume + end if + else + Qdens(:) = Q(:) / grid(1,1,1)%volume + end if end subroutine heater diff --git a/src/heatflow/mod_output.f90 b/src/heatflow/mod_output.f90 index 8482ef7..0235e4e 100644 --- a/src/heatflow/mod_output.f90 +++ b/src/heatflow/mod_output.f90 @@ -70,7 +70,7 @@ subroutine data_write(itime) do iz = 1, nz do iy = 1, ny do ix = 1, nx - Temp_cur(ix,iy,iz) = Temp_pp(indexA) + Temp_cur(ix,iy,iz) = Temp_p(indexA) indexA = indexA+1 end do end do From 072c957ed83b1d6910499d0077010d05c740a91e Mon Sep 17 00:00:00 2001 From: Your Name Date: Thu, 11 Sep 2025 15:17:11 +0100 Subject: [PATCH 11/28] Add cattaneo heating --- src/heatflow/mod_heating.f90 | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/heatflow/mod_heating.f90 b/src/heatflow/mod_heating.f90 index bb4a1d3..5d95655 100644 --- a/src/heatflow/mod_heating.f90 +++ b/src/heatflow/mod_heating.f90 @@ -14,7 +14,7 @@ module Heating use constants, only: real12, int12, pi, StefBoltz use globe_data, only: Temp_p, Temp_pp, Heat, heated_volume, Q_P, heated_temp use inputs, only: nx,ny,nz, grid, NA, power_in, time_step, heated_steps, T_System, freq, ntime, & - T_Bath + T_Bath, icattaneo use materials, only: material implicit none contains @@ -81,6 +81,15 @@ subroutine heater(itime, Q, Qdens) else Q(IA) = 0.0_real12 end if + + if (icattaneo .eq. 1) then + if (itime .eq. 1) then + Q(IA) = Q(IA) + (tau*(POWER)) + end if + if (itime .eq. heated_steps+1) then + Q(IA) = Q(IA) - (tau*(POWER)) + end if + end if !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ case(3) !------------------------------ From 5a375f5a4636f6cdc7bacb999d08bae445f997f7 Mon Sep 17 00:00:00 2001 From: Your Name Date: Thu, 11 Sep 2025 19:57:00 +0100 Subject: [PATCH 12/28] Adjust heating --- src/heatflow/mod_heating.f90 | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/heatflow/mod_heating.f90 b/src/heatflow/mod_heating.f90 index 5d95655..f33eb5f 100644 --- a/src/heatflow/mod_heating.f90 +++ b/src/heatflow/mod_heating.f90 @@ -78,10 +78,6 @@ subroutine heater(itime, Q, Qdens) !------------------------------ if ( time .le. time_pulse ) then Q(IA) = POWER - else - Q(IA) = 0.0_real12 - end if - if (icattaneo .eq. 1) then if (itime .eq. 1) then Q(IA) = Q(IA) + (tau*(POWER)) @@ -90,6 +86,12 @@ subroutine heater(itime, Q, Qdens) Q(IA) = Q(IA) - (tau*(POWER)) end if end if + + else + Q(IA) = 0.0_real12 + end if + + !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ case(3) !------------------------------ From 3b4da8aa5a0ef966a7ad8cea39f66699447bc72c Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 1 Oct 2025 18:22:07 +0100 Subject: [PATCH 13/28] Working Bicgstab --- Makefile | 8 +++- src/heatflow/mod_boundary.f90 | 12 +++--- src/heatflow/mod_evolve.f90 | 4 +- src/heatflow/mod_global.f90 | 4 +- src/heatflow/mod_hmatrix.f90 | 12 +++--- src/heatflow/mod_sparse_solver.f90 | 62 ++++++++++++++++++++++++++++-- 6 files changed, 81 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index a4750e5..7c65e97 100644 --- a/Makefile +++ b/Makefile @@ -42,9 +42,11 @@ MKL_INCLUDE_DIR = $(MKLROOT)/include #MKL_FLAGS = -L$(MKL_LIB_DIR) -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl MKL_FLAGS = -L$(MKL_LIB_DIR) -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl -FFLAGS = -O3 -I$(MKL_INCLUDE_DIR) +#FFLAGS = -O3 -I$(MKL_INCLUDE_DIR) MODULEFLAGS = -J$(BUILD_DIR) FC = gfortran +NCORES := $(shell nproc) +FFLAGS = -O3 -fopenmp -I$(MKL_INCLUDE_DIR) ########################################## # TARGETS @@ -72,6 +74,10 @@ $(BUILD_DIR)/%.o: $(SRC_DIR)/%.f90 | $(BUILD_DIR) $(programs): $(OBJS) | $(BIN_DIR) $(FC) -O3 -fopenmp $(OBJS) -o $@ $(MKL_FLAGS) +.PHONY: run +run: $(programs) + OMP_NUM_THREADS=$(NCORES) MKL_NUM_THREADS=$(NCORES) MKL_DYNAMIC=FALSE OMP_PROC_BIND=spread OMP_PLACES=cores ./bin/$(NAME) + debug: FFLAGS = -O0 -Wall -g -ffpe-trap=invalid,zero,overflow,underflow -fbacktrace -fcheck=all -fbounds-check -I$(MKL_INCLUDE_DIR) debug: $(OBJS) | $(BIN_DIR) $(FC) $(FFLAGS) $(OBJS) -o $(programs) $(MKL_FLAGS) diff --git a/src/heatflow/mod_boundary.f90 b/src/heatflow/mod_boundary.f90 index cb8ab30..88ad7ba 100644 --- a/src/heatflow/mod_boundary.f90 +++ b/src/heatflow/mod_boundary.f90 @@ -84,7 +84,7 @@ subroutine boundary(B) kappaHarm = (2*kappa*kappaBoundx1/(kappa+kappaBoundx1)) / & (grid(ix, iy, iz)%Length(1)**2) if (kappa .ne. kappaBoundx1) kappaHarm = kappaHarm*BR - B(I) = B(I) + (kappaHarm) * T_Bathx1 + boundray_term_vel(1_int12,iy,iz,T_Bathx1) + B(I) = B(I) + (kappaHarm) * T_Bathx1 !+ boundray_term_vel(1_int12,iy,iz,T_Bathx1) end if end if if (ix .eq. nx) then @@ -94,7 +94,7 @@ subroutine boundary(B) kappaHarm = (2*kappa*kappaBoundNx/(kappa+kappaBoundNx)) / & (grid(ix, iy, iz)%Length(1)**2) if (kappa .ne. kappaBoundNx) kappaHarm = kappaHarm*BR - B(I) = B(I) + (kappaHarm) * T_Bathx2 + boundray_term_vel(nx,iy,iz,T_Bathx2) + B(I) = B(I) + (kappaHarm) * T_Bathx2 !+ boundray_term_vel(nx,iy,iz,T_Bathx2) end if end if end if @@ -107,7 +107,7 @@ subroutine boundary(B) kappaHarm = (2*kappa*kappaBoundy1/(kappa+kappaBoundy1)) / & (grid(ix, iy, iz)%Length(2)**2) if (kappa .ne. kappaBoundy1) kappaHarm = kappaHarm*BR - B(I) = B(I) + (kappaHarm) * T_Bathy1 + boundray_term_vel(ix,1_int12,iz,T_Bathy1) + B(I) = B(I) + (kappaHarm) * T_Bathy1 !+ boundray_term_vel(ix,1_int12,iz,T_Bathy1) end if end if if (iy .eq. ny) then @@ -117,7 +117,7 @@ subroutine boundary(B) kappaHarm = (2*kappa*kappaBoundNy/(kappa+kappaBoundNy)) / & (grid(ix, iy, iz)%Length(2)**2) if (kappa .ne. kappaBoundNy) kappaHarm = kappaHarm*BR - B(I) = B(I) + (kappaHarm) * T_Bathy2 + boundray_term_vel(ix,ny,iz,T_Bathy2) + B(I) = B(I) + (kappaHarm) * T_Bathy2 !+ boundray_term_vel(ix,ny,iz,T_Bathy2) end if end if end if @@ -130,7 +130,7 @@ subroutine boundary(B) kappaHarm = (2*kappa*kappaBoundz1/(kappa+kappaBoundz1)) / & (grid(ix, iy, iz)%Length(3)**2) if (kappa .ne. kappaBoundz1) kappaHarm = kappaHarm*BR - B(I) = B(I) + (kappaHarm) * T_Bathz1 + boundray_term_vel(ix,iy,1_int12,T_Bathz1) + B(I) = B(I) + (kappaHarm) * T_Bathz1 !+ boundray_term_vel(ix,iy,1_int12,T_Bathz1) end if end if if (iz .eq. nz) then @@ -140,7 +140,7 @@ subroutine boundary(B) kappaHarm = (2*kappa*kappaBoundNz/(kappa+kappaBoundNz)) / & (grid(ix, iy, iz)%Length(3)**2) if (kappa .ne. kappaBoundNz) kappaHarm = kappaHarm*BR - B(I) = B(I) + (kappaHarm) * T_Bathz2 + boundray_term_vel(ix,iy,nz,T_Bathz2) + B(I) = B(I) + (kappaHarm) * T_Bathz2 !+ boundray_term_vel(ix,iy,nz,T_Bathz2) end if end if end if diff --git a/src/heatflow/mod_evolve.f90 b/src/heatflow/mod_evolve.f90 index ada2569..3156bd2 100644 --- a/src/heatflow/mod_evolve.f90 +++ b/src/heatflow/mod_evolve.f90 @@ -150,9 +150,9 @@ subroutine simulate(itime) err=E - ! CALL bicgstab(acsr, ia, ja, S, itmax, x, x0, iter) + call bicgstab(acsr, ia, ja, S, itmax, Temp_p, x, iter) - CALL solve_pardiso(acsr, S, ia, ja, x) + ! CALL solve_pardiso(acsr, S, ia, ja, x) ! CALL linbcg(S,x,itol=int(itol,I4B),tol=tol, itmax=int(itmax,I4B), iter=iter, & ! err=E) diff --git a/src/heatflow/mod_global.f90 b/src/heatflow/mod_global.f90 index 0fb61d6..439952c 100644 --- a/src/heatflow/mod_global.f90 +++ b/src/heatflow/mod_global.f90 @@ -14,8 +14,8 @@ module globe_data TYPE(sprs2_dp) :: ra !Techniqually rH TYPE(diag_sprs_dp) :: da !Techniqually dH real(real12), dimension(:), allocatable :: acsr - integer, dimension(:), allocatable :: ja - integer, dimension(:), allocatable :: ia + integer(kind=8), dimension(:), allocatable :: ja + integer(kind=8), dimension(:), allocatable :: ia character(len=1024) :: logname ! Log file name end module globe_data diff --git a/src/heatflow/mod_hmatrix.f90 b/src/heatflow/mod_hmatrix.f90 index e1d268d..fb7b747 100644 --- a/src/heatflow/mod_hmatrix.f90 +++ b/src/heatflow/mod_hmatrix.f90 @@ -149,7 +149,7 @@ function hmatrixfunc(i, j) result(H) H=0.0_real12 else H = A ! X left neighbor (left cell interaction) - H = H + calculate_convective_conductivity(xm, y, z, x, y, z) + !H = H + calculate_convective_conductivity(xm, y, z, x, y, z) end if end if @@ -158,7 +158,7 @@ function hmatrixfunc(i, j) result(H) H=0.0_real12 else H = B ! X right neighbor (right cell interaction) - H = H + calculate_convective_conductivity(xp, y, z, x, y, z) + !H = H + calculate_convective_conductivity(xp, y, z, x, y, z) end if end if @@ -167,7 +167,7 @@ function hmatrixfunc(i, j) result(H) H=0.0_real12 else H = D ! Y down neighbor (down cell interaction) - H = H + calculate_convective_conductivity(x, ym, z, x, y, z) + !H = H + calculate_convective_conductivity(x, ym, z, x, y, z) end if end if if ((i-j) .eq. -nx) then @@ -175,7 +175,7 @@ function hmatrixfunc(i, j) result(H) H=0.0_real12 else H = E ! Y up neighbor (up cell interaction) - H = H + calculate_convective_conductivity(x, yp, z, x, y, z) + !H = H + calculate_convective_conductivity(x, yp, z, x, y, z) end if end if @@ -185,7 +185,7 @@ function hmatrixfunc(i, j) result(H) else !write(*,*) 'F this is forward (in) z',F H = F ! Z in neighbor (forward cell interaction) !!!Frank had this as G during testing - H = H + calculate_convective_conductivity(x, y, zm, x, y, z) + !H = H + calculate_convective_conductivity(x, y, zm, x, y, z) end if end if @@ -195,7 +195,7 @@ function hmatrixfunc(i, j) result(H) else !write(*,*) 'G this is backward (out) z?',G H = G ! Z out neighbor (backward cell interaction) !!!Frank Had this as F during testing - H = H + calculate_convective_conductivity(x, y, zp, x, y, z) + !H = H + calculate_convective_conductivity(x, y, zp, x, y, z) end if end if diff --git a/src/heatflow/mod_sparse_solver.f90 b/src/heatflow/mod_sparse_solver.f90 index 7242657..f085b87 100644 --- a/src/heatflow/mod_sparse_solver.f90 +++ b/src/heatflow/mod_sparse_solver.f90 @@ -56,8 +56,8 @@ subroutine coo2csr( nrow, & integer (kind=8), dimension(nnz), intent(in) :: ir integer (kind=8), dimension(nnz), intent(in) :: jc real(8), dimension(nnz), intent(out) :: acsr - integer, dimension(nnz), intent(out) :: ja - integer, dimension(nrow+1), intent(out) :: ia + integer(kind=8), dimension(nnz), intent(out) :: ja + integer(kind=8), dimension(nrow+1), intent(out) :: ia ! Local variables. integer (kind=8) :: i, iad, j, k, k0 @@ -240,7 +240,8 @@ subroutine solve_pardiso( acsr, & integer, dimension(:), allocatable :: iparm integer,dimension(1) :: idum real(8),dimension(1) :: ddum - + integer :: badcol, missing_diag, k + logical :: found n = size(b,1) nnz = size(acsr,1) nrhs = 1 @@ -284,7 +285,60 @@ subroutine solve_pardiso( acsr, & do i=1,64 pt(i)%dummy = 0 end do - + + !---------------- CSR integrity / diagnostic checks ---------------- + + badcol = 0 + + write(*,*) 'PARDISO debug:' + write(*,*) ' n =', n + write(*,*) ' nnz =', nnz + write(*,*) ' ia(1) =', ia(1), ' ia(n+1)=', ia(n+1), ' ia(n+1)-1=', ia(n+1)-1 + + if (ia(1) /= 1) stop 'ERROR: ia(1) must be 1' + if (ia(n+1)-1 /= nnz) stop 'ERROR: ia end mismatch' + + do i=1,n + if (ia(i) > ia(i+1)) then + write(*,*) 'Row pointer decreases at row', i + stop 'ERROR: ia not monotone' + end if + end do + + do k=1,nnz + if (ja(k) < 1 .or. ja(k) > n) then + badcol = badcol + 1 + if (badcol <= 10) write(*,*) 'Bad column index k=',k,' ja=',ja(k) + end if + end do + if (badcol > 0) then + write(*,*) 'Total bad columns =', badcol + stop 'ERROR: invalid ja entries' + end if + + ! Check each row has a diagonal and (optionally) detect duplicates + missing_diag = 0 + do i=1,n + found = .false. + if (ia(i) < ia(i+1)) then + ! simple duplicate check (requires row segment unsorted ascending to be meaningful) + do k = ia(i), ia(i+1)-1 + if (ja(k) == i) then + if (acsr(k) == 0.0d0) then + write(*,*) 'Zero diagonal at row', i + stop 'ERROR: zero diagonal' + end if + found = .true. + end if + end do + end if + if (.not. found) then + missing_diag = missing_diag + 1 + if (missing_diag <= 10) write(*,*) 'Missing diagonal at row', i + end if + end do + if (missing_diag > 0) stop 'ERROR: missing diagonals' + !------------------------------------------------------------------- phase = 11 ! Only reordering and symbolic factorization call pardiso (pt,maxfct,mnum,mtype,phase,n,acsr,ia,ja, & From 2bd3703af911b879ce0c7fd1da5c275057c6c801 Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 1 Oct 2025 19:15:36 +0100 Subject: [PATCH 14/28] Switch to petsc --- Makefile | 188 ++++++++++++++++++----------- src/heatflow.f90 | 6 +- src/heatflow/mod_evolve.f90 | 22 +++- src/heatflow/mod_petsc_solver.f90 | 94 +++++++++++++++ src/heatflow/mod_sparse_solver.f90 | 4 +- 5 files changed, 235 insertions(+), 79 deletions(-) create mode 100644 src/heatflow/mod_petsc_solver.f90 diff --git a/Makefile b/Makefile index 7c65e97..840a5a8 100644 --- a/Makefile +++ b/Makefile @@ -1,89 +1,133 @@ #################################################################### -# 11 Jun 2024 # +# HeatFlow Build System (MKL + optional PETSc + OpenMP) #################################################################### -SHELL = /bin/sh -PLAT = _linux - -########################################## -# CODE DIRECTORIES AND FILES -########################################## -mkfile_path := $(abspath $(firstword $(MAKEFILE_LIST))) -mkfile_dir := $(dir $(mkfile_path)) -BIN_DIR := ./bin -SRC_DIR := ./src -BUILD_DIR = ./obj - -SRCS := heatflow/mod_constants.f90 \ - heatflow/mod_constructions.f90 \ - heatflow/mod_SPtype.f90 \ - heatflow/mod_global.f90 \ - heatflow/mod_Sparse.f90 \ - heatflow/mod_inputs.f90 \ - heatflow/mod_material.f90 \ - heatflow/mod_hmatrix.f90 \ - heatflow/mod_init_evolve.f90 \ - heatflow/mkl_pardiso.f90 \ - heatflow/mod_sparse_solver.f90 \ - heatflow/mod_setup.f90 \ - heatflow/mod_boundary.f90 \ - heatflow/mod_heating.f90 \ - heatflow/mod_cattaneo.f90 \ - heatflow/mod_tempdep.f90 \ - heatflow/mod_evolve.f90 \ - heatflow/mod_output.f90 \ - heatflow.f90 -OBJS := $(addprefix $(BUILD_DIR)/,$(notdir $(SRCS:.f90=.o))) - -# MKL configuration -MKLROOT ?= /opt/intel/oneapi/mkl/latest -MKL_LIB_DIR = $(MKLROOT)/lib/intel64 -MKL_INCLUDE_DIR = $(MKLROOT)/include -#MKL_FLAGS = -L$(MKL_LIB_DIR) -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl -MKL_FLAGS = -L$(MKL_LIB_DIR) -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl +SHELL = /bin/sh + +# Directories +SRC_DIR := ./src +BUILD_DIR := ./obj +BIN_DIR := ./bin + +# Compiler +FC := gfortran + +# Core count +NCORES := $(shell nproc) + +# MKL +MKLROOT ?= /opt/intel/oneapi/mkl/latest +MKL_LIB_DIR := $(MKLROOT)/lib/intel64 +MKL_INCLUDE := $(MKLROOT)/include +MKL_FLAGS := -L$(MKL_LIB_DIR) -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl + +# PETSc (manual fallback if petsc-config missing) +PETSC_PREFIX := /usr/lib/petscdir/petsc3.15/x86_64-linux-gnu-real +PETSC_FINCLUDE := /usr/share/petsc/3.15/include +PETSC_AINCLUDE := $(PETSC_PREFIX)/include +PETSC_LIBDIR := $(PETSC_PREFIX)/lib + +PETSC_CONFIG := $(shell command -v petsc-config 2>/dev/null) +ifeq ($(PETSC_CONFIG),) + PETSC_INC := -I$(PETSC_FINCLUDE) -I$(PETSC_AINCLUDE) + PETSC_LIB := -L$(PETSC_LIBDIR) -lpetsc + PETSC_NOTE := (PETSc manual paths) +else + PETSC_INC := $(shell petsc-config --cflags) + PETSC_LIB := $(shell petsc-config --libs) + PETSC_NOTE := (petsc-config) +endif + +# Flags +OPTFLAGS := -O3 +OMPFLAGS := -fopenmp +WARNFLAGS := -Wall +MODDIR_FLAG := -J$(BUILD_DIR) + +FFLAGS := -cpp $(OPTFLAGS) $(OMPFLAGS) $(WARNFLAGS) -I$(MKL_INCLUDE) $(PETSC_INC) $(MODDIR_FLAG) +DEBUGFLAGS := -cpp -O0 -g -fcheck=all -fbacktrace -ffpe-trap=invalid,zero,overflow,underflow -fbounds-check -I$(MKL_INCLUDE) $(PETSC_INC) $(MODDIR_FLAG) + +# Program +NAME := ThermalFlow.x +TARGET := $(BIN_DIR)/$(NAME) + +# Sources (module order) +SRCS := \ + heatflow/mod_constants.f90 \ + heatflow/mod_constructions.f90 \ + heatflow/mod_SPtype.f90 \ + heatflow/mod_global.f90 \ + heatflow/mod_Sparse.f90 \ + heatflow/mod_inputs.f90 \ + heatflow/mod_material.f90 \ + heatflow/mod_hmatrix.f90 \ + heatflow/mod_init_evolve.f90 \ + heatflow/mkl_pardiso.f90 \ + heatflow/mod_sparse_solver.f90 \ + heatflow/mod_petsc_solver.f90 \ + heatflow/mod_boundary.f90 \ + heatflow/mod_heating.f90 \ + heatflow/mod_cattaneo.f90 \ + heatflow/mod_tempdep.f90 \ + heatflow/mod_evolve.f90 \ + heatflow/mod_output.f90 \ + heatflow/mod_setup.f90 \ + heatflow.f90 -#FFLAGS = -O3 -I$(MKL_INCLUDE_DIR) -MODULEFLAGS = -J$(BUILD_DIR) -FC = gfortran -NCORES := $(shell nproc) -FFLAGS = -O3 -fopenmp -I$(MKL_INCLUDE_DIR) - -########################################## -# TARGETS -########################################## -NAME = ThermalFlow.x -programs = $(BIN_DIR)/$(NAME) +OBJS := $(addprefix $(BUILD_DIR)/,$(notdir $(SRCS:.f90=.o))) -.PHONY: all debug clean OMP +.PHONY: all debug clean distclean run help show -all: $(programs) +all: show $(TARGET) -$(BIN_DIR): - mkdir -p $@ +show: + @printf 'Building %s %s\n' '$(NAME)' '$(PETSC_NOTE)' -$(BUILD_DIR): +$(BIN_DIR) $(BUILD_DIR): mkdir -p $@ -# Pattern rule for compiling Fortran files +# Compile module sources $(BUILD_DIR)/%.o: $(SRC_DIR)/heatflow/%.f90 | $(BUILD_DIR) - $(FC) $(FFLAGS) $(MODULEFLAGS) -c $< -o $@ - -$(BUILD_DIR)/%.o: $(SRC_DIR)/%.f90 | $(BUILD_DIR) - $(FC) $(FFLAGS) $(MODULEFLAGS) -c $< -o $@ + $(FC) $(FFLAGS) -c $< -o $@ -$(programs): $(OBJS) | $(BIN_DIR) - $(FC) -O3 -fopenmp $(OBJS) -o $@ $(MKL_FLAGS) +# Main program +$(BUILD_DIR)/heatflow.o: $(SRC_DIR)/heatflow.f90 | $(BUILD_DIR) + $(FC) $(FFLAGS) -c $< -o $@ -.PHONY: run -run: $(programs) - OMP_NUM_THREADS=$(NCORES) MKL_NUM_THREADS=$(NCORES) MKL_DYNAMIC=FALSE OMP_PROC_BIND=spread OMP_PLACES=cores ./bin/$(NAME) +# Link (single definition) +$(TARGET): $(BIN_DIR) $(OBJS) + $(FC) $(OPTFLAGS) $(OMPFLAGS) $(OBJS) -o $@ $(MKL_FLAGS) $(PETSC_LIB) -Wl,-rpath,$(PETSC_LIBDIR) -debug: FFLAGS = -O0 -Wall -g -ffpe-trap=invalid,zero,overflow,underflow -fbacktrace -fcheck=all -fbounds-check -I$(MKL_INCLUDE_DIR) -debug: $(OBJS) | $(BIN_DIR) - $(FC) $(FFLAGS) $(OBJS) -o $(programs) $(MKL_FLAGS) +debug: FFLAGS = $(DEBUGFLAGS) +debug: clean show $(TARGET) -OMP: $(programs) - ./util/DShell/omp_exec.sh +run: $(TARGET) + OMP_NUM_THREADS=$(NCORES) \ + MKL_NUM_THREADS=$(NCORES) \ + MKL_DYNAMIC=FALSE \ + OMP_PROC_BIND=spread \ + OMP_PLACES=cores \ + $< $(RUN_ARGS) clean: - rm -f $(BUILD_DIR)/*.o $(BUILD_DIR)/*.mod $(programs) + @echo "[CLEAN] objects and modules" + @rm -f $(BUILD_DIR)/*.o $(BUILD_DIR)/*.mod + +distclean: clean + @echo "[CLEAN] executable" + @rm -f $(TARGET) + +help: + @echo "Targets:" + @echo " make / make all - build optimized" + @echo " make debug - debug build" + @echo " make run - run with all cores" + @echo " make clean - remove objects/modules" + @echo " make distclean - remove executable" + @echo "Variables:" + @echo " RUN_ARGS='-ksp_type cg -pc_type gamg -ksp_rtol 1e-8 -ksp_monitor'" + @echo "Parallel build: make -j$(NCORES)" + +#################################################################### +# End +#################################################################### \ No newline at end of file diff --git a/src/heatflow.f90 b/src/heatflow.f90 index 4b4df6e..135482b 100644 --- a/src/heatflow.f90 +++ b/src/heatflow.f90 @@ -28,6 +28,7 @@ program HEATFLOW_V0_3 use evolution, only: simulate use setup, only: set_global_variables use INITIAL, only: initial_evolve + use petsc_solver, only: petsc_init, petsc_finalize implicit none real(real12) :: cpustart, cpuend, cpustart2, progress @@ -79,8 +80,11 @@ program HEATFLOW_V0_3 ! CALL initial_evolve to set systems initial Temperature conditions if (itime .eq. 1) CALL initial_evolve - ! run the time evolution + ! run the time evolution + CALL petsc_init() CALL simulate(itime) + CALL petsc_finalize() + ! Write results diff --git a/src/heatflow/mod_evolve.f90 b/src/heatflow/mod_evolve.f90 index 3156bd2..22aac5a 100644 --- a/src/heatflow/mod_evolve.f90 +++ b/src/heatflow/mod_evolve.f90 @@ -31,7 +31,8 @@ module evolution use boundary_vector, only: boundary use cattaneo, only: S_catS ! use tempdep, only: ChangeProp - use sparse_solver, only: bicgstab, solve_pardiso + use petsc_solver, only: solve_petsc_csr + implicit none private @@ -51,7 +52,9 @@ subroutine simulate(itime) integer:: ncg, itol, itmax !, iss integer :: iter real(real12) :: e, err, tol - + integer, allocatable :: ia32(:), ja32(:) ! 32-bit copies for PETSc + integer :: NA32 + !---------------------- ! Initialize vectors !---------------------- @@ -149,8 +152,19 @@ subroutine simulate(itime) iter= 0 err=E - - call bicgstab(acsr, ia, ja, S, itmax, Temp_p, x, iter) + print *, "Calling solver..." + ! call bicgstab(acsr, ia, ja, S, itmax, Temp_p, x, iter) + if (.not. allocated(ia32)) then + allocate(ia32(size(ia)), ja32(size(ja))) + ia32 = int(ia, kind=kind(ia32)) + ja32 = int(ja, kind=kind(ja32)) + end if + NA32 = int(NA, kind=kind(NA32)) + allocate(x(NA)) + call solve_petsc_csr(NA32, ia32, ja32, acsr, S, x, tol, itmax) + if (allocated(ia32)) deallocate(ia32, ja32) + + print *, "Solver finished." ! CALL solve_pardiso(acsr, S, ia, ja, x) ! CALL linbcg(S,x,itol=int(itol,I4B),tol=tol, itmax=int(itmax,I4B), iter=iter, & diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90 new file mode 100644 index 0000000..ceda8a6 --- /dev/null +++ b/src/heatflow/mod_petsc_solver.f90 @@ -0,0 +1,94 @@ +module petsc_solver +#include "petsc/finclude/petscsys.h" +#include "petsc/finclude/petscksp.h" + use petscksp + implicit none + private + public :: petsc_init, petsc_finalize, solve_petsc_csr + +contains + + subroutine petsc_init() + integer :: ierr + call PetscInitialize(PETSC_NULL_CHARACTER, ierr) + end subroutine petsc_init + + subroutine petsc_finalize() + integer :: ierr + call PetscFinalize(ierr) + end subroutine petsc_finalize + + subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) + integer, intent(in) :: n + integer, intent(in) :: ia(:), ja(:) + real(8), intent(in) :: aval(:), b(:) + real(8), intent(inout) :: x(:) + real(8), intent(in) :: rtol + integer, intent(in) :: maxit + Mat :: A + Vec :: bb, xx + KSP :: ksp + PC :: pc + integer :: ierr, i, row_nz, start_k + integer, allocatable :: cols0(:), idx(:) + real(8), allocatable :: vals(:) + real(8), pointer :: xptr(:) + + if (size(ia) /= n+1) stop 'solve_petsc_csr: ia size mismatch' + if (size(b) /= n .or. size(x) /= n) stop 'solve_petsc_csr: vector size mismatch' + + ! Create matrix with an estimated 7 nonzeros/row (adjust if needed) + call MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 7, PETSC_NULL_INTEGER, A, ierr) + + do i = 1, n + row_nz = ia(i+1) - ia(i) + if (row_nz > 0) then + start_k = ia(i) + allocate(cols0(row_nz), vals(row_nz)) + cols0 = ja(start_k:start_k+row_nz-1) - 1 ! zero-based + vals = aval(start_k:start_k+row_nz-1) + call MatSetValues(A, 1, (/i-1/), row_nz, cols0, vals, INSERT_VALUES, ierr) + deallocate(cols0, vals) + end if + end do + call MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY, ierr) + call MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY, ierr) + + ! Create vectors + call VecCreateSeq(PETSC_COMM_SELF, n, bb, ierr) + call VecCreateSeq(PETSC_COMM_SELF, n, xx, ierr) + + ! Set RHS and initial guess + allocate(idx(n)) + idx = [(i-1, i=1,n)] + call VecSetValues(bb, n, idx, b, INSERT_VALUES, ierr) + call VecAssemblyBegin(bb,ierr); call VecAssemblyEnd(bb,ierr) + + call VecSetValues(xx, n, idx, x, INSERT_VALUES, ierr) + call VecAssemblyBegin(xx,ierr); call VecAssemblyEnd(xx,ierr) + + ! KSP setup + call KSPCreate(PETSC_COMM_SELF, ksp, ierr) + call KSPSetOperators(ksp, A, A, ierr) ! 3-arg form (reuse automatically) + call KSPGetPC(ksp, pc, ierr) + call PCSetType(pc, PCILU, ierr) ! Override at runtime with -pc_type + call KSPSetType(ksp, KSPCG, ierr) ! Use -ksp_type bcgs if not SPD + call KSPSetTolerances(ksp, rtol, PETSC_DEFAULT_REAL, PETSC_DEFAULT_REAL, maxit, ierr) + call KSPSetFromOptions(ksp, ierr) + + call KSPSolve(ksp, bb, xx, ierr) + + ! Extract solution + call VecGetArrayF90(xx, xptr, ierr) + x = xptr + call VecRestoreArrayF90(xx, xptr, ierr) + + ! Cleanup + deallocate(idx) + call KSPDestroy(ksp, ierr) + call VecDestroy(bb, ierr) + call VecDestroy(xx, ierr) + call MatDestroy(A, ierr) + end subroutine solve_petsc_csr + +end module petsc_solver \ No newline at end of file diff --git a/src/heatflow/mod_sparse_solver.f90 b/src/heatflow/mod_sparse_solver.f90 index f085b87..8bb24d0 100644 --- a/src/heatflow/mod_sparse_solver.f90 +++ b/src/heatflow/mod_sparse_solver.f90 @@ -139,7 +139,6 @@ subroutine bicgstab( acsr, & real(8), dimension(:), allocatable :: r, p, s, rst, temp1, temp2 n = size(b,1) - allocate(x(n)) allocate(r(n)) allocate(p(n)) @@ -150,6 +149,7 @@ subroutine bicgstab( acsr, & call mkl_dcsrgemv("N",n,acsr,ia,ja,x,temp1) + print *, "Initial residual norm: ", norm2(b - temp1) r = b - temp1 call random_number(rst) @@ -163,7 +163,7 @@ subroutine bicgstab( acsr, & delta0 = delta do i = 1, maxiter - + print *, "Iteration ", i if ( norm2(r) /= norm2(r) ) then write(*,'(a)') "Error in solver: residual NaN" exit From 763ec50e5c19fbdbc8713bb8d37af2031ff57f4f Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 6 Oct 2025 11:22:12 +0100 Subject: [PATCH 15/28] Fix petsc finaliser --- src/heatflow.f90 | 8 +++++--- src/heatflow/mod_evolve.f90 | 2 -- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/heatflow.f90 b/src/heatflow.f90 index 135482b..d1db700 100644 --- a/src/heatflow.f90 +++ b/src/heatflow.f90 @@ -65,6 +65,8 @@ program HEATFLOW_V0_3 !-------------------------------------------------------------! ! run simulation for 'ntime' time steps ! !-------------------------------------------------------------! + CALL petsc_init() + do itime=1,ntime if (iverb.eq.0) then @@ -81,9 +83,7 @@ program HEATFLOW_V0_3 if (itime .eq. 1) CALL initial_evolve ! run the time evolution - CALL petsc_init() CALL simulate(itime) - CALL petsc_finalize() @@ -91,7 +91,9 @@ program HEATFLOW_V0_3 CALL data_write(itime) if (IVERB.ge.3) CALL final_print - end do + end do + CALL petsc_finalize() + !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^! !-------------------------------------------------------------! diff --git a/src/heatflow/mod_evolve.f90 b/src/heatflow/mod_evolve.f90 index 22aac5a..5a857cd 100644 --- a/src/heatflow/mod_evolve.f90 +++ b/src/heatflow/mod_evolve.f90 @@ -152,7 +152,6 @@ subroutine simulate(itime) iter= 0 err=E - print *, "Calling solver..." ! call bicgstab(acsr, ia, ja, S, itmax, Temp_p, x, iter) if (.not. allocated(ia32)) then allocate(ia32(size(ia)), ja32(size(ja))) @@ -164,7 +163,6 @@ subroutine simulate(itime) call solve_petsc_csr(NA32, ia32, ja32, acsr, S, x, tol, itmax) if (allocated(ia32)) deallocate(ia32, ja32) - print *, "Solver finished." ! CALL solve_pardiso(acsr, S, ia, ja, x) ! CALL linbcg(S,x,itol=int(itol,I4B),tol=tol, itmax=int(itmax,I4B), iter=iter, & From ac8835cce2dbc158174db28592c8c304b7a7574e Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 14 Nov 2025 12:57:14 +0000 Subject: [PATCH 16/28] Working petsc, not fully reproducing old yet --- src/.vscode/settings.json | 5 +++++ src/heatflow/mod_evolve.f90 | 24 ++++++++++++++++-------- src/heatflow/mod_petsc_solver.f90 | 29 ++++++++++++++++++++++++++--- 3 files changed, 47 insertions(+), 11 deletions(-) create mode 100644 src/.vscode/settings.json diff --git a/src/.vscode/settings.json b/src/.vscode/settings.json new file mode 100644 index 0000000..a8c2003 --- /dev/null +++ b/src/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "python-envs.defaultEnvManager": "ms-python.python:conda", + "python-envs.defaultPackageManager": "ms-python.python:conda", + "python-envs.pythonProjects": [] +} \ No newline at end of file diff --git a/src/heatflow/mod_evolve.f90 b/src/heatflow/mod_evolve.f90 index 5a857cd..5921610 100644 --- a/src/heatflow/mod_evolve.f90 +++ b/src/heatflow/mod_evolve.f90 @@ -153,22 +153,30 @@ subroutine simulate(itime) err=E ! call bicgstab(acsr, ia, ja, S, itmax, Temp_p, x, iter) - if (.not. allocated(ia32)) then - allocate(ia32(size(ia)), ja32(size(ja))) - ia32 = int(ia, kind=kind(ia32)) - ja32 = int(ja, kind=kind(ja32)) - end if - NA32 = int(NA, kind=kind(NA32)) + + ! Allocate and initialize x with a good initial guess allocate(x(NA)) + x = Temp_p + (Temp_p - Temp_pp) + if (any(x - Temp_p .lt. TINY)) x = x + TINY ! avoid nan solver issue + + ! Convert to 32-bit integers for PETSc (only on first call) + if (.not. allocated(ia32)) then + allocate(ia32(size(ia)), ja32(size(ja))) + ia32 = int(ia, kind=kind(ia32)) + ja32 = int(ja, kind=kind(ja32)) + end if + NA32 = int(NA, kind=kind(NA32)) + call solve_petsc_csr(NA32, ia32, ja32, acsr, S, x, tol, itmax) - if (allocated(ia32)) deallocate(ia32, ja32) + + ! Note: Don't deallocate ia32, ja32 - keep them for next time step ! CALL solve_pardiso(acsr, S, ia, ja, x) ! CALL linbcg(S,x,itol=int(itol,I4B),tol=tol, itmax=int(itmax,I4B), iter=iter, & ! err=E) - ! + ! if (any(isnan(x(:)))) then write(0,*) "fatal error: NAN in x tempurature vector" write(0,*) 'time step ', itime, " T ", sum(Temp_p)/size(Temp_p), E ,iter diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90 index ceda8a6..b24ac17 100644 --- a/src/heatflow/mod_petsc_solver.f90 +++ b/src/heatflow/mod_petsc_solver.f90 @@ -29,10 +29,12 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) Vec :: bb, xx KSP :: ksp PC :: pc - integer :: ierr, i, row_nz, start_k + integer :: ierr, i, row_nz, start_k, its integer, allocatable :: cols0(:), idx(:) real(8), allocatable :: vals(:) real(8), pointer :: xptr(:) + KSPConvergedReason :: reason + real(8) :: rnorm if (size(ia) /= n+1) stop 'solve_petsc_csr: ia size mismatch' if (size(b) /= n .or. size(x) /= n) stop 'solve_petsc_csr: vector size mismatch' @@ -71,12 +73,33 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) call KSPCreate(PETSC_COMM_SELF, ksp, ierr) call KSPSetOperators(ksp, A, A, ierr) ! 3-arg form (reuse automatically) call KSPGetPC(ksp, pc, ierr) - call PCSetType(pc, PCILU, ierr) ! Override at runtime with -pc_type - call KSPSetType(ksp, KSPCG, ierr) ! Use -ksp_type bcgs if not SPD + call PCSetType(pc, PCJACOBI, ierr) ! Use Jacobi (diagonal) preconditioner to match linbcg + call KSPSetType(ksp, KSPBCGS, ierr) ! Use BCGS to match linbcg behavior + + ! Set convergence tolerances + ! rtol = relative tolerance, atol = absolute tolerance (use default), dtol = divergence tolerance, maxits = max iterations call KSPSetTolerances(ksp, rtol, PETSC_DEFAULT_REAL, PETSC_DEFAULT_REAL, maxit, ierr) + + ! Use unpreconditioned norm (matching linbcg with itol=1) + call KSPSetNormType(ksp, KSP_NORM_UNPRECONDITIONED, ierr) + + ! Allow command line override of solver options call KSPSetFromOptions(ksp, ierr) call KSPSolve(ksp, bb, xx, ierr) + + ! Check convergence + call KSPGetConvergedReason(ksp, reason, ierr) + call KSPGetIterationNumber(ksp, its, ierr) + call KSPGetResidualNorm(ksp, rnorm, ierr) + + if (reason < 0) then + write(0,*) "WARNING: PETSc solver diverged or failed!" + write(0,*) " Reason code:", reason + write(0,*) " Iterations:", its + write(0,*) " Residual norm:", rnorm + ! Don't stop - let the main code detect NaNs if needed + end if ! Extract solution call VecGetArrayF90(xx, xptr, ierr) From 2e2617772c593d4e322d6a3d3a8b07e5f71ae170 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 14 Nov 2025 14:54:39 +0000 Subject: [PATCH 17/28] Fixed petsc --- src/heatflow/mod_evolve.f90 | 33 ++++++++++++++++++- src/heatflow/mod_heating.f90 | 2 +- src/heatflow/mod_petsc_solver.f90 | 52 +++++++++++++++++++++++++++++- src/heatflow/mod_setup.f90 | 5 ++- src/heatflow/mod_sparse_solver.f90 | 39 +++++++++++++++++++--- 5 files changed, 120 insertions(+), 11 deletions(-) mode change 100644 => 100755 src/heatflow/mod_setup.f90 diff --git a/src/heatflow/mod_evolve.f90 b/src/heatflow/mod_evolve.f90 index 5921610..67af070 100644 --- a/src/heatflow/mod_evolve.f90 +++ b/src/heatflow/mod_evolve.f90 @@ -37,6 +37,9 @@ module evolution private public :: simulate + + ! Module-level variables for PETSc (persist across time steps) + integer, allocatable, save :: ia32(:), ja32(:) ! 32-bit copies for PETSc contains @@ -52,7 +55,6 @@ subroutine simulate(itime) integer:: ncg, itol, itmax !, iss integer :: iter real(real12) :: e, err, tol - integer, allocatable :: ia32(:), ja32(:) ! 32-bit copies for PETSc integer :: NA32 !---------------------- @@ -117,6 +119,16 @@ subroutine simulate(itime) !--------------------------------------------- if ( iSteady .eq. 0 ) then S = - inverse_time * Temp_p * lin_rhoc - Qdens - B + if (IVERB .gt. 3) then + write(*,*) "S construction diagnostics:" + write(*,*) " inverse_time =", inverse_time + write(*,*) " Temp_p avg =", sum(Temp_p)/size(Temp_p) + write(*,*) " lin_rhoc avg =", sum(lin_rhoc)/size(lin_rhoc) + write(*,*) " Qdens avg =", sum(Qdens)/size(Qdens) + write(*,*) " B avg =", sum(B)/size(B) + write(*,*) " -inverse_time*Temp_p*lin_rhoc avg =", sum(-inverse_time*Temp_p*lin_rhoc)/size(Temp_p) + write(*,*) " S before S_CAT avg =", sum(S)/size(S) + end if if ( iCAttaneo .eq. 1) then S = S + S_CAT end if @@ -159,6 +171,17 @@ subroutine simulate(itime) x = Temp_p + (Temp_p - Temp_pp) if (any(x - Temp_p .lt. TINY)) x = x + TINY ! avoid nan solver issue + ! Debug: Print initial guess statistics + if (IVERB .gt. 3) then + write(*,*) "========== PETSc Solver Diagnostics ==========" + write(*,*) "Time step:", itime + write(*,*) "Initial guess x: min=", minval(x), " max=", maxval(x), " avg=", sum(x)/size(x) + write(*,*) "RHS S: min=", minval(S), " max=", maxval(S), " avg=", sum(S)/size(S) + write(*,*) "Temp_p: min=", minval(Temp_p), " max=", maxval(Temp_p), " avg=", sum(Temp_p)/size(Temp_p) + write(*,*) "Matrix acsr: min=", minval(acsr), " max=", maxval(acsr), " avg=", sum(acsr)/size(acsr) + write(*,*) "Matrix size: n=", NA32, " nnz=", size(acsr) + end if + ! Convert to 32-bit integers for PETSc (only on first call) if (.not. allocated(ia32)) then allocate(ia32(size(ia)), ja32(size(ja))) @@ -169,6 +192,14 @@ subroutine simulate(itime) call solve_petsc_csr(NA32, ia32, ja32, acsr, S, x, tol, itmax) + ! Debug: Print solution statistics and verify solution + if (IVERB .gt. 3) then + write(*,*) "Solution x after PETSc: min=", minval(x), " max=", maxval(x), " avg=", sum(x)/size(x) + write(*,*) "Temperature change: avg(x-Temp_p)=", sum(x-Temp_p)/size(x) + write(*,*) "Max temperature change: ", maxval(abs(x-Temp_p)) + write(*,*) "==============================================" + end if + ! Note: Don't deallocate ia32, ja32 - keep them for next time step diff --git a/src/heatflow/mod_heating.f90 b/src/heatflow/mod_heating.f90 index f33eb5f..aa070ac 100644 --- a/src/heatflow/mod_heating.f90 +++ b/src/heatflow/mod_heating.f90 @@ -181,7 +181,7 @@ subroutine heater(itime, Q, Qdens) ! Normalize all heat sources by the heated volume - shared_power = .False. + shared_power = .TRUE. if (shared_power) then if (heated_volume .gt. 0.0) then Qdens(:) = Q(:) / heated_volume diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90 index b24ac17..dd6c73a 100644 --- a/src/heatflow/mod_petsc_solver.f90 +++ b/src/heatflow/mod_petsc_solver.f90 @@ -42,19 +42,34 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) ! Create matrix with an estimated 7 nonzeros/row (adjust if needed) call MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 7, PETSC_NULL_INTEGER, A, ierr) + ! Fill matrix from CSR format (ia, ja are 1-based Fortran indexing) do i = 1, n row_nz = ia(i+1) - ia(i) if (row_nz > 0) then start_k = ia(i) allocate(cols0(row_nz), vals(row_nz)) - cols0 = ja(start_k:start_k+row_nz-1) - 1 ! zero-based + ! Convert column indices from 1-based to 0-based for PETSc + cols0 = ja(start_k:start_k+row_nz-1) - 1 vals = aval(start_k:start_k+row_nz-1) + + ! Debug: print first row details + if (i == 1) then + write(*,'(A,I0,A,I0)') 'Row 1: nnz=', row_nz, ', start_k=', start_k + write(*,'(A,10I6)') ' 1-based cols:', ja(start_k:min(start_k+9,start_k+row_nz-1)) + write(*,'(A,10I6)') ' 0-based cols:', cols0(1:min(10,row_nz)) + write(*,'(A,10ES12.4)') ' vals:', vals(1:min(10,row_nz)) + end if + + ! Set row i-1 (0-based) with column indices cols0 (0-based) call MatSetValues(A, 1, (/i-1/), row_nz, cols0, vals, INSERT_VALUES, ierr) deallocate(cols0, vals) end if end do call MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY, ierr) call MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY, ierr) + + ! Optional: Verify matrix assembly (uncomment for debugging) + ! call MatView(A, PETSC_VIEWER_STDOUT_SELF, ierr) ! Create vectors call VecCreateSeq(PETSC_COMM_SELF, n, bb, ierr) @@ -93,6 +108,10 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) call KSPGetIterationNumber(ksp, its, ierr) call KSPGetResidualNorm(ksp, rnorm, ierr) + ! Report convergence status (commented out by default for performance) + ! Uncomment the next line to see convergence info every solve: + ! write(*,'(A,I0,A,ES12.5,A,I0)') ' PETSc: iterations=', its, ', residual=', rnorm, ', reason=', reason + if (reason < 0) then write(0,*) "WARNING: PETSc solver diverged or failed!" write(0,*) " Reason code:", reason @@ -105,6 +124,37 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) call VecGetArrayF90(xx, xptr, ierr) x = xptr call VecRestoreArrayF90(xx, xptr, ierr) + + ! Manual residual check: compute ||A*x - b|| + block + real(8), allocatable :: Ax(:), residual(:) + real(8) :: manual_rnorm, bnorm + integer(8) :: i, k, start_k, row_nz + + allocate(Ax(n), residual(n)) + Ax = 0.0_8 + + ! Compute A*x manually using CSR format + do i = 1, n + row_nz = ia(i+1) - ia(i) + if (row_nz > 0) then + start_k = ia(i) + do k = start_k, start_k + row_nz - 1 + Ax(i) = Ax(i) + aval(k) * x(ja(k)) + end do + end if + end do + + residual = Ax - b + manual_rnorm = sqrt(sum(residual**2)) + bnorm = sqrt(sum(b**2)) + + write(*,'(A,ES15.7)') ' Manual residual check: ||A*x-b|| = ', manual_rnorm + write(*,'(A,ES15.7)') ' ||b|| = ', bnorm + write(*,'(A,ES15.7)') ' ||A*x-b||/||b|| = ', manual_rnorm/bnorm + + deallocate(Ax, residual) + end block ! Cleanup deallocate(idx) diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90 old mode 100644 new mode 100755 index 40c53bd..e3c2872 --- a/src/heatflow/mod_setup.f90 +++ b/src/heatflow/mod_setup.f90 @@ -37,6 +37,7 @@ subroutine set_global_variables() integer(int12) :: ix,iy,iz,index real(real12) :: kappa,kappa3D,h_conv,heat_capacity,rho,sound_speed,tau, em real(real12), dimension(3) :: vel + allocate(Temp_cur(nx, ny, nz)) allocate(Temp_p(NA)) allocate(Temp_pp(NA)) @@ -82,7 +83,6 @@ subroutine set_global_variables() ! Allocate the arrays to hold the H matrix in CSR format allocate(acsr(ra%len), ja(ra%len), ia(ra%n+1)) CALL coo2csr(ra%n, ra%len, ra%val, ra%irow, ra%jcol, acsr, ja, ia) - ! print*, ra%val end if !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -106,7 +106,7 @@ subroutine sparse_Hmatrix() if (Periodicz) len = len + 2*nx*ny ra%n = NA ! The number of rows in the H matrix ra%len = len ! The number of non-zero elements in the H matrix - ! Allocate the arrays to hold the H matrix in sparse row storage + ! Allocate the arrays to hold the H matrix in sparse storage allocate(ra%val(len), ra%irow(len), ra%jcol(len)) ra%val(:)=0 ra%irow(:)=-2 @@ -147,7 +147,6 @@ subroutine sparse_Hmatrix() ra%irow(count) = i ! The row of the H matrix ra%jcol(count) = j ! The column of the H matrix count = count + 1 ! The number of non-zero elements in the H matrix - H0=hmatrixfunc(j,i) ! The value of the H matrix ra%val(count) = H0 ! The value of the H matrix ra%irow(count) = j ! The row of the H matrix ra%jcol(count) = i ! The column of the H matrix diff --git a/src/heatflow/mod_sparse_solver.f90 b/src/heatflow/mod_sparse_solver.f90 index 8bb24d0..8f7957c 100644 --- a/src/heatflow/mod_sparse_solver.f90 +++ b/src/heatflow/mod_sparse_solver.f90 @@ -60,10 +60,14 @@ subroutine coo2csr( nrow, & integer(kind=8), dimension(nrow+1), intent(out) :: ia ! Local variables. - integer (kind=8) :: i, iad, j, k, k0 + integer (kind=8) :: i, iad, j, k, k0, row_start + integer (kind=8) :: dup_count real(kind=8) :: x + logical :: found_dup + integer(kind=8), dimension(nrow+1) :: ia_save ! Save original row starts ia(1:nrow+1) = 0 + dup_count = 0 ! determine the row lengths. @@ -83,20 +87,45 @@ subroutine coo2csr( nrow, & end do + ! Save the original row pointers + ia_save = ia + ! go through the structure once more. fill in output matrix. + ! This version handles duplicate (i,j) entries by summing them. do k = 1, nnz i = ir(k) j = jc(k) x = a(k) - iad = ia(i) - acsr(iad) = x - ja(iad) = j - ia(i) = iad + 1 + + ! Search for existing entry in this row with same column + found_dup = .false. + row_start = ia_save(i) + do iad = row_start, ia(i)-1 + if (ja(iad) == j) then + ! Found duplicate - sum the values + acsr(iad) = acsr(iad) + x + found_dup = .true. + dup_count = dup_count + 1 + exit + end if + end do + + if (.not. found_dup) then + ! New entry - insert it + iad = ia(i) + acsr(iad) = x + ja(iad) = j + ia(i) = iad + 1 + end if end do + if (dup_count > 0) then + write(*,'(A,I0,A)') 'WARNING: COO->CSR found and summed ', dup_count, ' duplicate entries' + end if + ! shift back ia. do j = nrow, 1, -1 From 61acedfc9dfa65511671744e7f553a3c1db9c380 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 18 Nov 2025 09:53:30 +0000 Subject: [PATCH 18/28] remove debugging prints --- src/heatflow/mod_petsc_solver.f90 | 39 ------------------------------- 1 file changed, 39 deletions(-) diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90 index dd6c73a..d6fd327 100644 --- a/src/heatflow/mod_petsc_solver.f90 +++ b/src/heatflow/mod_petsc_solver.f90 @@ -52,14 +52,6 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) cols0 = ja(start_k:start_k+row_nz-1) - 1 vals = aval(start_k:start_k+row_nz-1) - ! Debug: print first row details - if (i == 1) then - write(*,'(A,I0,A,I0)') 'Row 1: nnz=', row_nz, ', start_k=', start_k - write(*,'(A,10I6)') ' 1-based cols:', ja(start_k:min(start_k+9,start_k+row_nz-1)) - write(*,'(A,10I6)') ' 0-based cols:', cols0(1:min(10,row_nz)) - write(*,'(A,10ES12.4)') ' vals:', vals(1:min(10,row_nz)) - end if - ! Set row i-1 (0-based) with column indices cols0 (0-based) call MatSetValues(A, 1, (/i-1/), row_nz, cols0, vals, INSERT_VALUES, ierr) deallocate(cols0, vals) @@ -124,37 +116,6 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) call VecGetArrayF90(xx, xptr, ierr) x = xptr call VecRestoreArrayF90(xx, xptr, ierr) - - ! Manual residual check: compute ||A*x - b|| - block - real(8), allocatable :: Ax(:), residual(:) - real(8) :: manual_rnorm, bnorm - integer(8) :: i, k, start_k, row_nz - - allocate(Ax(n), residual(n)) - Ax = 0.0_8 - - ! Compute A*x manually using CSR format - do i = 1, n - row_nz = ia(i+1) - ia(i) - if (row_nz > 0) then - start_k = ia(i) - do k = start_k, start_k + row_nz - 1 - Ax(i) = Ax(i) + aval(k) * x(ja(k)) - end do - end if - end do - - residual = Ax - b - manual_rnorm = sqrt(sum(residual**2)) - bnorm = sqrt(sum(b**2)) - - write(*,'(A,ES15.7)') ' Manual residual check: ||A*x-b|| = ', manual_rnorm - write(*,'(A,ES15.7)') ' ||b|| = ', bnorm - write(*,'(A,ES15.7)') ' ||A*x-b||/||b|| = ', manual_rnorm/bnorm - - deallocate(Ax, residual) - end block ! Cleanup deallocate(idx) From 9307f859a53c6fdfcde7335466d693c0cab4b929 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 18 Nov 2025 10:24:36 +0000 Subject: [PATCH 19/28] make more memory efficient --- src/heatflow/mod_petsc_solver.f90 | 134 ++++++++++++------- src/heatflow/mod_setup.f90 | 212 +++++++++++++++++++++++------- 2 files changed, 248 insertions(+), 98 deletions(-) diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90 index d6fd327..e45ce2e 100644 --- a/src/heatflow/mod_petsc_solver.f90 +++ b/src/heatflow/mod_petsc_solver.f90 @@ -4,7 +4,15 @@ module petsc_solver use petscksp implicit none private - public :: petsc_init, petsc_finalize, solve_petsc_csr + public :: petsc_init, petsc_finalize, solve_petsc_csr, petsc_cleanup + + ! Persistent PETSc objects (reused across timesteps for memory efficiency) + Mat, save :: A_saved = PETSC_NULL_MAT + Vec, save :: bb_saved = PETSC_NULL_VEC + Vec, save :: xx_saved = PETSC_NULL_VEC + KSP, save :: ksp_saved = PETSC_NULL_KSP + logical, save :: initialized = .false. + integer, save :: n_saved = 0 contains @@ -15,9 +23,25 @@ end subroutine petsc_init subroutine petsc_finalize() integer :: ierr + call petsc_cleanup() call PetscFinalize(ierr) end subroutine petsc_finalize + subroutine petsc_cleanup() + ! Clean up persistent PETSc objects + integer :: ierr + if (A_saved /= PETSC_NULL_MAT) call MatDestroy(A_saved, ierr) + if (bb_saved /= PETSC_NULL_VEC) call VecDestroy(bb_saved, ierr) + if (xx_saved /= PETSC_NULL_VEC) call VecDestroy(xx_saved, ierr) + if (ksp_saved /= PETSC_NULL_KSP) call KSPDestroy(ksp_saved, ierr) + A_saved = PETSC_NULL_MAT + bb_saved = PETSC_NULL_VEC + xx_saved = PETSC_NULL_VEC + ksp_saved = PETSC_NULL_KSP + initialized = .false. + n_saved = 0 + end subroutine petsc_cleanup + subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) integer, intent(in) :: n integer, intent(in) :: ia(:), ja(:) @@ -25,24 +49,58 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) real(8), intent(inout) :: x(:) real(8), intent(in) :: rtol integer, intent(in) :: maxit - Mat :: A - Vec :: bb, xx - KSP :: ksp + PC :: pc integer :: ierr, i, row_nz, start_k, its - integer, allocatable :: cols0(:), idx(:) + integer, allocatable :: cols0(:), idx(:), d_nnz(:) real(8), allocatable :: vals(:) real(8), pointer :: xptr(:) KSPConvergedReason :: reason real(8) :: rnorm + logical :: rebuild_needed if (size(ia) /= n+1) stop 'solve_petsc_csr: ia size mismatch' if (size(b) /= n .or. size(x) /= n) stop 'solve_petsc_csr: vector size mismatch' - ! Create matrix with an estimated 7 nonzeros/row (adjust if needed) - call MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 7, PETSC_NULL_INTEGER, A, ierr) + ! Determine if we need to rebuild the matrix structure + rebuild_needed = .false. + if (.not. initialized) rebuild_needed = .true. + if (n /= n_saved) rebuild_needed = .true. + + ! Create PETSc objects on first call or if size changed + if (rebuild_needed) then + ! Clean up old objects if they exist + if (initialized) call petsc_cleanup() + + ! Preallocate matrix with exact nonzeros per row (saves memory) + allocate(d_nnz(n)) + do i = 1, n + d_nnz(i) = ia(i+1) - ia(i) + end do + call MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 0, d_nnz, A_saved, ierr) + deallocate(d_nnz) + + ! Create persistent vectors + call VecCreateSeq(PETSC_COMM_SELF, n, bb_saved, ierr) + call VecCreateSeq(PETSC_COMM_SELF, n, xx_saved, ierr) + + ! Create and configure KSP solver (persistent across timesteps) + call KSPCreate(PETSC_COMM_SELF, ksp_saved, ierr) + call KSPSetOperators(ksp_saved, A_saved, A_saved, ierr) + call KSPGetPC(ksp_saved, pc, ierr) + call PCSetType(pc, PCJACOBI, ierr) ! Jacobi preconditioner + call KSPSetType(ksp_saved, KSPBCGS, ierr) ! BiCGSTAB solver + call KSPSetTolerances(ksp_saved, rtol, PETSC_DEFAULT_REAL, & + PETSC_DEFAULT_REAL, maxit, ierr) + call KSPSetNormType(ksp_saved, KSP_NORM_UNPRECONDITIONED, ierr) + call KSPSetFromOptions(ksp_saved, ierr) + + initialized = .true. + n_saved = n + end if - ! Fill matrix from CSR format (ia, ja are 1-based Fortran indexing) + ! Update matrix values (always needed each timestep) + call MatZeroEntries(A_saved, ierr) do i = 1, n row_nz = ia(i+1) - ia(i) if (row_nz > 0) then @@ -53,52 +111,34 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) vals = aval(start_k:start_k+row_nz-1) ! Set row i-1 (0-based) with column indices cols0 (0-based) - call MatSetValues(A, 1, (/i-1/), row_nz, cols0, vals, INSERT_VALUES, ierr) + call MatSetValues(A_saved, 1, (/i-1/), row_nz, cols0, vals, INSERT_VALUES, ierr) deallocate(cols0, vals) end if end do - call MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY, ierr) - call MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY, ierr) + call MatAssemblyBegin(A_saved, MAT_FINAL_ASSEMBLY, ierr) + call MatAssemblyEnd(A_saved, MAT_FINAL_ASSEMBLY, ierr) ! Optional: Verify matrix assembly (uncomment for debugging) - ! call MatView(A, PETSC_VIEWER_STDOUT_SELF, ierr) - - ! Create vectors - call VecCreateSeq(PETSC_COMM_SELF, n, bb, ierr) - call VecCreateSeq(PETSC_COMM_SELF, n, xx, ierr) + ! call MatView(A_saved, PETSC_VIEWER_STDOUT_SELF, ierr) - ! Set RHS and initial guess + ! Update RHS vector allocate(idx(n)) idx = [(i-1, i=1,n)] - call VecSetValues(bb, n, idx, b, INSERT_VALUES, ierr) - call VecAssemblyBegin(bb,ierr); call VecAssemblyEnd(bb,ierr) + call VecSetValues(bb_saved, n, idx, b, INSERT_VALUES, ierr) + call VecAssemblyBegin(bb_saved,ierr); call VecAssemblyEnd(bb_saved,ierr) - call VecSetValues(xx, n, idx, x, INSERT_VALUES, ierr) - call VecAssemblyBegin(xx,ierr); call VecAssemblyEnd(xx,ierr) - - ! KSP setup - call KSPCreate(PETSC_COMM_SELF, ksp, ierr) - call KSPSetOperators(ksp, A, A, ierr) ! 3-arg form (reuse automatically) - call KSPGetPC(ksp, pc, ierr) - call PCSetType(pc, PCJACOBI, ierr) ! Use Jacobi (diagonal) preconditioner to match linbcg - call KSPSetType(ksp, KSPBCGS, ierr) ! Use BCGS to match linbcg behavior - - ! Set convergence tolerances - ! rtol = relative tolerance, atol = absolute tolerance (use default), dtol = divergence tolerance, maxits = max iterations - call KSPSetTolerances(ksp, rtol, PETSC_DEFAULT_REAL, PETSC_DEFAULT_REAL, maxit, ierr) - - ! Use unpreconditioned norm (matching linbcg with itol=1) - call KSPSetNormType(ksp, KSP_NORM_UNPRECONDITIONED, ierr) - - ! Allow command line override of solver options - call KSPSetFromOptions(ksp, ierr) + ! Update initial guess + call VecSetValues(xx_saved, n, idx, x, INSERT_VALUES, ierr) + call VecAssemblyBegin(xx_saved,ierr); call VecAssemblyEnd(xx_saved,ierr) + deallocate(idx) - call KSPSolve(ksp, bb, xx, ierr) + ! Solve the system (reusing persistent KSP) + call KSPSolve(ksp_saved, bb_saved, xx_saved, ierr) ! Check convergence - call KSPGetConvergedReason(ksp, reason, ierr) - call KSPGetIterationNumber(ksp, its, ierr) - call KSPGetResidualNorm(ksp, rnorm, ierr) + call KSPGetConvergedReason(ksp_saved, reason, ierr) + call KSPGetIterationNumber(ksp_saved, its, ierr) + call KSPGetResidualNorm(ksp_saved, rnorm, ierr) ! Report convergence status (commented out by default for performance) ! Uncomment the next line to see convergence info every solve: @@ -113,16 +153,10 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) end if ! Extract solution - call VecGetArrayF90(xx, xptr, ierr) + call VecGetArrayF90(xx_saved, xptr, ierr) x = xptr - call VecRestoreArrayF90(xx, xptr, ierr) + call VecRestoreArrayF90(xx_saved, xptr, ierr) - ! Cleanup - deallocate(idx) - call KSPDestroy(ksp, ierr) - call VecDestroy(bb, ierr) - call VecDestroy(xx, ierr) - call MatDestroy(A, ierr) end subroutine solve_petsc_csr end module petsc_solver \ No newline at end of file diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90 index e3c2872..8c51b62 100755 --- a/src/heatflow/mod_setup.f90 +++ b/src/heatflow/mod_setup.f90 @@ -52,8 +52,13 @@ subroutine set_global_variables() ! later date !--------------------------------------------------- write(*,*) "Setting up material properties" + write(*,'(A,I10,A)') " Processing ", NA, " grid cells..." index = 0 do iz = 1, nz + ! Progress reporting every 10% for large grids + if (mod(iz-1, max(1,nz/10)) == 0 .and. iz > 1) then + write(*,'(A,I3,A)') " Progress: ", int(100.0*real(iz)/real(nz)), "%" + end if do iy = 1, ny do ix = 1, nx index = index + 1 @@ -75,14 +80,15 @@ subroutine set_global_variables() !--------------------------------------------------- ! Check if the sparse matrix matches the full matrix !--------------------------------------------------- + write(*,*) "Building sparse H matrix..." if (Check_Sparse_Full) then print*, "CHECK SPARSE FULL" CALL build_Hmatrix() else + ! Build CSR format directly (acsr, ja, ia are allocated inside sparse_Hmatrix) CALL sparse_Hmatrix() - ! Allocate the arrays to hold the H matrix in CSR format - allocate(acsr(ra%len), ja(ra%len), ia(ra%n+1)) - CALL coo2csr(ra%n, ra%len, ra%val, ra%irow, ra%jcol, acsr, ja, ia) + ! No need for COO->CSR conversion anymore, it's already in CSR format! + write(*,*) "Sparse matrix setup complete." end if !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -91,26 +97,46 @@ end subroutine set_global_variables !!!################################################################################################# !!!################################################################################################# -!!! This sets up the H Matrix directly in sparse row storage +!!! This sets up the H Matrix directly in sparse row storage (CSR format) +!!! Modified to build CSR directly instead of COO->CSR to save memory !!!################################################################################################# subroutine sparse_Hmatrix() implicit none real(real12) :: H0 ! Holds the value of the H matrix - integer(int12) :: i, j, len, count, k ! i and j are the row and column of the H matrix + integer(int12) :: i, j, count, k, row ! i and j are the row and column of the H matrix + integer(int12) :: nnz_estimate ! Holds the values to add to the row to get the column integer(int12), allocatable, dimension(:) :: addit - ! The number of non-zero elements in the H matrix to look for - len = 7*nx*ny*nz - 2*(nx*ny + ny*nz + nz*nx) - if (Periodicx) len = len + 2*ny*nz - if (Periodicy) len = len + 2*nz*nx - if (Periodicz) len = len + 2*nx*ny + ! Temporary arrays for building each row + real(real12), allocatable, dimension(:) :: row_vals + integer(int12), allocatable, dimension(:) :: row_cols + integer(int12) :: row_count, max_row_size + ra%n = NA ! The number of rows in the H matrix - ra%len = len ! The number of non-zero elements in the H matrix - ! Allocate the arrays to hold the H matrix in sparse storage - allocate(ra%val(len), ra%irow(len), ra%jcol(len)) - ra%val(:)=0 - ra%irow(:)=-2 - ra%jcol(:)=-1 + + ! Estimate nonzeros (7 per interior cell, less at boundaries) + nnz_estimate = 7*nx*ny*nz - 2*(nx*ny + ny*nz + nz*nx) + if (Periodicx) nnz_estimate = nnz_estimate + 2*ny*nz + if (Periodicy) nnz_estimate = nnz_estimate + 2*nz*nx + if (Periodicz) nnz_estimate = nnz_estimate + 2*nx*ny + + ! Allocate CSR arrays with initial estimate (will grow if needed) + ! For 451^3: ~640M entries = 10GB, so allocate conservatively + write(*,'(A,I12,A)') " Estimated nonzeros: ", nnz_estimate, "" + allocate(acsr(nnz_estimate), ja(nnz_estimate)) + allocate(ia(NA+1)) + + ! Setup neighbor offsets + addit = [1] + if (Periodicx) addit = [addit, (nx-1)] + if (ny .gt. 1) addit = [addit, nx] + if ((Periodicy).and.(ny .gt. 1)) addit = [addit, (ny-1)*nx] + if (nz .gt. 1) addit = [addit, nx*ny] + if ((Periodicz).and.(nz .gt. 1)) addit = [addit, (nz-1)*ny*nx] + + ! Allocate temporary row storage (max ~13 entries per row for 3D) + max_row_size = 2*size(addit,1) + 1 + allocate(row_vals(max_row_size), row_cols(max_row_size)) addit = [1] ! The values to add to the row to get the column if (Periodicx) addit = [addit, (nx-1)] if (ny .gt. 1) addit = [addit, nx] ! Add the values to add to the row to get the column @@ -124,41 +150,131 @@ subroutine sparse_Hmatrix() !write(6,*) NA !write(6,*) "=========================================" - count = 0 ! The number of non-zero elements in the H matrix - parent_loop: do j = 1, NA ! Loop over the columns of the H matrix - i=j ! The row of the H matrix - count = count + 1 ! The number of non-zero elements in the H matrix - H0 = hmatrixfunc(i,j) ! The value of the H matrix - ra%val(count) = H0 ! The value of the H matrix - ra%irow(count) = i ! The row of the H matrix - ra%jcol(count) = j ! The column of the H matrix - ! Loop over the values to add to the row to get the column - neighbour_loop: do k = 1, size(addit,1) - i = j + addit(k) ! The row of the H matrix - ! If the row is greater than the number of rows ... - !...in the H matrix then go to the next column - if ((i.gt.NA)) cycle parent_loop - H0=hmatrixfunc(i,j) ! The value of the H matrix - ! If the value of the H matrix is less than TINY then go to the next value ... - !...to add to the row to get the column - if (abs(H0).lt.TINY) cycle neighbour_loop - count = count + 1 ! The number of non-zero elements in the H matrix - ra%val(count) = H0 ! The value of the H matrix - ra%irow(count) = i ! The row of the H matrix - ra%jcol(count) = j ! The column of the H matrix - count = count + 1 ! The number of non-zero elements in the H matrix - ra%val(count) = H0 ! The value of the H matrix - ra%irow(count) = j ! The row of the H matrix - ra%jcol(count) = i ! The column of the H matrix - !write(6,*) j,i, H0, count - end do neighbour_loop - end do parent_loop - - !write(6,*) "=========================================" - !write(6,*) 'c len',count, len + + count = 0 ! Total nonzeros counter + ia(1) = 1 ! CSR row pointer (1-based for Fortran) + + ! Build CSR format row-by-row + write(*,'(A)') " Building CSR matrix row-by-row..." + parent_loop: do row = 1, NA + ! Progress reporting every 10% + if (mod(row-1, max(1,NA/10)) == 0 .and. row > 1) then + write(*,'(A,I3,A,I12,A)') " Progress: ", int(100.0*real(row)/real(NA)), & + "%, nnz=", count, "" + end if + + row_count = 0 + + ! Diagonal element + j = row + row_count = row_count + 1 + H0 = hmatrixfunc(row, j) + row_vals(row_count) = H0 + row_cols(row_count) = j + + ! Off-diagonal elements (process in column-sorted order for CSR) + ! First pass: collect all neighbors + do k = 1, size(addit,1) + j = row + addit(k) + if (j > NA) cycle ! Skip if out of bounds + + H0 = hmatrixfunc(row, j) + if (abs(H0) >= TINY) then + row_count = row_count + 1 + row_vals(row_count) = H0 + row_cols(row_count) = j + end if + end do + + ! Second pass: collect reverse neighbors (j < row) + do k = 1, size(addit,1) + j = row - addit(k) + if (j < 1) cycle ! Skip if out of bounds + + H0 = hmatrixfunc(row, j) + if (abs(H0) >= TINY) then + row_count = row_count + 1 + row_vals(row_count) = H0 + row_cols(row_count) = j + end if + end do + + ! Sort this row's entries by column index (required for CSR) + call sort_row(row_vals, row_cols, row_count) + + ! Copy row data to CSR arrays (no bounds checking - we pre-allocated correctly) + do i = 1, row_count + count = count + 1 + acsr(count) = row_vals(i) + ja(count) = row_cols(i) + end do + + ! Update row pointer + ia(row+1) = count + 1 + end do parent_loop + + ra%len = count + + ! Trim arrays to actual size if we over-estimated + if (count < size(acsr)) then + write(*,'(A,I12,A,I12)') " Trimming arrays from ", size(acsr), " to ", count + call trim_csr_arrays(acsr, ja, count) + end if + + deallocate(row_vals, row_cols) + write(*,'(A,I12,A)') " CSR matrix built successfully. Actual nonzeros: ", count, "" end subroutine sparse_Hmatrix !!!################################################################################################# +!!!################################################################################################# +!!! Sort a row's entries by column index (simple insertion sort, rows are small) +!!!################################################################################################# + subroutine sort_row(vals, cols, n) + implicit none + integer(int12), intent(in) :: n + real(real12), dimension(n), intent(inout) :: vals + integer(int12), dimension(n), intent(inout) :: cols + integer(int12) :: i, j, temp_col + real(real12) :: temp_val + + do i = 2, n + temp_val = vals(i) + temp_col = cols(i) + j = i - 1 + do while (j >= 1) + if (cols(j) <= temp_col) exit + vals(j+1) = vals(j) + cols(j+1) = cols(j) + j = j - 1 + end do + vals(j+1) = temp_val + cols(j+1) = temp_col + end do + end subroutine sort_row +!!!################################################################################################# + +!!!################################################################################################# +!!! Trim CSR arrays to exact size +!!!################################################################################################# + subroutine trim_csr_arrays(acsr_arr, ja_arr, final_size) + implicit none + integer(int12), intent(in) :: final_size + real(real12), allocatable, dimension(:), intent(inout) :: acsr_arr + integer(int12), allocatable, dimension(:), intent(inout) :: ja_arr + real(real12), allocatable, dimension(:) :: temp_vals + integer(int12), allocatable, dimension(:) :: temp_cols + + allocate(temp_vals(final_size), temp_cols(final_size)) + temp_vals = acsr_arr(1:final_size) + temp_cols = ja_arr(1:final_size) + deallocate(acsr_arr, ja_arr) + allocate(acsr_arr(final_size), ja_arr(final_size)) + acsr_arr = temp_vals + ja_arr = temp_cols + deallocate(temp_vals, temp_cols) + end subroutine trim_csr_arrays +!!!################################################################################################# + !!!################################################################################################# !!! Check if the simulation will be stable. NOT FULLY IMPLEMENTED !!!################################################################################################# From b2b58f5f18dd4a451594c5de4b7cd28a2be40b86 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 18 Nov 2025 12:56:36 +0000 Subject: [PATCH 20/28] Fixed memory issues --- src/heatflow/mkl_pardiso.f90 | 452 ----------------------------- src/heatflow/mod_petsc_solver.f90 | 160 ++++++++-- src/heatflow/mod_setup.f90 | 1 - src/heatflow/mod_sparse_solver.f90 | 420 --------------------------- src/heatflow/mod_tempdep.f90 | 0 5 files changed, 141 insertions(+), 892 deletions(-) delete mode 100644 src/heatflow/mkl_pardiso.f90 mode change 100755 => 100644 src/heatflow/mod_setup.f90 delete mode 100644 src/heatflow/mod_sparse_solver.f90 mode change 100755 => 100644 src/heatflow/mod_tempdep.f90 diff --git a/src/heatflow/mkl_pardiso.f90 b/src/heatflow/mkl_pardiso.f90 deleted file mode 100644 index 0ff7527..0000000 --- a/src/heatflow/mkl_pardiso.f90 +++ /dev/null @@ -1,452 +0,0 @@ -!******************************************************************************* -! Copyright(C) 2004-2013 Intel Corporation. All Rights Reserved. -! -! The source code, information and material ("Material") contained herein is -! owned by Intel Corporation or its suppliers or licensors, and title to such -! Material remains with Intel Corporation or its suppliers or licensors. The -! Material contains proprietary information of Intel or its suppliers and -! licensors. The Material is protected by worldwide copyright laws and treaty -! provisions. No part of the Material may be used, copied, reproduced, -! modified, published, uploaded, posted, transmitted, distributed or disclosed -! in any way without Intel's prior express written permission. No license -! under any patent, copyright or other intellectual property rights in the -! Material is granted to or conferred upon you, either expressly, by -! implication, inducement, estoppel or otherwise. Any license under such -! intellectual property rights must be express and approved by Intel in -! writing. -! -! *Third Party trademarks are the property of their respective owners. -! -! Unless otherwise agreed by Intel in writing, you may not remove or alter -! this notice or any other notice embedded in Materials by Intel or Intel's -! suppliers or licensors in any way. -! -!******************************************************************************* -! Content : MKL PARDISO Fortran-90 header file -! -! Contains PARDISO routine definition. -! For CDECL use only. -! -!******************************************************************************* -!DEC$ IF .NOT. DEFINED( __MKL_PARDISO_F90 ) - -!DEC$ DEFINE __MKL_PARDISO_F90 - - MODULE MKL_PARDISO_PRIVATE - - TYPE MKL_PARDISO_HANDLE; INTEGER(KIND=8) DUMMY; END TYPE - - INTEGER, PARAMETER :: PARDISO_OOC_FILE_NAME = 1 - - END MODULE MKL_PARDISO_PRIVATE - - MODULE MKL_PARDISO - USE MKL_PARDISO_PRIVATE - -! -! Subroutine prototype for PARDISO -! - - INTERFACE PARDISO - SUBROUTINE PARDISO_S( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER, INTENT(IN) :: MAXFCT - INTEGER, INTENT(IN) :: MNUM - INTEGER, INTENT(IN) :: MTYPE - INTEGER, INTENT(IN) :: PHASE - INTEGER, INTENT(IN) :: N - INTEGER, INTENT(IN) :: IA(*) - INTEGER, INTENT(IN) :: JA(*) - INTEGER, INTENT(INOUT) :: PERM(*) - INTEGER, INTENT(IN) :: NRHS - INTEGER, INTENT(INOUT) :: IPARM(*) - INTEGER, INTENT(IN) :: MSGLVL - INTEGER, INTENT(OUT) :: ERROR - - REAL(KIND=4), INTENT(IN) :: A(*) - REAL(KIND=4), INTENT(INOUT) :: B(*) - REAL(KIND=4), INTENT(OUT) :: X(*) - END SUBROUTINE PARDISO_S - - SUBROUTINE PARDISO_D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER, INTENT(IN) :: MAXFCT - INTEGER, INTENT(IN) :: MNUM - INTEGER, INTENT(IN) :: MTYPE - INTEGER, INTENT(IN) :: PHASE - INTEGER, INTENT(IN) :: N - INTEGER, INTENT(IN) :: IA(*) - INTEGER, INTENT(IN) :: JA(*) - INTEGER, INTENT(INOUT) :: PERM(*) - INTEGER, INTENT(IN) :: NRHS - INTEGER, INTENT(INOUT) :: IPARM(*) - INTEGER, INTENT(IN) :: MSGLVL - INTEGER, INTENT(OUT) :: ERROR - - REAL(KIND=8), INTENT(IN) :: A(*) - REAL(KIND=8), INTENT(INOUT) :: B(*) - REAL(KIND=8), INTENT(OUT) :: X(*) - END SUBROUTINE PARDISO_D - - SUBROUTINE PARDISO_SC( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER, INTENT(IN) :: MAXFCT - INTEGER, INTENT(IN) :: MNUM - INTEGER, INTENT(IN) :: MTYPE - INTEGER, INTENT(IN) :: PHASE - INTEGER, INTENT(IN) :: N - INTEGER, INTENT(IN) :: IA(*) - INTEGER, INTENT(IN) :: JA(*) - INTEGER, INTENT(INOUT) :: PERM(*) - INTEGER, INTENT(IN) :: NRHS - INTEGER, INTENT(INOUT) :: IPARM(*) - INTEGER, INTENT(IN) :: MSGLVL - INTEGER, INTENT(OUT) :: ERROR - - COMPLEX(KIND=4), INTENT(IN) :: A(*) - COMPLEX(KIND=4), INTENT(INOUT) :: B(*) - COMPLEX(KIND=4), INTENT(OUT) :: X(*) - END SUBROUTINE PARDISO_SC - - SUBROUTINE PARDISO_DC( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER, INTENT(IN) :: MAXFCT - INTEGER, INTENT(IN) :: MNUM - INTEGER, INTENT(IN) :: MTYPE - INTEGER, INTENT(IN) :: PHASE - INTEGER, INTENT(IN) :: N - INTEGER, INTENT(IN) :: IA(*) - INTEGER, INTENT(IN) :: JA(*) - INTEGER, INTENT(INOUT) :: PERM(*) - INTEGER, INTENT(IN) :: NRHS - INTEGER, INTENT(INOUT) :: IPARM(*) - INTEGER, INTENT(IN) :: MSGLVL - INTEGER, INTENT(OUT) :: ERROR - COMPLEX(KIND=8), INTENT(IN) :: A(*) - COMPLEX(KIND=8), INTENT(INOUT) :: B(*) - COMPLEX(KIND=8), INTENT(OUT) :: X(*) - END SUBROUTINE PARDISO_DC - - - SUBROUTINE PARDISO_S_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER, INTENT(IN) :: MAXFCT - INTEGER, INTENT(IN) :: MNUM - INTEGER, INTENT(IN) :: MTYPE - INTEGER, INTENT(IN) :: PHASE - INTEGER, INTENT(IN) :: N - INTEGER, INTENT(IN) :: IA(*) - INTEGER, INTENT(IN) :: JA(*) - INTEGER, INTENT(INOUT) :: PERM(*) - INTEGER, INTENT(IN) :: NRHS - INTEGER, INTENT(INOUT) :: IPARM(*) - INTEGER, INTENT(IN) :: MSGLVL - INTEGER, INTENT(OUT) :: ERROR - REAL(KIND=4), INTENT(IN) :: A(*) - REAL(KIND=4), INTENT(INOUT) :: B(N,*) - REAL(KIND=4), INTENT(OUT) :: X(N,*) - END SUBROUTINE PARDISO_S_2D - - SUBROUTINE PARDISO_D_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER, INTENT(IN) :: MAXFCT - INTEGER, INTENT(IN) :: MNUM - INTEGER, INTENT(IN) :: MTYPE - INTEGER, INTENT(IN) :: PHASE - INTEGER, INTENT(IN) :: N - INTEGER, INTENT(IN) :: IA(*) - INTEGER, INTENT(IN) :: JA(*) - INTEGER, INTENT(INOUT) :: PERM(*) - INTEGER, INTENT(IN) :: NRHS - INTEGER, INTENT(INOUT) :: IPARM(*) - INTEGER, INTENT(IN) :: MSGLVL - INTEGER, INTENT(OUT) :: ERROR - REAL(KIND=8), INTENT(IN) :: A(*) - REAL(KIND=8), INTENT(INOUT) :: B(N,*) - REAL(KIND=8), INTENT(OUT) :: X(N,*) - END SUBROUTINE PARDISO_D_2D - - SUBROUTINE PARDISO_SC_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER, INTENT(IN) :: MAXFCT - INTEGER, INTENT(IN) :: MNUM - INTEGER, INTENT(IN) :: MTYPE - INTEGER, INTENT(IN) :: PHASE - INTEGER, INTENT(IN) :: N - INTEGER, INTENT(IN) :: IA(*) - INTEGER, INTENT(IN) :: JA(*) - INTEGER, INTENT(INOUT) :: PERM(*) - INTEGER, INTENT(IN) :: NRHS - INTEGER, INTENT(INOUT) :: IPARM(*) - INTEGER, INTENT(IN) :: MSGLVL - INTEGER, INTENT(OUT) :: ERROR - COMPLEX(KIND=4), INTENT(IN) :: A(*) - COMPLEX(KIND=4), INTENT(INOUT) :: B(N,*) - COMPLEX(KIND=4), INTENT(OUT) :: X(N,*) - END SUBROUTINE PARDISO_SC_2D - - SUBROUTINE PARDISO_DC_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER, INTENT(IN) :: MAXFCT - INTEGER, INTENT(IN) :: MNUM - INTEGER, INTENT(IN) :: MTYPE - INTEGER, INTENT(IN) :: PHASE - INTEGER, INTENT(IN) :: N - INTEGER, INTENT(IN) :: IA(*) - INTEGER, INTENT(IN) :: JA(*) - INTEGER, INTENT(INOUT) :: PERM(*) - INTEGER, INTENT(IN) :: NRHS - INTEGER, INTENT(INOUT) :: IPARM(*) - INTEGER, INTENT(IN) :: MSGLVL - INTEGER, INTENT(OUT) :: ERROR - COMPLEX(KIND=8), INTENT(IN) :: A(*) - COMPLEX(KIND=8), INTENT(INOUT) :: B(N,*) - COMPLEX(KIND=8), INTENT(OUT) :: X(N,*) - END SUBROUTINE PARDISO_DC_2D - END INTERFACE -! -! Subroutine prototype for PARDISO_64 -! -! Note: The pardiso_64 interface is not supported on IA-32 architecture. -! If called on IA-32, error = -12 is returned. -! - INTERFACE PARDISO_64 - SUBROUTINE PARDISO_S_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER(KIND=8), INTENT(IN) :: MAXFCT - INTEGER(KIND=8), INTENT(IN) :: MNUM - INTEGER(KIND=8), INTENT(IN) :: MTYPE - INTEGER(KIND=8), INTENT(IN) :: PHASE - INTEGER(KIND=8), INTENT(IN) :: N - INTEGER(KIND=8), INTENT(IN) :: IA(*) - INTEGER(KIND=8), INTENT(IN) :: JA(*) - INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) - INTEGER(KIND=8), INTENT(IN) :: NRHS - INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) - INTEGER(KIND=8), INTENT(IN) :: MSGLVL - INTEGER(KIND=8), INTENT(OUT) :: ERROR - REAL(KIND=4), INTENT(IN) :: A(*) - REAL(KIND=4), INTENT(INOUT) :: B(*) - REAL(KIND=4), INTENT(OUT) :: X(*) - END SUBROUTINE PARDISO_S_64 - - SUBROUTINE PARDISO_D_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER(KIND=8), INTENT(IN) :: MAXFCT - INTEGER(KIND=8), INTENT(IN) :: MNUM - INTEGER(KIND=8), INTENT(IN) :: MTYPE - INTEGER(KIND=8), INTENT(IN) :: PHASE - INTEGER(KIND=8), INTENT(IN) :: N - INTEGER(KIND=8), INTENT(IN) :: IA(*) - INTEGER(KIND=8), INTENT(IN) :: JA(*) - INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) - INTEGER(KIND=8), INTENT(IN) :: NRHS - INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) - INTEGER(KIND=8), INTENT(IN) :: MSGLVL - INTEGER(KIND=8), INTENT(OUT) :: ERROR - REAL(KIND=8), INTENT(IN) :: A(*) - REAL(KIND=8), INTENT(INOUT) :: B(*) - REAL(KIND=8), INTENT(OUT) :: X(*) - END SUBROUTINE PARDISO_D_64 - - SUBROUTINE PARDISO_SC_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER(KIND=8), INTENT(IN) :: MAXFCT - INTEGER(KIND=8), INTENT(IN) :: MNUM - INTEGER(KIND=8), INTENT(IN) :: MTYPE - INTEGER(KIND=8), INTENT(IN) :: PHASE - INTEGER(KIND=8), INTENT(IN) :: N - INTEGER(KIND=8), INTENT(IN) :: IA(*) - INTEGER(KIND=8), INTENT(IN) :: JA(*) - INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) - INTEGER(KIND=8), INTENT(IN) :: NRHS - INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) - INTEGER(KIND=8), INTENT(IN) :: MSGLVL - INTEGER(KIND=8), INTENT(OUT) :: ERROR - COMPLEX(KIND=4), INTENT(IN) :: A(*) - COMPLEX(KIND=4), INTENT(INOUT) :: B(*) - COMPLEX(KIND=4), INTENT(OUT) :: X(*) - END SUBROUTINE PARDISO_SC_64 - - SUBROUTINE PARDISO_DC_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER(KIND=8), INTENT(IN) :: MAXFCT - INTEGER(KIND=8), INTENT(IN) :: MNUM - INTEGER(KIND=8), INTENT(IN) :: MTYPE - INTEGER(KIND=8), INTENT(IN) :: PHASE - INTEGER(KIND=8), INTENT(IN) :: N - INTEGER(KIND=8), INTENT(IN) :: IA(*) - INTEGER(KIND=8), INTENT(IN) :: JA(*) - INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) - INTEGER(KIND=8), INTENT(IN) :: NRHS - INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) - INTEGER(KIND=8), INTENT(IN) :: MSGLVL - INTEGER(KIND=8), INTENT(OUT) :: ERROR - COMPLEX(KIND=8), INTENT(IN) :: A(*) - COMPLEX(KIND=8), INTENT(INOUT) :: B(*) - COMPLEX(KIND=8), INTENT(OUT) :: X(*) - END SUBROUTINE PARDISO_DC_64 - - SUBROUTINE PARDISO_S_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER(KIND=8), INTENT(IN) :: MAXFCT - INTEGER(KIND=8), INTENT(IN) :: MNUM - INTEGER(KIND=8), INTENT(IN) :: MTYPE - INTEGER(KIND=8), INTENT(IN) :: PHASE - INTEGER(KIND=8), INTENT(IN) :: N - INTEGER(KIND=8), INTENT(IN) :: IA(*) - INTEGER(KIND=8), INTENT(IN) :: JA(*) - INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) - INTEGER(KIND=8), INTENT(IN) :: NRHS - INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) - INTEGER(KIND=8), INTENT(IN) :: MSGLVL - INTEGER(KIND=8), INTENT(OUT) :: ERROR - REAL(KIND=4), INTENT(IN) :: A(*) - REAL(KIND=4), INTENT(INOUT) :: B(N,*) - REAL(KIND=4), INTENT(OUT) :: X(N,*) - END SUBROUTINE PARDISO_S_64_2D - - SUBROUTINE PARDISO_D_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER(KIND=8), INTENT(IN) :: MAXFCT - INTEGER(KIND=8), INTENT(IN) :: MNUM - INTEGER(KIND=8), INTENT(IN) :: MTYPE - INTEGER(KIND=8), INTENT(IN) :: PHASE - INTEGER(KIND=8), INTENT(IN) :: N - INTEGER(KIND=8), INTENT(IN) :: IA(*) - INTEGER(KIND=8), INTENT(IN) :: JA(*) - INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) - INTEGER(KIND=8), INTENT(IN) :: NRHS - INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) - INTEGER(KIND=8), INTENT(IN) :: MSGLVL - INTEGER(KIND=8), INTENT(OUT) :: ERROR - REAL(KIND=8), INTENT(IN) :: A(*) - REAL(KIND=8), INTENT(INOUT) :: B(N,*) - REAL(KIND=8), INTENT(OUT) :: X(N,*) - END SUBROUTINE PARDISO_D_64_2D - - SUBROUTINE PARDISO_SC_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER(KIND=8), INTENT(IN) :: MAXFCT - INTEGER(KIND=8), INTENT(IN) :: MNUM - INTEGER(KIND=8), INTENT(IN) :: MTYPE - INTEGER(KIND=8), INTENT(IN) :: PHASE - INTEGER(KIND=8), INTENT(IN) :: N - INTEGER(KIND=8), INTENT(IN) :: IA(*) - INTEGER(KIND=8), INTENT(IN) :: JA(*) - INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) - INTEGER(KIND=8), INTENT(IN) :: NRHS - INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) - INTEGER(KIND=8), INTENT(IN) :: MSGLVL - INTEGER(KIND=8), INTENT(OUT) :: ERROR - COMPLEX(KIND=4), INTENT(IN) :: A(*) - COMPLEX(KIND=4), INTENT(INOUT) :: B(N,*) - COMPLEX(KIND=4), INTENT(OUT) :: X(N,*) - END SUBROUTINE PARDISO_SC_64_2D - - SUBROUTINE PARDISO_DC_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER(KIND=8), INTENT(IN) :: MAXFCT - INTEGER(KIND=8), INTENT(IN) :: MNUM - INTEGER(KIND=8), INTENT(IN) :: MTYPE - INTEGER(KIND=8), INTENT(IN) :: PHASE - INTEGER(KIND=8), INTENT(IN) :: N - INTEGER(KIND=8), INTENT(IN) :: IA(*) - INTEGER(KIND=8), INTENT(IN) :: JA(*) - INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) - INTEGER(KIND=8), INTENT(IN) :: NRHS - INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) - INTEGER(KIND=8), INTENT(IN) :: MSGLVL - INTEGER(KIND=8), INTENT(OUT) :: ERROR - COMPLEX(KIND=8), INTENT(IN) :: A(*) - COMPLEX(KIND=8), INTENT(INOUT) :: B(N,*) - COMPLEX(KIND=8), INTENT(OUT) :: X(N,*) - END SUBROUTINE PARDISO_DC_64_2D - - END INTERFACE - - INTERFACE - - SUBROUTINE PARDISOINIT(PT, MTYPE, IPARM) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(OUT) :: PT(*) - INTEGER, INTENT(IN) :: MTYPE - INTEGER, INTENT(OUT) :: IPARM(*) - END SUBROUTINE PARDISOINIT - - END INTERFACE - - INTERFACE PARDISO_GET - - FUNCTION PARDISO_GETENV(PT, OptName, StrVal) - USE MKL_PARDISO_PRIVATE - INTEGER PARDISO_GETENV - TYPE(MKL_PARDISO_HANDLE), INTENT(IN) :: PT(*) - INTEGER, INTENT(IN) :: OptName - CHARACTER(*), INTENT(OUT) :: StrVal - END FUNCTION PARDISO_GETENV - - END INTERFACE - - INTERFACE PARDISO_SET - - FUNCTION PARDISO_SETENV(PT, OptName, StrVal) - USE MKL_PARDISO_PRIVATE - INTEGER PARDISO_SETENV - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER, INTENT(IN) :: OptName - INTEGER, INTENT(IN) :: StrVal(*) - END FUNCTION PARDISO_SETENV - - END INTERFACE - - INTERFACE PARDISO_PIV - - FUNCTION MKL_PARDISO_PIVOT( AII, BII, EPS) - REAL(KIND=8) :: AII, BII, EPS - INTEGER MKL_PARDISO_PIVOT - END - END INTERFACE PARDISO_PIV - - INTERFACE PARDISO_GETDIAG - - SUBROUTINE PARDISO_GETDIAG_D(PT, DIAG_FACT, DIAG_A, MNUM, EPS) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - REAL(KIND=8), INTENT(INOUT) :: DIAG_FACT, DIAG_A - INTEGER, INTENT(IN) :: MNUM - INTEGER, INTENT(INOUT) :: EPS - END - - SUBROUTINE PARDISO_GETDIAG_Z(PT, DIAG_FACT, DIAG_A, MNUM, EPS) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - COMPLEX(KIND=8), INTENT(INOUT) :: DIAG_FACT, DIAG_A - INTEGER, INTENT(IN) :: MNUM - INTEGER, INTENT(INOUT) :: EPS - END - - END INTERFACE PARDISO_GETDIAG - - - - - END MODULE MKL_PARDISO - -!DEC$ ENDIF diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90 index e45ce2e..f0c061f 100644 --- a/src/heatflow/mod_petsc_solver.f90 +++ b/src/heatflow/mod_petsc_solver.f90 @@ -2,6 +2,7 @@ module petsc_solver #include "petsc/finclude/petscsys.h" #include "petsc/finclude/petscksp.h" use petscksp + use iso_c_binding implicit none private public :: petsc_init, petsc_finalize, solve_petsc_csr, petsc_cleanup @@ -53,37 +54,70 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) PC :: pc integer :: ierr, i, row_nz, start_k, its integer, allocatable :: cols0(:), idx(:), d_nnz(:) + PetscInt :: idx_array(1) + PetscScalar :: val_array(1) real(8), allocatable :: vals(:) - real(8), pointer :: xptr(:) - KSPConvergedReason :: reason real(8) :: rnorm logical :: rebuild_needed + write(*,'(A,I0)') ' [DEBUG] Entered solve_petsc_csr, n=', n + call flush(6) + if (size(ia) /= n+1) stop 'solve_petsc_csr: ia size mismatch' if (size(b) /= n .or. size(x) /= n) stop 'solve_petsc_csr: vector size mismatch' + write(*,'(A)') ' [DEBUG] Size checks passed' + call flush(6) + ! Determine if we need to rebuild the matrix structure rebuild_needed = .false. if (.not. initialized) rebuild_needed = .true. if (n /= n_saved) rebuild_needed = .true. + write(*,'(A,L1)') ' [DEBUG] rebuild_needed=', rebuild_needed + call flush(6) + ! Create PETSc objects on first call or if size changed if (rebuild_needed) then + write(*,'(A)') ' [DEBUG] Starting PETSc object creation...' + call flush(6) + ! Clean up old objects if they exist if (initialized) call petsc_cleanup() + write(*,'(A)') ' [DEBUG] Preallocating matrix...' + call flush(6) + ! Preallocate matrix with exact nonzeros per row (saves memory) allocate(d_nnz(n)) do i = 1, n d_nnz(i) = ia(i+1) - ia(i) end do + + write(*,'(A,I0,A,I0)') ' [DEBUG] Creating matrix: n=', n, ', max_nnz/row=', maxval(d_nnz) + call flush(6) + + ! Create matrix with exact preallocation (most memory-efficient) call MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 0, d_nnz, A_saved, ierr) + if (ierr /= 0) then + write(0,*) "ERROR: MatCreateSeqAIJ failed with ierr=", ierr + write(0,*) " Matrix size may exceed system limits" + write(0,*) " n=", n, ", nnz=", sum(int(d_nnz,8)), ", max_nnz/row=", maxval(d_nnz) + stop + end if + + write(*,'(A)') ' [DEBUG] Matrix created successfully' + call flush(6) + deallocate(d_nnz) ! Create persistent vectors call VecCreateSeq(PETSC_COMM_SELF, n, bb_saved, ierr) call VecCreateSeq(PETSC_COMM_SELF, n, xx_saved, ierr) + write(*,'(A)') ' [DEBUG] Vectors created' + call flush(6) + ! Create and configure KSP solver (persistent across timesteps) call KSPCreate(PETSC_COMM_SELF, ksp_saved, ierr) call KSPSetOperators(ksp_saved, A_saved, A_saved, ierr) @@ -100,6 +134,8 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) end if ! Update matrix values (always needed each timestep) + write(*,'(A)') ' Updating PETSc matrix values...' + call flush(6) call MatZeroEntries(A_saved, ierr) do i = 1, n row_nz = ia(i+1) - ia(i) @@ -115,47 +151,133 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) deallocate(cols0, vals) end if end do + write(*,'(A)') ' Matrix assembly beginning...' + call flush(6) call MatAssemblyBegin(A_saved, MAT_FINAL_ASSEMBLY, ierr) call MatAssemblyEnd(A_saved, MAT_FINAL_ASSEMBLY, ierr) + write(*,'(A)') ' Matrix assembly complete.' + call flush(6) ! Optional: Verify matrix assembly (uncomment for debugging) ! call MatView(A_saved, PETSC_VIEWER_STDOUT_SELF, ierr) - ! Update RHS vector - allocate(idx(n)) - idx = [(i-1, i=1,n)] - call VecSetValues(bb_saved, n, idx, b, INSERT_VALUES, ierr) + ! Update RHS vector in batches to avoid memory issues with huge systems + write(*,'(A)') ' Updating RHS vector...' + call flush(6) + block + integer, parameter :: VEC_CHUNK = 1000000 + integer :: vec_start, vec_end, vec_len, k + integer, allocatable :: idx_vec(:) + + do vec_start = 1, n, VEC_CHUNK + vec_end = min(vec_start + VEC_CHUNK - 1, n) + vec_len = vec_end - vec_start + 1 + + allocate(idx_vec(vec_len)) + idx_vec = [(vec_start + k - 2, k=1,vec_len)] ! 0-based indices + + call VecSetValues(bb_saved, vec_len, idx_vec, b(vec_start:vec_end), INSERT_VALUES, ierr) + if (ierr /= 0) then + write(0,*) "ERROR: VecSetValues(bb) failed at", vec_start, "ierr=", ierr + stop + end if + + deallocate(idx_vec) + end do + end block call VecAssemblyBegin(bb_saved,ierr); call VecAssemblyEnd(bb_saved,ierr) + write(*,'(A)') ' RHS vector complete.' + call flush(6) - ! Update initial guess - call VecSetValues(xx_saved, n, idx, x, INSERT_VALUES, ierr) + ! Update initial guess in batches + write(*,'(A)') ' Updating initial guess vector...' + call flush(6) + block + integer, parameter :: VEC_CHUNK = 1000000 + integer :: vec_start, vec_end, vec_len, k + integer, allocatable :: idx_vec(:) + + do vec_start = 1, n, VEC_CHUNK + vec_end = min(vec_start + VEC_CHUNK - 1, n) + vec_len = vec_end - vec_start + 1 + + allocate(idx_vec(vec_len)) + idx_vec = [(vec_start + k - 2, k=1,vec_len)] ! 0-based indices + + call VecSetValues(xx_saved, vec_len, idx_vec, x(vec_start:vec_end), INSERT_VALUES, ierr) + if (ierr /= 0) then + write(0,*) "ERROR: VecSetValues(xx) failed at", vec_start, "ierr=", ierr + stop + end if + + deallocate(idx_vec) + end do + end block call VecAssemblyBegin(xx_saved,ierr); call VecAssemblyEnd(xx_saved,ierr) - deallocate(idx) ! Solve the system (reusing persistent KSP) + write(*,'(A,I0,A)') ' Solving linear system with n=', n, ' unknowns...' + call flush(6) call KSPSolve(ksp_saved, bb_saved, xx_saved, ierr) - ! Check convergence - call KSPGetConvergedReason(ksp_saved, reason, ierr) + write(*,'(A)') ' KSPSolve completed, checking status...' + call flush(6) + + if (ierr /= 0) then + write(0,*) "ERROR: KSPSolve failed with error code:", ierr + stop + end if + + ! Check convergence - Note: KSPConvergedReason type changed in PETSc 3.24 + ! Simplified error checking without explicit reason query call KSPGetIterationNumber(ksp_saved, its, ierr) call KSPGetResidualNorm(ksp_saved, rnorm, ierr) ! Report convergence status (commented out by default for performance) ! Uncomment the next line to see convergence info every solve: - ! write(*,'(A,I0,A,ES12.5,A,I0)') ' PETSc: iterations=', its, ', residual=', rnorm, ', reason=', reason + ! write(*,'(A,I0,A,ES12.5)') ' PETSc: iterations=', its, ', residual=', rnorm - if (reason < 0) then - write(0,*) "WARNING: PETSc solver diverged or failed!" - write(0,*) " Reason code:", reason + ! Basic divergence check via error code from solve + if (ierr /= 0) then + write(0,*) "WARNING: PETSc solver returned non-zero error code!" + write(0,*) " Error code:", ierr write(0,*) " Iterations:", its write(0,*) " Residual norm:", rnorm ! Don't stop - let the main code detect NaNs if needed end if - ! Extract solution - call VecGetArrayF90(xx_saved, xptr, ierr) - x = xptr - call VecRestoreArrayF90(xx_saved, xptr, ierr) + ! Extract solution vector using batched VecGetValues + ! Process in chunks for better performance on large systems + block + integer, parameter :: CHUNK_SIZE = 100000 + PetscInt, allocatable :: idx_batch(:) + PetscScalar, allocatable :: val_batch(:) + integer :: i_start, i_end, chunk_len, j + + do i_start = 1, n, CHUNK_SIZE + i_end = min(i_start + CHUNK_SIZE - 1, n) + chunk_len = i_end - i_start + 1 + + allocate(idx_batch(chunk_len), val_batch(chunk_len)) + + ! Build index array (0-based for PETSc) + do j = 1, chunk_len + idx_batch(j) = i_start + j - 2 ! -1 for 0-based, -1 more for offset + end do + + ! Get chunk of values + call VecGetValues(xx_saved, chunk_len, idx_batch, val_batch, ierr) + if (ierr /= 0) then + write(0,*) "ERROR: VecGetValues failed at chunk starting", i_start, "ierr=", ierr + stop + end if + + ! Copy to output array + x(i_start:i_end) = val_batch(1:chunk_len) + + deallocate(idx_batch, val_batch) + end do + end block end subroutine solve_petsc_csr diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90 old mode 100755 new mode 100644 index 8c51b62..1d7145a --- a/src/heatflow/mod_setup.f90 +++ b/src/heatflow/mod_setup.f90 @@ -24,7 +24,6 @@ module setup use globe_data, only: acsr, ja, ia use solver, only: SRSin use materials, only: material - use sparse_solver, only: coo2csr implicit none contains diff --git a/src/heatflow/mod_sparse_solver.f90 b/src/heatflow/mod_sparse_solver.f90 deleted file mode 100644 index 8f7957c..0000000 --- a/src/heatflow/mod_sparse_solver.f90 +++ /dev/null @@ -1,420 +0,0 @@ -module sparse_solver - - use mkl_pardiso - implicit none - - private - - public :: coo2csr, & - bicgstab, & - solve_pardiso - -contains - - subroutine coo2csr( nrow, & - nnz, & - a, & - ir, & - jc, & - acsr, & - ja, & - ia ) - - !--------------------------------------------------------------------------! - !! coocsr converts coo to csr. - ! - ! discussion: - ! - ! this routine converts a matrix that is stored in coo coordinate format - ! a, ir, jc into a csr row general sparse acsr, ja, ia format. - ! - ! parameters: - ! - ! input, integer nrow, the row dimension of the matrix. - ! - ! input, integer nnz, the number of nonzero elements in the matrix. - ! - ! a, - ! ir, - ! jc = matrix in coordinate format. a(k), ir(k), jc(k) store the nnz - ! nonzero elements of the matrix with a(k) = actual real value of - ! the elements, ir(k) = its row number and jc(k) = its column - ! number. the order of the elements is arbitrary. - ! - ! on return: - ! - ! ir is destroyed - ! - ! output, real acsr(nnz), ja(nnz), ia(nrow+1), the matrix in csr - ! compressed sparse row format. - !--------------------------------------------------------------------------! - - ! Arguments. - integer (kind=8), intent(in) :: nrow - integer (kind=8), intent(in) :: nnz - real(8), dimension(nnz), intent(in) :: a - integer (kind=8), dimension(nnz), intent(in) :: ir - integer (kind=8), dimension(nnz), intent(in) :: jc - real(8), dimension(nnz), intent(out) :: acsr - integer(kind=8), dimension(nnz), intent(out) :: ja - integer(kind=8), dimension(nrow+1), intent(out) :: ia - - ! Local variables. - integer (kind=8) :: i, iad, j, k, k0, row_start - integer (kind=8) :: dup_count - real(kind=8) :: x - logical :: found_dup - integer(kind=8), dimension(nrow+1) :: ia_save ! Save original row starts - - ia(1:nrow+1) = 0 - dup_count = 0 - - ! determine the row lengths. - - do k = 1, nnz - ia(ir(k)) = ia(ir(k)) + 1 - end do - - ! the starting position of each row. - - k = 1 - - do j = 1, nrow + 1 - - k0 = ia(j) - ia(j) = k - k = k + k0 - - end do - - ! Save the original row pointers - ia_save = ia - - ! go through the structure once more. fill in output matrix. - ! This version handles duplicate (i,j) entries by summing them. - - do k = 1, nnz - - i = ir(k) - j = jc(k) - x = a(k) - - ! Search for existing entry in this row with same column - found_dup = .false. - row_start = ia_save(i) - do iad = row_start, ia(i)-1 - if (ja(iad) == j) then - ! Found duplicate - sum the values - acsr(iad) = acsr(iad) + x - found_dup = .true. - dup_count = dup_count + 1 - exit - end if - end do - - if (.not. found_dup) then - ! New entry - insert it - iad = ia(i) - acsr(iad) = x - ja(iad) = j - ia(i) = iad + 1 - end if - - end do - - if (dup_count > 0) then - write(*,'(A,I0,A)') 'WARNING: COO->CSR found and summed ', dup_count, ' duplicate entries' - end if - - ! shift back ia. - - do j = nrow, 1, -1 - ia(j+1) = ia(j) - end do - - ia(1) = 1 - - return - - end subroutine coo2csr - - !------------------------------------------------------------------- - ! BiConjugate Gradient (Stabilised) Method - !------------------------------------------------------------------- - - subroutine bicgstab( acsr, & - ia, & - ja, & - b, & - maxiter, & - initGuess, & - x, & - iter ) - - ! Arguments - real(8), dimension(:), intent(in) :: acsr - integer (kind=8), dimension(:), intent(in) :: ia - integer (kind=8), dimension(:), intent(in) :: ja - real(8), dimension(:), intent(in) :: b - integer, intent(in) :: maxiter - real(8), dimension(:), intent(in) :: initGuess - real(8), dimension(:), allocatable, intent(out) :: x - integer, intent(out) :: iter - - ! Local variables - integer :: i, j, k, n - real(8), parameter :: cc = 1.0e-9 - real(8) :: alpha,beta,delta0,delta,delta_old,omega - real(8), dimension(:), allocatable :: r, p, s, rst, temp1, temp2 - - n = size(b,1) - allocate(x(n)) - allocate(r(n)) - allocate(p(n)) - allocate(s(n)) - allocate(rst(n)) - allocate(temp1(n)) - allocate(temp2(n)) - - call mkl_dcsrgemv("N",n,acsr,ia,ja,x,temp1) - - print *, "Initial residual norm: ", norm2(b - temp1) - r = b - temp1 - - call random_number(rst) - - p = r - - delta = dot_product(rst,r) - - write(*,'(a,1x,f15.3)') "Starting delta: ", delta - - delta0 = delta - - do i = 1, maxiter - print *, "Iteration ", i - if ( norm2(r) /= norm2(r) ) then - write(*,'(a)') "Error in solver: residual NaN" - exit - end if - - if(mod(i,1000).eq.0) then - write(*,'(a,1x,i6)') 'Iteration number: ',i - write(*,'(a,1x,f15.3)') "Residual ratio: ", norm2(r)/cc - end if - - call mkl_dcsrgemv("N",n,acsr,ia,ja,p,temp1) ! temp1=A*p - - alpha = delta/dot_product(rst,temp1) - s = r - alpha*temp1 - - call mkl_dcsrgemv("N",n,acsr,ia,ja,s,temp2) ! temp2=A*s - - omega = dot_product(s,temp2)/dot_product(temp2,temp2) - x = x + alpha*p + omega*s - r = s - omega*temp2 - delta_old = delta - delta = dot_product(rst,r) - beta = (delta/delta_old)*(alpha/omega) - p = r + beta*(p - omega*temp1) - - if(norm2(r) .lt. cc) then - iter = i - return - end if - - if(i.eq.maxiter) then - write(*,'(a)') "Maximum iterations reached." - write(*,'(a)') "Convergence not achieved." - write(*,'(a,1x,f15.3)') "Norm of residual: ", norm2(r) - write(*,'(a,1x,f15.3)') "Convergence criterion: ", cc - if((norm2(r)/cc) .lt. 2.d0) then - write(*,'(a)') "The residual is within a small",& - "range of the convergence criterion." - write(*,'(a)') "Perhaps increasing iteration ", & - "count may help." - end if - end if - - end do - - end subroutine bicgstab - - !------------------------------------------------------------------- - ! END BiConjugate Gradient (Stabilised) Method - !------------------------------------------------------------------- - - !------------------------------------------------------------------- - ! PARDISO Direct Solver - !------------------------------------------------------------------- - - subroutine solve_pardiso( acsr, & - b, & - ia, & - ja, & - x ) - - use mkl_pardiso - - ! Arguments - real(8), dimension(:), intent(in) :: acsr - real(8), dimension(:), intent(inout) :: b - integer,dimension(:), intent(in) :: ia - integer,dimension(:), intent(in) :: ja - real(8), dimension(:), allocatable, intent(out) :: x - - ! Local variables - type(mkl_pardiso_handle), dimension(:), allocatable :: pt - integer :: i,maxfct,mnum,mtype,phase,n,nrhs,error,msglvl,nnz,error1 - integer, dimension(:), allocatable :: iparm - integer,dimension(1) :: idum - real(8),dimension(1) :: ddum - integer :: badcol, missing_diag, k - logical :: found - n = size(b,1) - nnz = size(acsr,1) - nrhs = 1 - maxfct = 1 - mnum = 1 - - if (.not.(allocated(x))) allocate(x(n)) - - - - allocate(iparm(64)) !set up pardiso control parameter - - do i=1,64 - iparm(i) = 0 - end do - - iparm(1) = 1 ! no solver default - iparm(2) = 2 ! fill-in reordering from metis - iparm(4) = 0 ! no iterative-direct algorithm - iparm(5) = 0 ! no user fill-in reducing permutation - iparm(6) = 0 ! =0 solution on the first n compoments of x - iparm(8) = 2 ! numbers of iterative refinement steps - iparm(10) = 13 ! perturbe the pivot elements with 1e-13 - iparm(11) = 1 ! use nonsymmetric permutation and scaling mps - iparm(13) = 0 ! maximum weighted matching algorithm is - !switched-off (default for symmetric). - ! try iparm(13) = 1 in case of inaccuracy - iparm(14) = 0 ! output: number of perturbed pivots - iparm(18) = -1 ! output: number of nonzeros in the factor lu - iparm(19) = -1 ! output: mflops for lu factorization - iparm(20) = 0 ! output: numbers of cg iterations - - error = 0 ! initialize error flag - msglvl = 0 ! 0=no output, 1=print statistical information - mtype = 11 ! real and unsymmetric matrix - - ! Initiliaze the internal solver memory pointer. - ! This is only necessary for the first call of the solver. - - allocate (pt(64)) - do i=1,64 - pt(i)%dummy = 0 - end do - - !---------------- CSR integrity / diagnostic checks ---------------- - - badcol = 0 - - write(*,*) 'PARDISO debug:' - write(*,*) ' n =', n - write(*,*) ' nnz =', nnz - write(*,*) ' ia(1) =', ia(1), ' ia(n+1)=', ia(n+1), ' ia(n+1)-1=', ia(n+1)-1 - - if (ia(1) /= 1) stop 'ERROR: ia(1) must be 1' - if (ia(n+1)-1 /= nnz) stop 'ERROR: ia end mismatch' - - do i=1,n - if (ia(i) > ia(i+1)) then - write(*,*) 'Row pointer decreases at row', i - stop 'ERROR: ia not monotone' - end if - end do - - do k=1,nnz - if (ja(k) < 1 .or. ja(k) > n) then - badcol = badcol + 1 - if (badcol <= 10) write(*,*) 'Bad column index k=',k,' ja=',ja(k) - end if - end do - if (badcol > 0) then - write(*,*) 'Total bad columns =', badcol - stop 'ERROR: invalid ja entries' - end if - - ! Check each row has a diagonal and (optionally) detect duplicates - missing_diag = 0 - do i=1,n - found = .false. - if (ia(i) < ia(i+1)) then - ! simple duplicate check (requires row segment unsorted ascending to be meaningful) - do k = ia(i), ia(i+1)-1 - if (ja(k) == i) then - if (acsr(k) == 0.0d0) then - write(*,*) 'Zero diagonal at row', i - stop 'ERROR: zero diagonal' - end if - found = .true. - end if - end do - end if - if (.not. found) then - missing_diag = missing_diag + 1 - if (missing_diag <= 10) write(*,*) 'Missing diagonal at row', i - end if - end do - if (missing_diag > 0) stop 'ERROR: missing diagonals' - !------------------------------------------------------------------- - phase = 11 ! Only reordering and symbolic factorization - - call pardiso (pt,maxfct,mnum,mtype,phase,n,acsr,ia,ja, & - idum, nrhs, iparm, msglvl, ddum, ddum, error) - - if (error /= 0) then - write(*,*) 'the following error was detected: ', error - goto 1000 - end if - - phase = 22 ! only factorization - call pardiso (pt,maxfct,mnum,mtype,phase,n,acsr,ia,ja, & - idum, nrhs, iparm, msglvl, ddum, ddum, error) - if (error /= 0) then - write(*,*) 'the following error was detected: ', error - goto 1000 - endif - - ! back substitution and iterative refinement - iparm(8) = 2 ! max numbers of iterative refinement steps - phase = 33 ! only solving - call pardiso (pt,maxfct,mnum,mtype,phase,n,acsr,ia,ja, & - idum, nrhs, iparm, msglvl, b, x, error) - write(*,*) 'solve completed ... ' - if (error /= 0) then - write(*,*) 'the following error was detected: ', error - goto 1000 - endif - -1000 continue - ! termination and release of memory - phase = -1 ! release internal memory - call pardiso (pt,maxfct,mnum,mtype,phase,n,ddum,idum,idum, & - idum, nrhs, iparm, msglvl, ddum, ddum, error1) - - if (error1 /= 0) then - write(*,*) 'the following release error was detected: ', & - error1 - stop 1 - endif - - if ( error /= 0 ) stop 1 - - end subroutine solve_pardiso - - !------------------------------------------------------------------- - ! END PARDISO Direct Solver - !------------------------------------------------------------------- - -end module sparse_solver diff --git a/src/heatflow/mod_tempdep.f90 b/src/heatflow/mod_tempdep.f90 old mode 100755 new mode 100644 From 8687aafc8b91516b577292c3816975ff4e8e44d7 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 18 Nov 2025 16:17:17 +0000 Subject: [PATCH 21/28] Enable threading --- src/heatflow.f90 | 9 +++- src/heatflow/mod_petsc_solver.f90 | 85 +++++-------------------------- src/heatflow/mod_setup.f90 | 2 + 3 files changed, 23 insertions(+), 73 deletions(-) diff --git a/src/heatflow.f90 b/src/heatflow.f90 index d1db700..6817c67 100644 --- a/src/heatflow.f90 +++ b/src/heatflow.f90 @@ -34,6 +34,12 @@ program HEATFLOW_V0_3 real(real12) :: cpustart, cpuend, cpustart2, progress integer(int12) :: itime + !-------------------------------------------------------------! + ! Initialize PETSc FIRST (before any other operations) ! + !-------------------------------------------------------------! + CALL petsc_init() + !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^! + !-------------------------------------------------------------! ! calculate the time to run full simulation ! !-------------------------------------------------------------! @@ -65,7 +71,6 @@ program HEATFLOW_V0_3 !-------------------------------------------------------------! ! run simulation for 'ntime' time steps ! !-------------------------------------------------------------! - CALL petsc_init() do itime=1,ntime @@ -99,7 +104,7 @@ program HEATFLOW_V0_3 !-------------------------------------------------------------! ! calculate end time and print to user ! !-------------------------------------------------------------! - CALL cpu_time(cpuend) + CALL cpu_time(cpuend) write(*,'(A,F12.6)') ' time=', cpuend-cpustart !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^! diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90 index f0c061f..5a16b4b 100644 --- a/src/heatflow/mod_petsc_solver.f90 +++ b/src/heatflow/mod_petsc_solver.f90 @@ -60,33 +60,33 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) real(8) :: rnorm logical :: rebuild_needed - write(*,'(A,I0)') ' [DEBUG] Entered solve_petsc_csr, n=', n - call flush(6) + ! write(*,'(A,I0)') ' [DEBUG] Entered solve_petsc_csr, n=', n + ! call flush(6) if (size(ia) /= n+1) stop 'solve_petsc_csr: ia size mismatch' if (size(b) /= n .or. size(x) /= n) stop 'solve_petsc_csr: vector size mismatch' - write(*,'(A)') ' [DEBUG] Size checks passed' - call flush(6) + ! write(*,'(A)') ' [DEBUG] Size checks passed' + ! call flush(6) ! Determine if we need to rebuild the matrix structure rebuild_needed = .false. if (.not. initialized) rebuild_needed = .true. if (n /= n_saved) rebuild_needed = .true. - write(*,'(A,L1)') ' [DEBUG] rebuild_needed=', rebuild_needed - call flush(6) + ! write(*,'(A,L1)') ' [DEBUG] rebuild_needed=', rebuild_needed + ! call flush(6) ! Create PETSc objects on first call or if size changed if (rebuild_needed) then - write(*,'(A)') ' [DEBUG] Starting PETSc object creation...' - call flush(6) + ! write(*,'(A)') ' [DEBUG] Starting PETSc object creation...' + ! call flush(6) ! Clean up old objects if they exist if (initialized) call petsc_cleanup() - write(*,'(A)') ' [DEBUG] Preallocating matrix...' - call flush(6) + ! write(*,'(A)') ' [DEBUG] Preallocating matrix...' + ! call flush(6) ! Preallocate matrix with exact nonzeros per row (saves memory) allocate(d_nnz(n)) @@ -94,8 +94,8 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) d_nnz(i) = ia(i+1) - ia(i) end do - write(*,'(A,I0,A,I0)') ' [DEBUG] Creating matrix: n=', n, ', max_nnz/row=', maxval(d_nnz) - call flush(6) + ! write(*,'(A,I0,A,I0)') ' [DEBUG] Creating matrix: n=', n, ', max_nnz/row=', maxval(d_nnz) + ! call flush(6) ! Create matrix with exact preallocation (most memory-efficient) call MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 0, d_nnz, A_saved, ierr) @@ -106,18 +106,12 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) stop end if - write(*,'(A)') ' [DEBUG] Matrix created successfully' - call flush(6) - deallocate(d_nnz) ! Create persistent vectors call VecCreateSeq(PETSC_COMM_SELF, n, bb_saved, ierr) call VecCreateSeq(PETSC_COMM_SELF, n, xx_saved, ierr) - write(*,'(A)') ' [DEBUG] Vectors created' - call flush(6) - ! Create and configure KSP solver (persistent across timesteps) call KSPCreate(PETSC_COMM_SELF, ksp_saved, ierr) call KSPSetOperators(ksp_saved, A_saved, A_saved, ierr) @@ -134,8 +128,6 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) end if ! Update matrix values (always needed each timestep) - write(*,'(A)') ' Updating PETSc matrix values...' - call flush(6) call MatZeroEntries(A_saved, ierr) do i = 1, n row_nz = ia(i+1) - ia(i) @@ -151,19 +143,13 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) deallocate(cols0, vals) end if end do - write(*,'(A)') ' Matrix assembly beginning...' - call flush(6) call MatAssemblyBegin(A_saved, MAT_FINAL_ASSEMBLY, ierr) call MatAssemblyEnd(A_saved, MAT_FINAL_ASSEMBLY, ierr) - write(*,'(A)') ' Matrix assembly complete.' - call flush(6) ! Optional: Verify matrix assembly (uncomment for debugging) ! call MatView(A_saved, PETSC_VIEWER_STDOUT_SELF, ierr) ! Update RHS vector in batches to avoid memory issues with huge systems - write(*,'(A)') ' Updating RHS vector...' - call flush(6) block integer, parameter :: VEC_CHUNK = 1000000 integer :: vec_start, vec_end, vec_len, k @@ -177,21 +163,12 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) idx_vec = [(vec_start + k - 2, k=1,vec_len)] ! 0-based indices call VecSetValues(bb_saved, vec_len, idx_vec, b(vec_start:vec_end), INSERT_VALUES, ierr) - if (ierr /= 0) then - write(0,*) "ERROR: VecSetValues(bb) failed at", vec_start, "ierr=", ierr - stop - end if - deallocate(idx_vec) end do end block call VecAssemblyBegin(bb_saved,ierr); call VecAssemblyEnd(bb_saved,ierr) - write(*,'(A)') ' RHS vector complete.' - call flush(6) ! Update initial guess in batches - write(*,'(A)') ' Updating initial guess vector...' - call flush(6) block integer, parameter :: VEC_CHUNK = 1000000 integer :: vec_start, vec_end, vec_len, k @@ -205,49 +182,23 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) idx_vec = [(vec_start + k - 2, k=1,vec_len)] ! 0-based indices call VecSetValues(xx_saved, vec_len, idx_vec, x(vec_start:vec_end), INSERT_VALUES, ierr) - if (ierr /= 0) then - write(0,*) "ERROR: VecSetValues(xx) failed at", vec_start, "ierr=", ierr - stop - end if - deallocate(idx_vec) end do end block call VecAssemblyBegin(xx_saved,ierr); call VecAssemblyEnd(xx_saved,ierr) - ! Solve the system (reusing persistent KSP) - write(*,'(A,I0,A)') ' Solving linear system with n=', n, ' unknowns...' - call flush(6) + ! Solve the system call KSPSolve(ksp_saved, bb_saved, xx_saved, ierr) - write(*,'(A)') ' KSPSolve completed, checking status...' - call flush(6) - if (ierr /= 0) then write(0,*) "ERROR: KSPSolve failed with error code:", ierr stop end if - ! Check convergence - Note: KSPConvergedReason type changed in PETSc 3.24 - ! Simplified error checking without explicit reason query call KSPGetIterationNumber(ksp_saved, its, ierr) call KSPGetResidualNorm(ksp_saved, rnorm, ierr) - - ! Report convergence status (commented out by default for performance) - ! Uncomment the next line to see convergence info every solve: - ! write(*,'(A,I0,A,ES12.5)') ' PETSc: iterations=', its, ', residual=', rnorm - - ! Basic divergence check via error code from solve - if (ierr /= 0) then - write(0,*) "WARNING: PETSc solver returned non-zero error code!" - write(0,*) " Error code:", ierr - write(0,*) " Iterations:", its - write(0,*) " Residual norm:", rnorm - ! Don't stop - let the main code detect NaNs if needed - end if ! Extract solution vector using batched VecGetValues - ! Process in chunks for better performance on large systems block integer, parameter :: CHUNK_SIZE = 100000 PetscInt, allocatable :: idx_batch(:) @@ -260,19 +211,11 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) allocate(idx_batch(chunk_len), val_batch(chunk_len)) - ! Build index array (0-based for PETSc) do j = 1, chunk_len - idx_batch(j) = i_start + j - 2 ! -1 for 0-based, -1 more for offset + idx_batch(j) = i_start + j - 2 end do - ! Get chunk of values call VecGetValues(xx_saved, chunk_len, idx_batch, val_batch, ierr) - if (ierr /= 0) then - write(0,*) "ERROR: VecGetValues failed at chunk starting", i_start, "ierr=", ierr - stop - end if - - ! Copy to output array x(i_start:i_end) = val_batch(1:chunk_len) deallocate(idx_batch, val_batch) diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90 index 1d7145a..0a25b00 100644 --- a/src/heatflow/mod_setup.f90 +++ b/src/heatflow/mod_setup.f90 @@ -26,6 +26,8 @@ module setup use materials, only: material implicit none + public :: set_global_variables + contains From 90f435425f90d58a9fd300803c57db66e3b3e0b3 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 2 Dec 2025 19:11:12 +0000 Subject: [PATCH 22/28] Add ILU, and fix heating case 2 --- src/heatflow.f90 | 9 +- src/heatflow/mkl_pardiso.f90 | 452 ----------------------------- src/heatflow/mod_evolve.f90 | 2 +- src/heatflow/mod_heating.f90 | 50 +++- src/heatflow/mod_petsc_solver.f90 | 272 ++++++++++------- src/heatflow/mod_setup.f90 | 215 ++++++++++---- src/heatflow/mod_sparse_solver.f90 | 420 --------------------------- src/heatflow/mod_tempdep.f90 | 0 8 files changed, 382 insertions(+), 1038 deletions(-) delete mode 100644 src/heatflow/mkl_pardiso.f90 mode change 100755 => 100644 src/heatflow/mod_setup.f90 delete mode 100644 src/heatflow/mod_sparse_solver.f90 mode change 100755 => 100644 src/heatflow/mod_tempdep.f90 diff --git a/src/heatflow.f90 b/src/heatflow.f90 index d1db700..6817c67 100644 --- a/src/heatflow.f90 +++ b/src/heatflow.f90 @@ -34,6 +34,12 @@ program HEATFLOW_V0_3 real(real12) :: cpustart, cpuend, cpustart2, progress integer(int12) :: itime + !-------------------------------------------------------------! + ! Initialize PETSc FIRST (before any other operations) ! + !-------------------------------------------------------------! + CALL petsc_init() + !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^! + !-------------------------------------------------------------! ! calculate the time to run full simulation ! !-------------------------------------------------------------! @@ -65,7 +71,6 @@ program HEATFLOW_V0_3 !-------------------------------------------------------------! ! run simulation for 'ntime' time steps ! !-------------------------------------------------------------! - CALL petsc_init() do itime=1,ntime @@ -99,7 +104,7 @@ program HEATFLOW_V0_3 !-------------------------------------------------------------! ! calculate end time and print to user ! !-------------------------------------------------------------! - CALL cpu_time(cpuend) + CALL cpu_time(cpuend) write(*,'(A,F12.6)') ' time=', cpuend-cpustart !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^! diff --git a/src/heatflow/mkl_pardiso.f90 b/src/heatflow/mkl_pardiso.f90 deleted file mode 100644 index 0ff7527..0000000 --- a/src/heatflow/mkl_pardiso.f90 +++ /dev/null @@ -1,452 +0,0 @@ -!******************************************************************************* -! Copyright(C) 2004-2013 Intel Corporation. All Rights Reserved. -! -! The source code, information and material ("Material") contained herein is -! owned by Intel Corporation or its suppliers or licensors, and title to such -! Material remains with Intel Corporation or its suppliers or licensors. The -! Material contains proprietary information of Intel or its suppliers and -! licensors. The Material is protected by worldwide copyright laws and treaty -! provisions. No part of the Material may be used, copied, reproduced, -! modified, published, uploaded, posted, transmitted, distributed or disclosed -! in any way without Intel's prior express written permission. No license -! under any patent, copyright or other intellectual property rights in the -! Material is granted to or conferred upon you, either expressly, by -! implication, inducement, estoppel or otherwise. Any license under such -! intellectual property rights must be express and approved by Intel in -! writing. -! -! *Third Party trademarks are the property of their respective owners. -! -! Unless otherwise agreed by Intel in writing, you may not remove or alter -! this notice or any other notice embedded in Materials by Intel or Intel's -! suppliers or licensors in any way. -! -!******************************************************************************* -! Content : MKL PARDISO Fortran-90 header file -! -! Contains PARDISO routine definition. -! For CDECL use only. -! -!******************************************************************************* -!DEC$ IF .NOT. DEFINED( __MKL_PARDISO_F90 ) - -!DEC$ DEFINE __MKL_PARDISO_F90 - - MODULE MKL_PARDISO_PRIVATE - - TYPE MKL_PARDISO_HANDLE; INTEGER(KIND=8) DUMMY; END TYPE - - INTEGER, PARAMETER :: PARDISO_OOC_FILE_NAME = 1 - - END MODULE MKL_PARDISO_PRIVATE - - MODULE MKL_PARDISO - USE MKL_PARDISO_PRIVATE - -! -! Subroutine prototype for PARDISO -! - - INTERFACE PARDISO - SUBROUTINE PARDISO_S( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER, INTENT(IN) :: MAXFCT - INTEGER, INTENT(IN) :: MNUM - INTEGER, INTENT(IN) :: MTYPE - INTEGER, INTENT(IN) :: PHASE - INTEGER, INTENT(IN) :: N - INTEGER, INTENT(IN) :: IA(*) - INTEGER, INTENT(IN) :: JA(*) - INTEGER, INTENT(INOUT) :: PERM(*) - INTEGER, INTENT(IN) :: NRHS - INTEGER, INTENT(INOUT) :: IPARM(*) - INTEGER, INTENT(IN) :: MSGLVL - INTEGER, INTENT(OUT) :: ERROR - - REAL(KIND=4), INTENT(IN) :: A(*) - REAL(KIND=4), INTENT(INOUT) :: B(*) - REAL(KIND=4), INTENT(OUT) :: X(*) - END SUBROUTINE PARDISO_S - - SUBROUTINE PARDISO_D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER, INTENT(IN) :: MAXFCT - INTEGER, INTENT(IN) :: MNUM - INTEGER, INTENT(IN) :: MTYPE - INTEGER, INTENT(IN) :: PHASE - INTEGER, INTENT(IN) :: N - INTEGER, INTENT(IN) :: IA(*) - INTEGER, INTENT(IN) :: JA(*) - INTEGER, INTENT(INOUT) :: PERM(*) - INTEGER, INTENT(IN) :: NRHS - INTEGER, INTENT(INOUT) :: IPARM(*) - INTEGER, INTENT(IN) :: MSGLVL - INTEGER, INTENT(OUT) :: ERROR - - REAL(KIND=8), INTENT(IN) :: A(*) - REAL(KIND=8), INTENT(INOUT) :: B(*) - REAL(KIND=8), INTENT(OUT) :: X(*) - END SUBROUTINE PARDISO_D - - SUBROUTINE PARDISO_SC( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER, INTENT(IN) :: MAXFCT - INTEGER, INTENT(IN) :: MNUM - INTEGER, INTENT(IN) :: MTYPE - INTEGER, INTENT(IN) :: PHASE - INTEGER, INTENT(IN) :: N - INTEGER, INTENT(IN) :: IA(*) - INTEGER, INTENT(IN) :: JA(*) - INTEGER, INTENT(INOUT) :: PERM(*) - INTEGER, INTENT(IN) :: NRHS - INTEGER, INTENT(INOUT) :: IPARM(*) - INTEGER, INTENT(IN) :: MSGLVL - INTEGER, INTENT(OUT) :: ERROR - - COMPLEX(KIND=4), INTENT(IN) :: A(*) - COMPLEX(KIND=4), INTENT(INOUT) :: B(*) - COMPLEX(KIND=4), INTENT(OUT) :: X(*) - END SUBROUTINE PARDISO_SC - - SUBROUTINE PARDISO_DC( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER, INTENT(IN) :: MAXFCT - INTEGER, INTENT(IN) :: MNUM - INTEGER, INTENT(IN) :: MTYPE - INTEGER, INTENT(IN) :: PHASE - INTEGER, INTENT(IN) :: N - INTEGER, INTENT(IN) :: IA(*) - INTEGER, INTENT(IN) :: JA(*) - INTEGER, INTENT(INOUT) :: PERM(*) - INTEGER, INTENT(IN) :: NRHS - INTEGER, INTENT(INOUT) :: IPARM(*) - INTEGER, INTENT(IN) :: MSGLVL - INTEGER, INTENT(OUT) :: ERROR - COMPLEX(KIND=8), INTENT(IN) :: A(*) - COMPLEX(KIND=8), INTENT(INOUT) :: B(*) - COMPLEX(KIND=8), INTENT(OUT) :: X(*) - END SUBROUTINE PARDISO_DC - - - SUBROUTINE PARDISO_S_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER, INTENT(IN) :: MAXFCT - INTEGER, INTENT(IN) :: MNUM - INTEGER, INTENT(IN) :: MTYPE - INTEGER, INTENT(IN) :: PHASE - INTEGER, INTENT(IN) :: N - INTEGER, INTENT(IN) :: IA(*) - INTEGER, INTENT(IN) :: JA(*) - INTEGER, INTENT(INOUT) :: PERM(*) - INTEGER, INTENT(IN) :: NRHS - INTEGER, INTENT(INOUT) :: IPARM(*) - INTEGER, INTENT(IN) :: MSGLVL - INTEGER, INTENT(OUT) :: ERROR - REAL(KIND=4), INTENT(IN) :: A(*) - REAL(KIND=4), INTENT(INOUT) :: B(N,*) - REAL(KIND=4), INTENT(OUT) :: X(N,*) - END SUBROUTINE PARDISO_S_2D - - SUBROUTINE PARDISO_D_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER, INTENT(IN) :: MAXFCT - INTEGER, INTENT(IN) :: MNUM - INTEGER, INTENT(IN) :: MTYPE - INTEGER, INTENT(IN) :: PHASE - INTEGER, INTENT(IN) :: N - INTEGER, INTENT(IN) :: IA(*) - INTEGER, INTENT(IN) :: JA(*) - INTEGER, INTENT(INOUT) :: PERM(*) - INTEGER, INTENT(IN) :: NRHS - INTEGER, INTENT(INOUT) :: IPARM(*) - INTEGER, INTENT(IN) :: MSGLVL - INTEGER, INTENT(OUT) :: ERROR - REAL(KIND=8), INTENT(IN) :: A(*) - REAL(KIND=8), INTENT(INOUT) :: B(N,*) - REAL(KIND=8), INTENT(OUT) :: X(N,*) - END SUBROUTINE PARDISO_D_2D - - SUBROUTINE PARDISO_SC_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER, INTENT(IN) :: MAXFCT - INTEGER, INTENT(IN) :: MNUM - INTEGER, INTENT(IN) :: MTYPE - INTEGER, INTENT(IN) :: PHASE - INTEGER, INTENT(IN) :: N - INTEGER, INTENT(IN) :: IA(*) - INTEGER, INTENT(IN) :: JA(*) - INTEGER, INTENT(INOUT) :: PERM(*) - INTEGER, INTENT(IN) :: NRHS - INTEGER, INTENT(INOUT) :: IPARM(*) - INTEGER, INTENT(IN) :: MSGLVL - INTEGER, INTENT(OUT) :: ERROR - COMPLEX(KIND=4), INTENT(IN) :: A(*) - COMPLEX(KIND=4), INTENT(INOUT) :: B(N,*) - COMPLEX(KIND=4), INTENT(OUT) :: X(N,*) - END SUBROUTINE PARDISO_SC_2D - - SUBROUTINE PARDISO_DC_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER, INTENT(IN) :: MAXFCT - INTEGER, INTENT(IN) :: MNUM - INTEGER, INTENT(IN) :: MTYPE - INTEGER, INTENT(IN) :: PHASE - INTEGER, INTENT(IN) :: N - INTEGER, INTENT(IN) :: IA(*) - INTEGER, INTENT(IN) :: JA(*) - INTEGER, INTENT(INOUT) :: PERM(*) - INTEGER, INTENT(IN) :: NRHS - INTEGER, INTENT(INOUT) :: IPARM(*) - INTEGER, INTENT(IN) :: MSGLVL - INTEGER, INTENT(OUT) :: ERROR - COMPLEX(KIND=8), INTENT(IN) :: A(*) - COMPLEX(KIND=8), INTENT(INOUT) :: B(N,*) - COMPLEX(KIND=8), INTENT(OUT) :: X(N,*) - END SUBROUTINE PARDISO_DC_2D - END INTERFACE -! -! Subroutine prototype for PARDISO_64 -! -! Note: The pardiso_64 interface is not supported on IA-32 architecture. -! If called on IA-32, error = -12 is returned. -! - INTERFACE PARDISO_64 - SUBROUTINE PARDISO_S_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER(KIND=8), INTENT(IN) :: MAXFCT - INTEGER(KIND=8), INTENT(IN) :: MNUM - INTEGER(KIND=8), INTENT(IN) :: MTYPE - INTEGER(KIND=8), INTENT(IN) :: PHASE - INTEGER(KIND=8), INTENT(IN) :: N - INTEGER(KIND=8), INTENT(IN) :: IA(*) - INTEGER(KIND=8), INTENT(IN) :: JA(*) - INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) - INTEGER(KIND=8), INTENT(IN) :: NRHS - INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) - INTEGER(KIND=8), INTENT(IN) :: MSGLVL - INTEGER(KIND=8), INTENT(OUT) :: ERROR - REAL(KIND=4), INTENT(IN) :: A(*) - REAL(KIND=4), INTENT(INOUT) :: B(*) - REAL(KIND=4), INTENT(OUT) :: X(*) - END SUBROUTINE PARDISO_S_64 - - SUBROUTINE PARDISO_D_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER(KIND=8), INTENT(IN) :: MAXFCT - INTEGER(KIND=8), INTENT(IN) :: MNUM - INTEGER(KIND=8), INTENT(IN) :: MTYPE - INTEGER(KIND=8), INTENT(IN) :: PHASE - INTEGER(KIND=8), INTENT(IN) :: N - INTEGER(KIND=8), INTENT(IN) :: IA(*) - INTEGER(KIND=8), INTENT(IN) :: JA(*) - INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) - INTEGER(KIND=8), INTENT(IN) :: NRHS - INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) - INTEGER(KIND=8), INTENT(IN) :: MSGLVL - INTEGER(KIND=8), INTENT(OUT) :: ERROR - REAL(KIND=8), INTENT(IN) :: A(*) - REAL(KIND=8), INTENT(INOUT) :: B(*) - REAL(KIND=8), INTENT(OUT) :: X(*) - END SUBROUTINE PARDISO_D_64 - - SUBROUTINE PARDISO_SC_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER(KIND=8), INTENT(IN) :: MAXFCT - INTEGER(KIND=8), INTENT(IN) :: MNUM - INTEGER(KIND=8), INTENT(IN) :: MTYPE - INTEGER(KIND=8), INTENT(IN) :: PHASE - INTEGER(KIND=8), INTENT(IN) :: N - INTEGER(KIND=8), INTENT(IN) :: IA(*) - INTEGER(KIND=8), INTENT(IN) :: JA(*) - INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) - INTEGER(KIND=8), INTENT(IN) :: NRHS - INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) - INTEGER(KIND=8), INTENT(IN) :: MSGLVL - INTEGER(KIND=8), INTENT(OUT) :: ERROR - COMPLEX(KIND=4), INTENT(IN) :: A(*) - COMPLEX(KIND=4), INTENT(INOUT) :: B(*) - COMPLEX(KIND=4), INTENT(OUT) :: X(*) - END SUBROUTINE PARDISO_SC_64 - - SUBROUTINE PARDISO_DC_64( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER(KIND=8), INTENT(IN) :: MAXFCT - INTEGER(KIND=8), INTENT(IN) :: MNUM - INTEGER(KIND=8), INTENT(IN) :: MTYPE - INTEGER(KIND=8), INTENT(IN) :: PHASE - INTEGER(KIND=8), INTENT(IN) :: N - INTEGER(KIND=8), INTENT(IN) :: IA(*) - INTEGER(KIND=8), INTENT(IN) :: JA(*) - INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) - INTEGER(KIND=8), INTENT(IN) :: NRHS - INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) - INTEGER(KIND=8), INTENT(IN) :: MSGLVL - INTEGER(KIND=8), INTENT(OUT) :: ERROR - COMPLEX(KIND=8), INTENT(IN) :: A(*) - COMPLEX(KIND=8), INTENT(INOUT) :: B(*) - COMPLEX(KIND=8), INTENT(OUT) :: X(*) - END SUBROUTINE PARDISO_DC_64 - - SUBROUTINE PARDISO_S_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER(KIND=8), INTENT(IN) :: MAXFCT - INTEGER(KIND=8), INTENT(IN) :: MNUM - INTEGER(KIND=8), INTENT(IN) :: MTYPE - INTEGER(KIND=8), INTENT(IN) :: PHASE - INTEGER(KIND=8), INTENT(IN) :: N - INTEGER(KIND=8), INTENT(IN) :: IA(*) - INTEGER(KIND=8), INTENT(IN) :: JA(*) - INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) - INTEGER(KIND=8), INTENT(IN) :: NRHS - INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) - INTEGER(KIND=8), INTENT(IN) :: MSGLVL - INTEGER(KIND=8), INTENT(OUT) :: ERROR - REAL(KIND=4), INTENT(IN) :: A(*) - REAL(KIND=4), INTENT(INOUT) :: B(N,*) - REAL(KIND=4), INTENT(OUT) :: X(N,*) - END SUBROUTINE PARDISO_S_64_2D - - SUBROUTINE PARDISO_D_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER(KIND=8), INTENT(IN) :: MAXFCT - INTEGER(KIND=8), INTENT(IN) :: MNUM - INTEGER(KIND=8), INTENT(IN) :: MTYPE - INTEGER(KIND=8), INTENT(IN) :: PHASE - INTEGER(KIND=8), INTENT(IN) :: N - INTEGER(KIND=8), INTENT(IN) :: IA(*) - INTEGER(KIND=8), INTENT(IN) :: JA(*) - INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) - INTEGER(KIND=8), INTENT(IN) :: NRHS - INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) - INTEGER(KIND=8), INTENT(IN) :: MSGLVL - INTEGER(KIND=8), INTENT(OUT) :: ERROR - REAL(KIND=8), INTENT(IN) :: A(*) - REAL(KIND=8), INTENT(INOUT) :: B(N,*) - REAL(KIND=8), INTENT(OUT) :: X(N,*) - END SUBROUTINE PARDISO_D_64_2D - - SUBROUTINE PARDISO_SC_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER(KIND=8), INTENT(IN) :: MAXFCT - INTEGER(KIND=8), INTENT(IN) :: MNUM - INTEGER(KIND=8), INTENT(IN) :: MTYPE - INTEGER(KIND=8), INTENT(IN) :: PHASE - INTEGER(KIND=8), INTENT(IN) :: N - INTEGER(KIND=8), INTENT(IN) :: IA(*) - INTEGER(KIND=8), INTENT(IN) :: JA(*) - INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) - INTEGER(KIND=8), INTENT(IN) :: NRHS - INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) - INTEGER(KIND=8), INTENT(IN) :: MSGLVL - INTEGER(KIND=8), INTENT(OUT) :: ERROR - COMPLEX(KIND=4), INTENT(IN) :: A(*) - COMPLEX(KIND=4), INTENT(INOUT) :: B(N,*) - COMPLEX(KIND=4), INTENT(OUT) :: X(N,*) - END SUBROUTINE PARDISO_SC_64_2D - - SUBROUTINE PARDISO_DC_64_2D( PT, MAXFCT, MNUM, MTYPE, PHASE, N, A, IA, JA, PERM, NRHS, IPARM, MSGLVL, B, X, ERROR ) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER(KIND=8), INTENT(IN) :: MAXFCT - INTEGER(KIND=8), INTENT(IN) :: MNUM - INTEGER(KIND=8), INTENT(IN) :: MTYPE - INTEGER(KIND=8), INTENT(IN) :: PHASE - INTEGER(KIND=8), INTENT(IN) :: N - INTEGER(KIND=8), INTENT(IN) :: IA(*) - INTEGER(KIND=8), INTENT(IN) :: JA(*) - INTEGER(KIND=8), INTENT(INOUT) :: PERM(*) - INTEGER(KIND=8), INTENT(IN) :: NRHS - INTEGER(KIND=8), INTENT(INOUT) :: IPARM(*) - INTEGER(KIND=8), INTENT(IN) :: MSGLVL - INTEGER(KIND=8), INTENT(OUT) :: ERROR - COMPLEX(KIND=8), INTENT(IN) :: A(*) - COMPLEX(KIND=8), INTENT(INOUT) :: B(N,*) - COMPLEX(KIND=8), INTENT(OUT) :: X(N,*) - END SUBROUTINE PARDISO_DC_64_2D - - END INTERFACE - - INTERFACE - - SUBROUTINE PARDISOINIT(PT, MTYPE, IPARM) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(OUT) :: PT(*) - INTEGER, INTENT(IN) :: MTYPE - INTEGER, INTENT(OUT) :: IPARM(*) - END SUBROUTINE PARDISOINIT - - END INTERFACE - - INTERFACE PARDISO_GET - - FUNCTION PARDISO_GETENV(PT, OptName, StrVal) - USE MKL_PARDISO_PRIVATE - INTEGER PARDISO_GETENV - TYPE(MKL_PARDISO_HANDLE), INTENT(IN) :: PT(*) - INTEGER, INTENT(IN) :: OptName - CHARACTER(*), INTENT(OUT) :: StrVal - END FUNCTION PARDISO_GETENV - - END INTERFACE - - INTERFACE PARDISO_SET - - FUNCTION PARDISO_SETENV(PT, OptName, StrVal) - USE MKL_PARDISO_PRIVATE - INTEGER PARDISO_SETENV - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - INTEGER, INTENT(IN) :: OptName - INTEGER, INTENT(IN) :: StrVal(*) - END FUNCTION PARDISO_SETENV - - END INTERFACE - - INTERFACE PARDISO_PIV - - FUNCTION MKL_PARDISO_PIVOT( AII, BII, EPS) - REAL(KIND=8) :: AII, BII, EPS - INTEGER MKL_PARDISO_PIVOT - END - END INTERFACE PARDISO_PIV - - INTERFACE PARDISO_GETDIAG - - SUBROUTINE PARDISO_GETDIAG_D(PT, DIAG_FACT, DIAG_A, MNUM, EPS) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - REAL(KIND=8), INTENT(INOUT) :: DIAG_FACT, DIAG_A - INTEGER, INTENT(IN) :: MNUM - INTEGER, INTENT(INOUT) :: EPS - END - - SUBROUTINE PARDISO_GETDIAG_Z(PT, DIAG_FACT, DIAG_A, MNUM, EPS) - USE MKL_PARDISO_PRIVATE - TYPE(MKL_PARDISO_HANDLE), INTENT(INOUT) :: PT(*) - COMPLEX(KIND=8), INTENT(INOUT) :: DIAG_FACT, DIAG_A - INTEGER, INTENT(IN) :: MNUM - INTEGER, INTENT(INOUT) :: EPS - END - - END INTERFACE PARDISO_GETDIAG - - - - - END MODULE MKL_PARDISO - -!DEC$ ENDIF diff --git a/src/heatflow/mod_evolve.f90 b/src/heatflow/mod_evolve.f90 index 67af070..0554c0d 100644 --- a/src/heatflow/mod_evolve.f90 +++ b/src/heatflow/mod_evolve.f90 @@ -159,7 +159,7 @@ subroutine simulate(itime) ! if (any(x-Temp_p .lt. TINY)) x=x+TINY !avoid nan solver issue itol=1 tol=1.e-32_real12 - itmax=50000 + itmax=500000 ncg = 0 iter= 0 err=E diff --git a/src/heatflow/mod_heating.f90 b/src/heatflow/mod_heating.f90 index aa070ac..8fb0298 100644 --- a/src/heatflow/mod_heating.f90 +++ b/src/heatflow/mod_heating.f90 @@ -78,18 +78,22 @@ subroutine heater(itime, Q, Qdens) !------------------------------ if ( time .le. time_pulse ) then Q(IA) = POWER - if (icattaneo .eq. 1) then - if (itime .eq. 1) then - Q(IA) = Q(IA) + (tau*(POWER)) - end if - if (itime .eq. heated_steps+1) then - Q(IA) = Q(IA) - (tau*(POWER)) - end if - end if - + ! print *, "Heating on" else + ! print *, "Heating off" Q(IA) = 0.0_real12 end if + + if (icattaneo .eq. 1) then + if (itime .eq. 1) then + Q(IA) = Q(IA) + tau*POWER + ! print *, "Turning on heater at time ", time + end if + if (itime .eq. (heated_steps + 1)) then + Q(IA) = Q(IA) - tau*POWER + ! print *, "Turning off heater at time ", time + end if + end if !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -126,6 +130,34 @@ subroutine heater(itime, Q, Qdens) end if !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + case(7) + !--------------------------------------------------------- + ! Heater on for a time period, off for a time period, + ! then on again (square-wave heating) + !--------------------------------------------------------- + + if (mod(time, 2.0_real12 * time_pulse) .le. time_pulse) then + Q(IA) = POWER + + if (icattaneo .eq. 1) then + + ! Forward-time pulse check + if (mod(time + time_step, 2.0_real12 * time_pulse) .le. time_pulse) then + Q(IA) = Q(IA) + tau * POWER + end if + + ! Backward-time pulse check + if (mod(time - time_step, 2.0_real12 * time_pulse) .le. time_pulse) then + Q(IA) = Q(IA) - tau * POWER + end if + + end if + + else + Q(IA) = 0.0_real12 + end if + + case(10) Q(IA) = POWER - (tau*(POWER)) diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90 index dd6c73a..f5e37b7 100644 --- a/src/heatflow/mod_petsc_solver.f90 +++ b/src/heatflow/mod_petsc_solver.f90 @@ -2,9 +2,18 @@ module petsc_solver #include "petsc/finclude/petscsys.h" #include "petsc/finclude/petscksp.h" use petscksp + use iso_c_binding implicit none private - public :: petsc_init, petsc_finalize, solve_petsc_csr + public :: petsc_init, petsc_finalize, solve_petsc_csr, petsc_cleanup + + ! Persistent PETSc objects (reused across timesteps for memory efficiency) + Mat, save :: A_saved = PETSC_NULL_MAT + Vec, save :: bb_saved = PETSC_NULL_VEC + Vec, save :: xx_saved = PETSC_NULL_VEC + KSP, save :: ksp_saved = PETSC_NULL_KSP + logical, save :: initialized = .false. + integer, save :: n_saved = 0 contains @@ -15,9 +24,25 @@ end subroutine petsc_init subroutine petsc_finalize() integer :: ierr + call petsc_cleanup() call PetscFinalize(ierr) end subroutine petsc_finalize + subroutine petsc_cleanup() + ! Clean up persistent PETSc objects + integer :: ierr + if (A_saved /= PETSC_NULL_MAT) call MatDestroy(A_saved, ierr) + if (bb_saved /= PETSC_NULL_VEC) call VecDestroy(bb_saved, ierr) + if (xx_saved /= PETSC_NULL_VEC) call VecDestroy(xx_saved, ierr) + if (ksp_saved /= PETSC_NULL_KSP) call KSPDestroy(ksp_saved, ierr) + A_saved = PETSC_NULL_MAT + bb_saved = PETSC_NULL_VEC + xx_saved = PETSC_NULL_VEC + ksp_saved = PETSC_NULL_KSP + initialized = .false. + n_saved = 0 + end subroutine petsc_cleanup + subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) integer, intent(in) :: n integer, intent(in) :: ia(:), ja(:) @@ -25,24 +50,87 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) real(8), intent(inout) :: x(:) real(8), intent(in) :: rtol integer, intent(in) :: maxit - Mat :: A - Vec :: bb, xx - KSP :: ksp + PC :: pc integer :: ierr, i, row_nz, start_k, its - integer, allocatable :: cols0(:), idx(:) + integer, allocatable :: cols0(:), idx(:), d_nnz(:) + PetscInt :: idx_array(1) + PetscScalar :: val_array(1) real(8), allocatable :: vals(:) - real(8), pointer :: xptr(:) - KSPConvergedReason :: reason real(8) :: rnorm + logical :: rebuild_needed + ! write(*,'(A,I0)') ' [DEBUG] Entered solve_petsc_csr, n=', n + ! call flush(6) + if (size(ia) /= n+1) stop 'solve_petsc_csr: ia size mismatch' if (size(b) /= n .or. size(x) /= n) stop 'solve_petsc_csr: vector size mismatch' - ! Create matrix with an estimated 7 nonzeros/row (adjust if needed) - call MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 7, PETSC_NULL_INTEGER, A, ierr) + ! write(*,'(A)') ' [DEBUG] Size checks passed' + ! call flush(6) + + ! Determine if we need to rebuild the matrix structure + rebuild_needed = .false. + if (.not. initialized) rebuild_needed = .true. + if (n /= n_saved) rebuild_needed = .true. + + ! write(*,'(A,L1)') ' [DEBUG] rebuild_needed=', rebuild_needed + ! call flush(6) + + ! Create PETSc objects on first call or if size changed + if (rebuild_needed) then + ! write(*,'(A)') ' [DEBUG] Starting PETSc object creation...' + ! call flush(6) + + ! Clean up old objects if they exist + if (initialized) call petsc_cleanup() + + ! write(*,'(A)') ' [DEBUG] Preallocating matrix...' + ! call flush(6) + + ! Preallocate matrix with exact nonzeros per row (saves memory) + allocate(d_nnz(n)) + do i = 1, n + d_nnz(i) = ia(i+1) - ia(i) + end do + + ! write(*,'(A,I0,A,I0)') ' [DEBUG] Creating matrix: n=', n, ', max_nnz/row=', maxval(d_nnz) + ! call flush(6) + + ! Create matrix with exact preallocation (most memory-efficient) + call MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 0, d_nnz, A_saved, ierr) + if (ierr /= 0) then + write(0,*) "ERROR: MatCreateSeqAIJ failed with ierr=", ierr + write(0,*) " Matrix size may exceed system limits" + write(0,*) " n=", n, ", nnz=", sum(int(d_nnz,8)), ", max_nnz/row=", maxval(d_nnz) + stop + end if + + deallocate(d_nnz) + + ! Create persistent vectors + call VecCreateSeq(PETSC_COMM_SELF, n, bb_saved, ierr) + call VecCreateSeq(PETSC_COMM_SELF, n, xx_saved, ierr) + + ! Create and configure KSP solver (persistent across timesteps) + call KSPCreate(PETSC_COMM_SELF, ksp_saved, ierr) + call KSPSetOperators(ksp_saved, A_saved, A_saved, ierr) + call KSPGetPC(ksp_saved, pc, ierr) + ! Use ILU preconditioner for better conditioning (especially for high conductivity materials) + call PCSetType(pc, PCILU, ierr) ! ILU preconditioner (better than Jacobi) + ! call PCSetType(pc, PCJACOBI, ierr) ! Jacobi preconditioner (simple) + call KSPSetType(ksp_saved, KSPBCGS, ierr) ! BiCGSTAB solver + call KSPSetTolerances(ksp_saved, rtol, PETSC_DEFAULT_REAL, & + PETSC_DEFAULT_REAL, maxit, ierr) + call KSPSetNormType(ksp_saved, KSP_NORM_UNPRECONDITIONED, ierr) + call KSPSetFromOptions(ksp_saved, ierr) + + initialized = .true. + n_saved = n + end if - ! Fill matrix from CSR format (ia, ja are 1-based Fortran indexing) + ! Update matrix values (always needed each timestep) + call MatZeroEntries(A_saved, ierr) do i = 1, n row_nz = ia(i+1) - ia(i) if (row_nz > 0) then @@ -52,116 +140,90 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) cols0 = ja(start_k:start_k+row_nz-1) - 1 vals = aval(start_k:start_k+row_nz-1) - ! Debug: print first row details - if (i == 1) then - write(*,'(A,I0,A,I0)') 'Row 1: nnz=', row_nz, ', start_k=', start_k - write(*,'(A,10I6)') ' 1-based cols:', ja(start_k:min(start_k+9,start_k+row_nz-1)) - write(*,'(A,10I6)') ' 0-based cols:', cols0(1:min(10,row_nz)) - write(*,'(A,10ES12.4)') ' vals:', vals(1:min(10,row_nz)) - end if - ! Set row i-1 (0-based) with column indices cols0 (0-based) - call MatSetValues(A, 1, (/i-1/), row_nz, cols0, vals, INSERT_VALUES, ierr) + call MatSetValues(A_saved, 1, (/i-1/), row_nz, cols0, vals, INSERT_VALUES, ierr) deallocate(cols0, vals) end if end do - call MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY, ierr) - call MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY, ierr) + call MatAssemblyBegin(A_saved, MAT_FINAL_ASSEMBLY, ierr) + call MatAssemblyEnd(A_saved, MAT_FINAL_ASSEMBLY, ierr) ! Optional: Verify matrix assembly (uncomment for debugging) - ! call MatView(A, PETSC_VIEWER_STDOUT_SELF, ierr) - - ! Create vectors - call VecCreateSeq(PETSC_COMM_SELF, n, bb, ierr) - call VecCreateSeq(PETSC_COMM_SELF, n, xx, ierr) - - ! Set RHS and initial guess - allocate(idx(n)) - idx = [(i-1, i=1,n)] - call VecSetValues(bb, n, idx, b, INSERT_VALUES, ierr) - call VecAssemblyBegin(bb,ierr); call VecAssemblyEnd(bb,ierr) - - call VecSetValues(xx, n, idx, x, INSERT_VALUES, ierr) - call VecAssemblyBegin(xx,ierr); call VecAssemblyEnd(xx,ierr) - - ! KSP setup - call KSPCreate(PETSC_COMM_SELF, ksp, ierr) - call KSPSetOperators(ksp, A, A, ierr) ! 3-arg form (reuse automatically) - call KSPGetPC(ksp, pc, ierr) - call PCSetType(pc, PCJACOBI, ierr) ! Use Jacobi (diagonal) preconditioner to match linbcg - call KSPSetType(ksp, KSPBCGS, ierr) ! Use BCGS to match linbcg behavior - - ! Set convergence tolerances - ! rtol = relative tolerance, atol = absolute tolerance (use default), dtol = divergence tolerance, maxits = max iterations - call KSPSetTolerances(ksp, rtol, PETSC_DEFAULT_REAL, PETSC_DEFAULT_REAL, maxit, ierr) - - ! Use unpreconditioned norm (matching linbcg with itol=1) - call KSPSetNormType(ksp, KSP_NORM_UNPRECONDITIONED, ierr) - - ! Allow command line override of solver options - call KSPSetFromOptions(ksp, ierr) + ! call MatView(A_saved, PETSC_VIEWER_STDOUT_SELF, ierr) - call KSPSolve(ksp, bb, xx, ierr) - - ! Check convergence - call KSPGetConvergedReason(ksp, reason, ierr) - call KSPGetIterationNumber(ksp, its, ierr) - call KSPGetResidualNorm(ksp, rnorm, ierr) - - ! Report convergence status (commented out by default for performance) - ! Uncomment the next line to see convergence info every solve: - ! write(*,'(A,I0,A,ES12.5,A,I0)') ' PETSc: iterations=', its, ', residual=', rnorm, ', reason=', reason + ! Update RHS vector in batches to avoid memory issues with huge systems + block + integer, parameter :: VEC_CHUNK = 1000000 + integer :: vec_start, vec_end, vec_len, k + integer, allocatable :: idx_vec(:) + + do vec_start = 1, n, VEC_CHUNK + vec_end = min(vec_start + VEC_CHUNK - 1, n) + vec_len = vec_end - vec_start + 1 + + allocate(idx_vec(vec_len)) + idx_vec = [(vec_start + k - 2, k=1,vec_len)] ! 0-based indices + + call VecSetValues(bb_saved, vec_len, idx_vec, b(vec_start:vec_end), INSERT_VALUES, ierr) + deallocate(idx_vec) + end do + end block + call VecAssemblyBegin(bb_saved,ierr); call VecAssemblyEnd(bb_saved,ierr) + + ! Update initial guess in batches + block + integer, parameter :: VEC_CHUNK = 1000000 + integer :: vec_start, vec_end, vec_len, k + integer, allocatable :: idx_vec(:) + + do vec_start = 1, n, VEC_CHUNK + vec_end = min(vec_start + VEC_CHUNK - 1, n) + vec_len = vec_end - vec_start + 1 + + allocate(idx_vec(vec_len)) + idx_vec = [(vec_start + k - 2, k=1,vec_len)] ! 0-based indices + + call VecSetValues(xx_saved, vec_len, idx_vec, x(vec_start:vec_end), INSERT_VALUES, ierr) + deallocate(idx_vec) + end do + end block + call VecAssemblyBegin(xx_saved,ierr); call VecAssemblyEnd(xx_saved,ierr) + + ! Solve the system + call KSPSolve(ksp_saved, bb_saved, xx_saved, ierr) - if (reason < 0) then - write(0,*) "WARNING: PETSc solver diverged or failed!" - write(0,*) " Reason code:", reason - write(0,*) " Iterations:", its - write(0,*) " Residual norm:", rnorm - ! Don't stop - let the main code detect NaNs if needed + if (ierr /= 0) then + write(0,*) "ERROR: KSPSolve failed with error code:", ierr + stop end if - - ! Extract solution - call VecGetArrayF90(xx, xptr, ierr) - x = xptr - call VecRestoreArrayF90(xx, xptr, ierr) - ! Manual residual check: compute ||A*x - b|| + call KSPGetIterationNumber(ksp_saved, its, ierr) + call KSPGetResidualNorm(ksp_saved, rnorm, ierr) + + ! Extract solution vector using batched VecGetValues block - real(8), allocatable :: Ax(:), residual(:) - real(8) :: manual_rnorm, bnorm - integer(8) :: i, k, start_k, row_nz + integer, parameter :: CHUNK_SIZE = 100000 + PetscInt, allocatable :: idx_batch(:) + PetscScalar, allocatable :: val_batch(:) + integer :: i_start, i_end, chunk_len, j - allocate(Ax(n), residual(n)) - Ax = 0.0_8 - - ! Compute A*x manually using CSR format - do i = 1, n - row_nz = ia(i+1) - ia(i) - if (row_nz > 0) then - start_k = ia(i) - do k = start_k, start_k + row_nz - 1 - Ax(i) = Ax(i) + aval(k) * x(ja(k)) - end do - end if + do i_start = 1, n, CHUNK_SIZE + i_end = min(i_start + CHUNK_SIZE - 1, n) + chunk_len = i_end - i_start + 1 + + allocate(idx_batch(chunk_len), val_batch(chunk_len)) + + do j = 1, chunk_len + idx_batch(j) = i_start + j - 2 + end do + + call VecGetValues(xx_saved, chunk_len, idx_batch, val_batch, ierr) + x(i_start:i_end) = val_batch(1:chunk_len) + + deallocate(idx_batch, val_batch) end do - - residual = Ax - b - manual_rnorm = sqrt(sum(residual**2)) - bnorm = sqrt(sum(b**2)) - - write(*,'(A,ES15.7)') ' Manual residual check: ||A*x-b|| = ', manual_rnorm - write(*,'(A,ES15.7)') ' ||b|| = ', bnorm - write(*,'(A,ES15.7)') ' ||A*x-b||/||b|| = ', manual_rnorm/bnorm - - deallocate(Ax, residual) end block - ! Cleanup - deallocate(idx) - call KSPDestroy(ksp, ierr) - call VecDestroy(bb, ierr) - call VecDestroy(xx, ierr) - call MatDestroy(A, ierr) end subroutine solve_petsc_csr end module petsc_solver \ No newline at end of file diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90 old mode 100755 new mode 100644 index e3c2872..0a25b00 --- a/src/heatflow/mod_setup.f90 +++ b/src/heatflow/mod_setup.f90 @@ -24,9 +24,10 @@ module setup use globe_data, only: acsr, ja, ia use solver, only: SRSin use materials, only: material - use sparse_solver, only: coo2csr implicit none + public :: set_global_variables + contains @@ -52,8 +53,13 @@ subroutine set_global_variables() ! later date !--------------------------------------------------- write(*,*) "Setting up material properties" + write(*,'(A,I10,A)') " Processing ", NA, " grid cells..." index = 0 do iz = 1, nz + ! Progress reporting every 10% for large grids + if (mod(iz-1, max(1,nz/10)) == 0 .and. iz > 1) then + write(*,'(A,I3,A)') " Progress: ", int(100.0*real(iz)/real(nz)), "%" + end if do iy = 1, ny do ix = 1, nx index = index + 1 @@ -75,14 +81,15 @@ subroutine set_global_variables() !--------------------------------------------------- ! Check if the sparse matrix matches the full matrix !--------------------------------------------------- + write(*,*) "Building sparse H matrix..." if (Check_Sparse_Full) then print*, "CHECK SPARSE FULL" CALL build_Hmatrix() else + ! Build CSR format directly (acsr, ja, ia are allocated inside sparse_Hmatrix) CALL sparse_Hmatrix() - ! Allocate the arrays to hold the H matrix in CSR format - allocate(acsr(ra%len), ja(ra%len), ia(ra%n+1)) - CALL coo2csr(ra%n, ra%len, ra%val, ra%irow, ra%jcol, acsr, ja, ia) + ! No need for COO->CSR conversion anymore, it's already in CSR format! + write(*,*) "Sparse matrix setup complete." end if !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -91,26 +98,46 @@ end subroutine set_global_variables !!!################################################################################################# !!!################################################################################################# -!!! This sets up the H Matrix directly in sparse row storage +!!! This sets up the H Matrix directly in sparse row storage (CSR format) +!!! Modified to build CSR directly instead of COO->CSR to save memory !!!################################################################################################# subroutine sparse_Hmatrix() implicit none real(real12) :: H0 ! Holds the value of the H matrix - integer(int12) :: i, j, len, count, k ! i and j are the row and column of the H matrix + integer(int12) :: i, j, count, k, row ! i and j are the row and column of the H matrix + integer(int12) :: nnz_estimate ! Holds the values to add to the row to get the column integer(int12), allocatable, dimension(:) :: addit - ! The number of non-zero elements in the H matrix to look for - len = 7*nx*ny*nz - 2*(nx*ny + ny*nz + nz*nx) - if (Periodicx) len = len + 2*ny*nz - if (Periodicy) len = len + 2*nz*nx - if (Periodicz) len = len + 2*nx*ny + ! Temporary arrays for building each row + real(real12), allocatable, dimension(:) :: row_vals + integer(int12), allocatable, dimension(:) :: row_cols + integer(int12) :: row_count, max_row_size + ra%n = NA ! The number of rows in the H matrix - ra%len = len ! The number of non-zero elements in the H matrix - ! Allocate the arrays to hold the H matrix in sparse storage - allocate(ra%val(len), ra%irow(len), ra%jcol(len)) - ra%val(:)=0 - ra%irow(:)=-2 - ra%jcol(:)=-1 + + ! Estimate nonzeros (7 per interior cell, less at boundaries) + nnz_estimate = 7*nx*ny*nz - 2*(nx*ny + ny*nz + nz*nx) + if (Periodicx) nnz_estimate = nnz_estimate + 2*ny*nz + if (Periodicy) nnz_estimate = nnz_estimate + 2*nz*nx + if (Periodicz) nnz_estimate = nnz_estimate + 2*nx*ny + + ! Allocate CSR arrays with initial estimate (will grow if needed) + ! For 451^3: ~640M entries = 10GB, so allocate conservatively + write(*,'(A,I12,A)') " Estimated nonzeros: ", nnz_estimate, "" + allocate(acsr(nnz_estimate), ja(nnz_estimate)) + allocate(ia(NA+1)) + + ! Setup neighbor offsets + addit = [1] + if (Periodicx) addit = [addit, (nx-1)] + if (ny .gt. 1) addit = [addit, nx] + if ((Periodicy).and.(ny .gt. 1)) addit = [addit, (ny-1)*nx] + if (nz .gt. 1) addit = [addit, nx*ny] + if ((Periodicz).and.(nz .gt. 1)) addit = [addit, (nz-1)*ny*nx] + + ! Allocate temporary row storage (max ~13 entries per row for 3D) + max_row_size = 2*size(addit,1) + 1 + allocate(row_vals(max_row_size), row_cols(max_row_size)) addit = [1] ! The values to add to the row to get the column if (Periodicx) addit = [addit, (nx-1)] if (ny .gt. 1) addit = [addit, nx] ! Add the values to add to the row to get the column @@ -124,41 +151,131 @@ subroutine sparse_Hmatrix() !write(6,*) NA !write(6,*) "=========================================" - count = 0 ! The number of non-zero elements in the H matrix - parent_loop: do j = 1, NA ! Loop over the columns of the H matrix - i=j ! The row of the H matrix - count = count + 1 ! The number of non-zero elements in the H matrix - H0 = hmatrixfunc(i,j) ! The value of the H matrix - ra%val(count) = H0 ! The value of the H matrix - ra%irow(count) = i ! The row of the H matrix - ra%jcol(count) = j ! The column of the H matrix - ! Loop over the values to add to the row to get the column - neighbour_loop: do k = 1, size(addit,1) - i = j + addit(k) ! The row of the H matrix - ! If the row is greater than the number of rows ... - !...in the H matrix then go to the next column - if ((i.gt.NA)) cycle parent_loop - H0=hmatrixfunc(i,j) ! The value of the H matrix - ! If the value of the H matrix is less than TINY then go to the next value ... - !...to add to the row to get the column - if (abs(H0).lt.TINY) cycle neighbour_loop - count = count + 1 ! The number of non-zero elements in the H matrix - ra%val(count) = H0 ! The value of the H matrix - ra%irow(count) = i ! The row of the H matrix - ra%jcol(count) = j ! The column of the H matrix - count = count + 1 ! The number of non-zero elements in the H matrix - ra%val(count) = H0 ! The value of the H matrix - ra%irow(count) = j ! The row of the H matrix - ra%jcol(count) = i ! The column of the H matrix - !write(6,*) j,i, H0, count - end do neighbour_loop - end do parent_loop - - !write(6,*) "=========================================" - !write(6,*) 'c len',count, len + + count = 0 ! Total nonzeros counter + ia(1) = 1 ! CSR row pointer (1-based for Fortran) + + ! Build CSR format row-by-row + write(*,'(A)') " Building CSR matrix row-by-row..." + parent_loop: do row = 1, NA + ! Progress reporting every 10% + if (mod(row-1, max(1,NA/10)) == 0 .and. row > 1) then + write(*,'(A,I3,A,I12,A)') " Progress: ", int(100.0*real(row)/real(NA)), & + "%, nnz=", count, "" + end if + + row_count = 0 + + ! Diagonal element + j = row + row_count = row_count + 1 + H0 = hmatrixfunc(row, j) + row_vals(row_count) = H0 + row_cols(row_count) = j + + ! Off-diagonal elements (process in column-sorted order for CSR) + ! First pass: collect all neighbors + do k = 1, size(addit,1) + j = row + addit(k) + if (j > NA) cycle ! Skip if out of bounds + + H0 = hmatrixfunc(row, j) + if (abs(H0) >= TINY) then + row_count = row_count + 1 + row_vals(row_count) = H0 + row_cols(row_count) = j + end if + end do + + ! Second pass: collect reverse neighbors (j < row) + do k = 1, size(addit,1) + j = row - addit(k) + if (j < 1) cycle ! Skip if out of bounds + + H0 = hmatrixfunc(row, j) + if (abs(H0) >= TINY) then + row_count = row_count + 1 + row_vals(row_count) = H0 + row_cols(row_count) = j + end if + end do + + ! Sort this row's entries by column index (required for CSR) + call sort_row(row_vals, row_cols, row_count) + + ! Copy row data to CSR arrays (no bounds checking - we pre-allocated correctly) + do i = 1, row_count + count = count + 1 + acsr(count) = row_vals(i) + ja(count) = row_cols(i) + end do + + ! Update row pointer + ia(row+1) = count + 1 + end do parent_loop + + ra%len = count + + ! Trim arrays to actual size if we over-estimated + if (count < size(acsr)) then + write(*,'(A,I12,A,I12)') " Trimming arrays from ", size(acsr), " to ", count + call trim_csr_arrays(acsr, ja, count) + end if + + deallocate(row_vals, row_cols) + write(*,'(A,I12,A)') " CSR matrix built successfully. Actual nonzeros: ", count, "" end subroutine sparse_Hmatrix !!!################################################################################################# +!!!################################################################################################# +!!! Sort a row's entries by column index (simple insertion sort, rows are small) +!!!################################################################################################# + subroutine sort_row(vals, cols, n) + implicit none + integer(int12), intent(in) :: n + real(real12), dimension(n), intent(inout) :: vals + integer(int12), dimension(n), intent(inout) :: cols + integer(int12) :: i, j, temp_col + real(real12) :: temp_val + + do i = 2, n + temp_val = vals(i) + temp_col = cols(i) + j = i - 1 + do while (j >= 1) + if (cols(j) <= temp_col) exit + vals(j+1) = vals(j) + cols(j+1) = cols(j) + j = j - 1 + end do + vals(j+1) = temp_val + cols(j+1) = temp_col + end do + end subroutine sort_row +!!!################################################################################################# + +!!!################################################################################################# +!!! Trim CSR arrays to exact size +!!!################################################################################################# + subroutine trim_csr_arrays(acsr_arr, ja_arr, final_size) + implicit none + integer(int12), intent(in) :: final_size + real(real12), allocatable, dimension(:), intent(inout) :: acsr_arr + integer(int12), allocatable, dimension(:), intent(inout) :: ja_arr + real(real12), allocatable, dimension(:) :: temp_vals + integer(int12), allocatable, dimension(:) :: temp_cols + + allocate(temp_vals(final_size), temp_cols(final_size)) + temp_vals = acsr_arr(1:final_size) + temp_cols = ja_arr(1:final_size) + deallocate(acsr_arr, ja_arr) + allocate(acsr_arr(final_size), ja_arr(final_size)) + acsr_arr = temp_vals + ja_arr = temp_cols + deallocate(temp_vals, temp_cols) + end subroutine trim_csr_arrays +!!!################################################################################################# + !!!################################################################################################# !!! Check if the simulation will be stable. NOT FULLY IMPLEMENTED !!!################################################################################################# diff --git a/src/heatflow/mod_sparse_solver.f90 b/src/heatflow/mod_sparse_solver.f90 deleted file mode 100644 index 8f7957c..0000000 --- a/src/heatflow/mod_sparse_solver.f90 +++ /dev/null @@ -1,420 +0,0 @@ -module sparse_solver - - use mkl_pardiso - implicit none - - private - - public :: coo2csr, & - bicgstab, & - solve_pardiso - -contains - - subroutine coo2csr( nrow, & - nnz, & - a, & - ir, & - jc, & - acsr, & - ja, & - ia ) - - !--------------------------------------------------------------------------! - !! coocsr converts coo to csr. - ! - ! discussion: - ! - ! this routine converts a matrix that is stored in coo coordinate format - ! a, ir, jc into a csr row general sparse acsr, ja, ia format. - ! - ! parameters: - ! - ! input, integer nrow, the row dimension of the matrix. - ! - ! input, integer nnz, the number of nonzero elements in the matrix. - ! - ! a, - ! ir, - ! jc = matrix in coordinate format. a(k), ir(k), jc(k) store the nnz - ! nonzero elements of the matrix with a(k) = actual real value of - ! the elements, ir(k) = its row number and jc(k) = its column - ! number. the order of the elements is arbitrary. - ! - ! on return: - ! - ! ir is destroyed - ! - ! output, real acsr(nnz), ja(nnz), ia(nrow+1), the matrix in csr - ! compressed sparse row format. - !--------------------------------------------------------------------------! - - ! Arguments. - integer (kind=8), intent(in) :: nrow - integer (kind=8), intent(in) :: nnz - real(8), dimension(nnz), intent(in) :: a - integer (kind=8), dimension(nnz), intent(in) :: ir - integer (kind=8), dimension(nnz), intent(in) :: jc - real(8), dimension(nnz), intent(out) :: acsr - integer(kind=8), dimension(nnz), intent(out) :: ja - integer(kind=8), dimension(nrow+1), intent(out) :: ia - - ! Local variables. - integer (kind=8) :: i, iad, j, k, k0, row_start - integer (kind=8) :: dup_count - real(kind=8) :: x - logical :: found_dup - integer(kind=8), dimension(nrow+1) :: ia_save ! Save original row starts - - ia(1:nrow+1) = 0 - dup_count = 0 - - ! determine the row lengths. - - do k = 1, nnz - ia(ir(k)) = ia(ir(k)) + 1 - end do - - ! the starting position of each row. - - k = 1 - - do j = 1, nrow + 1 - - k0 = ia(j) - ia(j) = k - k = k + k0 - - end do - - ! Save the original row pointers - ia_save = ia - - ! go through the structure once more. fill in output matrix. - ! This version handles duplicate (i,j) entries by summing them. - - do k = 1, nnz - - i = ir(k) - j = jc(k) - x = a(k) - - ! Search for existing entry in this row with same column - found_dup = .false. - row_start = ia_save(i) - do iad = row_start, ia(i)-1 - if (ja(iad) == j) then - ! Found duplicate - sum the values - acsr(iad) = acsr(iad) + x - found_dup = .true. - dup_count = dup_count + 1 - exit - end if - end do - - if (.not. found_dup) then - ! New entry - insert it - iad = ia(i) - acsr(iad) = x - ja(iad) = j - ia(i) = iad + 1 - end if - - end do - - if (dup_count > 0) then - write(*,'(A,I0,A)') 'WARNING: COO->CSR found and summed ', dup_count, ' duplicate entries' - end if - - ! shift back ia. - - do j = nrow, 1, -1 - ia(j+1) = ia(j) - end do - - ia(1) = 1 - - return - - end subroutine coo2csr - - !------------------------------------------------------------------- - ! BiConjugate Gradient (Stabilised) Method - !------------------------------------------------------------------- - - subroutine bicgstab( acsr, & - ia, & - ja, & - b, & - maxiter, & - initGuess, & - x, & - iter ) - - ! Arguments - real(8), dimension(:), intent(in) :: acsr - integer (kind=8), dimension(:), intent(in) :: ia - integer (kind=8), dimension(:), intent(in) :: ja - real(8), dimension(:), intent(in) :: b - integer, intent(in) :: maxiter - real(8), dimension(:), intent(in) :: initGuess - real(8), dimension(:), allocatable, intent(out) :: x - integer, intent(out) :: iter - - ! Local variables - integer :: i, j, k, n - real(8), parameter :: cc = 1.0e-9 - real(8) :: alpha,beta,delta0,delta,delta_old,omega - real(8), dimension(:), allocatable :: r, p, s, rst, temp1, temp2 - - n = size(b,1) - allocate(x(n)) - allocate(r(n)) - allocate(p(n)) - allocate(s(n)) - allocate(rst(n)) - allocate(temp1(n)) - allocate(temp2(n)) - - call mkl_dcsrgemv("N",n,acsr,ia,ja,x,temp1) - - print *, "Initial residual norm: ", norm2(b - temp1) - r = b - temp1 - - call random_number(rst) - - p = r - - delta = dot_product(rst,r) - - write(*,'(a,1x,f15.3)') "Starting delta: ", delta - - delta0 = delta - - do i = 1, maxiter - print *, "Iteration ", i - if ( norm2(r) /= norm2(r) ) then - write(*,'(a)') "Error in solver: residual NaN" - exit - end if - - if(mod(i,1000).eq.0) then - write(*,'(a,1x,i6)') 'Iteration number: ',i - write(*,'(a,1x,f15.3)') "Residual ratio: ", norm2(r)/cc - end if - - call mkl_dcsrgemv("N",n,acsr,ia,ja,p,temp1) ! temp1=A*p - - alpha = delta/dot_product(rst,temp1) - s = r - alpha*temp1 - - call mkl_dcsrgemv("N",n,acsr,ia,ja,s,temp2) ! temp2=A*s - - omega = dot_product(s,temp2)/dot_product(temp2,temp2) - x = x + alpha*p + omega*s - r = s - omega*temp2 - delta_old = delta - delta = dot_product(rst,r) - beta = (delta/delta_old)*(alpha/omega) - p = r + beta*(p - omega*temp1) - - if(norm2(r) .lt. cc) then - iter = i - return - end if - - if(i.eq.maxiter) then - write(*,'(a)') "Maximum iterations reached." - write(*,'(a)') "Convergence not achieved." - write(*,'(a,1x,f15.3)') "Norm of residual: ", norm2(r) - write(*,'(a,1x,f15.3)') "Convergence criterion: ", cc - if((norm2(r)/cc) .lt. 2.d0) then - write(*,'(a)') "The residual is within a small",& - "range of the convergence criterion." - write(*,'(a)') "Perhaps increasing iteration ", & - "count may help." - end if - end if - - end do - - end subroutine bicgstab - - !------------------------------------------------------------------- - ! END BiConjugate Gradient (Stabilised) Method - !------------------------------------------------------------------- - - !------------------------------------------------------------------- - ! PARDISO Direct Solver - !------------------------------------------------------------------- - - subroutine solve_pardiso( acsr, & - b, & - ia, & - ja, & - x ) - - use mkl_pardiso - - ! Arguments - real(8), dimension(:), intent(in) :: acsr - real(8), dimension(:), intent(inout) :: b - integer,dimension(:), intent(in) :: ia - integer,dimension(:), intent(in) :: ja - real(8), dimension(:), allocatable, intent(out) :: x - - ! Local variables - type(mkl_pardiso_handle), dimension(:), allocatable :: pt - integer :: i,maxfct,mnum,mtype,phase,n,nrhs,error,msglvl,nnz,error1 - integer, dimension(:), allocatable :: iparm - integer,dimension(1) :: idum - real(8),dimension(1) :: ddum - integer :: badcol, missing_diag, k - logical :: found - n = size(b,1) - nnz = size(acsr,1) - nrhs = 1 - maxfct = 1 - mnum = 1 - - if (.not.(allocated(x))) allocate(x(n)) - - - - allocate(iparm(64)) !set up pardiso control parameter - - do i=1,64 - iparm(i) = 0 - end do - - iparm(1) = 1 ! no solver default - iparm(2) = 2 ! fill-in reordering from metis - iparm(4) = 0 ! no iterative-direct algorithm - iparm(5) = 0 ! no user fill-in reducing permutation - iparm(6) = 0 ! =0 solution on the first n compoments of x - iparm(8) = 2 ! numbers of iterative refinement steps - iparm(10) = 13 ! perturbe the pivot elements with 1e-13 - iparm(11) = 1 ! use nonsymmetric permutation and scaling mps - iparm(13) = 0 ! maximum weighted matching algorithm is - !switched-off (default for symmetric). - ! try iparm(13) = 1 in case of inaccuracy - iparm(14) = 0 ! output: number of perturbed pivots - iparm(18) = -1 ! output: number of nonzeros in the factor lu - iparm(19) = -1 ! output: mflops for lu factorization - iparm(20) = 0 ! output: numbers of cg iterations - - error = 0 ! initialize error flag - msglvl = 0 ! 0=no output, 1=print statistical information - mtype = 11 ! real and unsymmetric matrix - - ! Initiliaze the internal solver memory pointer. - ! This is only necessary for the first call of the solver. - - allocate (pt(64)) - do i=1,64 - pt(i)%dummy = 0 - end do - - !---------------- CSR integrity / diagnostic checks ---------------- - - badcol = 0 - - write(*,*) 'PARDISO debug:' - write(*,*) ' n =', n - write(*,*) ' nnz =', nnz - write(*,*) ' ia(1) =', ia(1), ' ia(n+1)=', ia(n+1), ' ia(n+1)-1=', ia(n+1)-1 - - if (ia(1) /= 1) stop 'ERROR: ia(1) must be 1' - if (ia(n+1)-1 /= nnz) stop 'ERROR: ia end mismatch' - - do i=1,n - if (ia(i) > ia(i+1)) then - write(*,*) 'Row pointer decreases at row', i - stop 'ERROR: ia not monotone' - end if - end do - - do k=1,nnz - if (ja(k) < 1 .or. ja(k) > n) then - badcol = badcol + 1 - if (badcol <= 10) write(*,*) 'Bad column index k=',k,' ja=',ja(k) - end if - end do - if (badcol > 0) then - write(*,*) 'Total bad columns =', badcol - stop 'ERROR: invalid ja entries' - end if - - ! Check each row has a diagonal and (optionally) detect duplicates - missing_diag = 0 - do i=1,n - found = .false. - if (ia(i) < ia(i+1)) then - ! simple duplicate check (requires row segment unsorted ascending to be meaningful) - do k = ia(i), ia(i+1)-1 - if (ja(k) == i) then - if (acsr(k) == 0.0d0) then - write(*,*) 'Zero diagonal at row', i - stop 'ERROR: zero diagonal' - end if - found = .true. - end if - end do - end if - if (.not. found) then - missing_diag = missing_diag + 1 - if (missing_diag <= 10) write(*,*) 'Missing diagonal at row', i - end if - end do - if (missing_diag > 0) stop 'ERROR: missing diagonals' - !------------------------------------------------------------------- - phase = 11 ! Only reordering and symbolic factorization - - call pardiso (pt,maxfct,mnum,mtype,phase,n,acsr,ia,ja, & - idum, nrhs, iparm, msglvl, ddum, ddum, error) - - if (error /= 0) then - write(*,*) 'the following error was detected: ', error - goto 1000 - end if - - phase = 22 ! only factorization - call pardiso (pt,maxfct,mnum,mtype,phase,n,acsr,ia,ja, & - idum, nrhs, iparm, msglvl, ddum, ddum, error) - if (error /= 0) then - write(*,*) 'the following error was detected: ', error - goto 1000 - endif - - ! back substitution and iterative refinement - iparm(8) = 2 ! max numbers of iterative refinement steps - phase = 33 ! only solving - call pardiso (pt,maxfct,mnum,mtype,phase,n,acsr,ia,ja, & - idum, nrhs, iparm, msglvl, b, x, error) - write(*,*) 'solve completed ... ' - if (error /= 0) then - write(*,*) 'the following error was detected: ', error - goto 1000 - endif - -1000 continue - ! termination and release of memory - phase = -1 ! release internal memory - call pardiso (pt,maxfct,mnum,mtype,phase,n,ddum,idum,idum, & - idum, nrhs, iparm, msglvl, ddum, ddum, error1) - - if (error1 /= 0) then - write(*,*) 'the following release error was detected: ', & - error1 - stop 1 - endif - - if ( error /= 0 ) stop 1 - - end subroutine solve_pardiso - - !------------------------------------------------------------------- - ! END PARDISO Direct Solver - !------------------------------------------------------------------- - -end module sparse_solver diff --git a/src/heatflow/mod_tempdep.f90 b/src/heatflow/mod_tempdep.f90 old mode 100755 new mode 100644 From 83c6bfe6bceb20c7e0fbc6554e88cd52413ac419 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 2 Dec 2025 21:24:38 +0000 Subject: [PATCH 23/28] Change blas and lapack to work with threading --- Makefile | 45 +++++++++++++++------------------------------ 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/Makefile b/Makefile index 840a5a8..43c8a8b 100644 --- a/Makefile +++ b/Makefile @@ -9,34 +9,22 @@ SRC_DIR := ./src BUILD_DIR := ./obj BIN_DIR := ./bin -# Compiler +# Compiler (gfortran with OpenMP for threading) FC := gfortran # Core count NCORES := $(shell nproc) -# MKL -MKLROOT ?= /opt/intel/oneapi/mkl/latest -MKL_LIB_DIR := $(MKLROOT)/lib/intel64 -MKL_INCLUDE := $(MKLROOT)/include -MKL_FLAGS := -L$(MKL_LIB_DIR) -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl - -# PETSc (manual fallback if petsc-config missing) -PETSC_PREFIX := /usr/lib/petscdir/petsc3.15/x86_64-linux-gnu-real -PETSC_FINCLUDE := /usr/share/petsc/3.15/include -PETSC_AINCLUDE := $(PETSC_PREFIX)/include -PETSC_LIBDIR := $(PETSC_PREFIX)/lib - -PETSC_CONFIG := $(shell command -v petsc-config 2>/dev/null) -ifeq ($(PETSC_CONFIG),) - PETSC_INC := -I$(PETSC_FINCLUDE) -I$(PETSC_AINCLUDE) - PETSC_LIB := -L$(PETSC_LIBDIR) -lpetsc - PETSC_NOTE := (PETSc manual paths) -else - PETSC_INC := $(shell petsc-config --cflags) - PETSC_LIB := $(shell petsc-config --libs) - PETSC_NOTE := (petsc-config) -endif +# Detect conda environment for BLAS/LAPACK (fallback if no system libs) +CONDA_PREFIX ?= $(shell conda info --base 2>/dev/null || echo /home/hm556/miniforge3) + +# PETSc (system installation) +PETSC_INC := -I/usr/share/petsc/3.15/include -I/usr/lib/petscdir/petsc3.15/x86_64-linux-gnu-real/include +PETSC_LIB := -L/usr/lib/petscdir/petsc3.15/x86_64-linux-gnu-real/lib -lpetsc -Wl,-rpath,/usr/lib/petscdir/petsc3.15/x86_64-linux-gnu-real/lib +PETSC_NOTE := (system PETSc 3.15) + +# Use OpenBLAS for multi-threaded BLAS/LAPACK (better than reference BLAS/ATLAS) +BLAS_FLAGS := -lopenblas -lgomp -lpthread -lm # Flags OPTFLAGS := -O3 @@ -44,8 +32,8 @@ OMPFLAGS := -fopenmp WARNFLAGS := -Wall MODDIR_FLAG := -J$(BUILD_DIR) -FFLAGS := -cpp $(OPTFLAGS) $(OMPFLAGS) $(WARNFLAGS) -I$(MKL_INCLUDE) $(PETSC_INC) $(MODDIR_FLAG) -DEBUGFLAGS := -cpp -O0 -g -fcheck=all -fbacktrace -ffpe-trap=invalid,zero,overflow,underflow -fbounds-check -I$(MKL_INCLUDE) $(PETSC_INC) $(MODDIR_FLAG) +FFLAGS := -cpp $(OPTFLAGS) $(OMPFLAGS) $(WARNFLAGS) $(PETSC_INC) $(MODDIR_FLAG) +DEBUGFLAGS := -cpp -O0 -g -fcheck=all -fbacktrace -ffpe-trap=invalid,zero,overflow,underflow -fbounds-check $(PETSC_INC) $(MODDIR_FLAG) # Program NAME := ThermalFlow.x @@ -62,8 +50,6 @@ SRCS := \ heatflow/mod_material.f90 \ heatflow/mod_hmatrix.f90 \ heatflow/mod_init_evolve.f90 \ - heatflow/mkl_pardiso.f90 \ - heatflow/mod_sparse_solver.f90 \ heatflow/mod_petsc_solver.f90 \ heatflow/mod_boundary.f90 \ heatflow/mod_heating.f90 \ @@ -96,15 +82,14 @@ $(BUILD_DIR)/heatflow.o: $(SRC_DIR)/heatflow.f90 | $(BUILD_DIR) # Link (single definition) $(TARGET): $(BIN_DIR) $(OBJS) - $(FC) $(OPTFLAGS) $(OMPFLAGS) $(OBJS) -o $@ $(MKL_FLAGS) $(PETSC_LIB) -Wl,-rpath,$(PETSC_LIBDIR) + $(FC) $(OPTFLAGS) $(OMPFLAGS) $(OBJS) -o $@ $(BLAS_FLAGS) $(PETSC_LIB) debug: FFLAGS = $(DEBUGFLAGS) debug: clean show $(TARGET) run: $(TARGET) OMP_NUM_THREADS=$(NCORES) \ - MKL_NUM_THREADS=$(NCORES) \ - MKL_DYNAMIC=FALSE \ + OPENBLAS_NUM_THREADS=$(NCORES) \ OMP_PROC_BIND=spread \ OMP_PLACES=cores \ $< $(RUN_ARGS) From 2799327a49bb1a95ad9a8cfadb54b9ea75338794 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 2 Dec 2025 21:25:12 +0000 Subject: [PATCH 24/28] Add ability to change preconditioner and add AMG --- src/heatflow/mod_petsc_solver.f90 | 41 ++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90 index f5e37b7..54161af 100644 --- a/src/heatflow/mod_petsc_solver.f90 +++ b/src/heatflow/mod_petsc_solver.f90 @@ -7,6 +7,14 @@ module petsc_solver private public :: petsc_init, petsc_finalize, solve_petsc_csr, petsc_cleanup + ! ===== PRECONDITIONER SELECTION ===== + ! Change this to switch between preconditioners: + ! 'GAMG' = Algebraic Multigrid (best for elliptic PDEs, 10-20x faster) + ! 'ILU' = Incomplete LU (good general purpose, robust) + ! 'LU' = Direct solver (most robust, uses more memory) + character(len=10), parameter :: PRECONDITIONER = 'GAMG' ! <-- Change here! + ! ==================================== + ! Persistent PETSc objects (reused across timesteps for memory efficiency) Mat, save :: A_saved = PETSC_NULL_MAT Vec, save :: bb_saved = PETSC_NULL_VEC @@ -116,10 +124,35 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) call KSPCreate(PETSC_COMM_SELF, ksp_saved, ierr) call KSPSetOperators(ksp_saved, A_saved, A_saved, ierr) call KSPGetPC(ksp_saved, pc, ierr) - ! Use ILU preconditioner for better conditioning (especially for high conductivity materials) - call PCSetType(pc, PCILU, ierr) ! ILU preconditioner (better than Jacobi) - ! call PCSetType(pc, PCJACOBI, ierr) ! Jacobi preconditioner (simple) - call KSPSetType(ksp_saved, KSPBCGS, ierr) ! BiCGSTAB solver + + ! Select preconditioner based on parameter at top of module + select case (trim(PRECONDITIONER)) + case ('GAMG') + ! Algebraic Multigrid - Best for elliptic PDEs with varying coefficients + ! Optimal O(1) iterations, 10-20x faster than ILU for large problems + call PCSetType(pc, PCGAMG, ierr) + call KSPSetType(ksp_saved, KSPGMRES, ierr) ! GMRES works well with AMG + write(*,'(A)') ' [Solver] Using GAMG (Algebraic Multigrid) preconditioner with GMRES' + + case ('ILU') + ! Incomplete LU - Good general purpose, robust + call PCSetType(pc, PCILU, ierr) + call KSPSetType(ksp_saved, KSPBCGS, ierr) ! BiCGSTAB works well with ILU + write(*,'(A)') ' [Solver] Using ILU preconditioner with BiCGSTAB' + + case ('LU') + ! Direct LU - Most robust, more memory intensive + call PCSetType(pc, PCLU, ierr) + call KSPSetType(ksp_saved, KSPPREONLY, ierr) ! Direct solve + write(*,'(A)') ' [Solver] Using direct LU solver' + + case default + write(*,'(A,A)') ' [Warning] Unknown preconditioner: ', trim(PRECONDITIONER) + write(*,'(A)') ' Defaulting to ILU' + call PCSetType(pc, PCILU, ierr) + call KSPSetType(ksp_saved, KSPBCGS, ierr) + end select + call KSPSetTolerances(ksp_saved, rtol, PETSC_DEFAULT_REAL, & PETSC_DEFAULT_REAL, maxit, ierr) call KSPSetNormType(ksp_saved, KSP_NORM_UNPRECONDITIONED, ierr) From a1137b5ee630f4c7ae028f58680c8405026689f0 Mon Sep 17 00:00:00 2001 From: FHDavies Date: Thu, 15 Jan 2026 13:18:42 +0000 Subject: [PATCH 25/28] Code keeps filling up my computer! I've implemented a compressed binary output transiant output Added a new input flag _CompressedOutput to param.in to enable binary 'stream' output in mod_output.f90. Also added a USER_MANUAL.md to docs/. --- docs/USER_MANUAL.md | 140 ++++++++++++++++++++++++++++++ src/heatflow/mod_inputs.f90 | 11 ++- src/heatflow/mod_output.f90 | 55 ++++++++---- src/heatflow/mod_petsc_solver.f90 | 2 +- 4 files changed, 186 insertions(+), 22 deletions(-) create mode 100644 docs/USER_MANUAL.md diff --git a/docs/USER_MANUAL.md b/docs/USER_MANUAL.md new file mode 100644 index 0000000..c622bc9 --- /dev/null +++ b/docs/USER_MANUAL.md @@ -0,0 +1,140 @@ +# HeatFlow User Manual + +This manual provides a concise guide to configuring and running simulations using the **HeatFlow** software. The software simulates heat transport using finite difference methods, primarily focusing on the Cattaneo (hyperbolic heat equation) and Fourier models. + +## Input Files + +The simulation is controlled by three main input files located in the `inputs/` directory: +1. **`param.in`**: Simulation parameters (time steps, flags, boundary conditions). +2. **`mat.in`**: Material properties. +3. **`system.in`**: Geometry and grid definition. + +### 1. `param.in` (Simulation Parameters) + +This file uses a `KEYWORD = VALUE` format. Comments can be added using `!`. + +#### General Settings +| Keyword | Type | Default | Description | +| :--- | :--- | :--- | :--- | +| `_RunName` | String | `default` | Name of the simulation run. | +| `IVERB` | Integer | `1` | Verbosity level (higher = more output). | +| `ntime` | Integer | `10` | Total number of time steps. | +| `time_step` | Double | `1.0` | Time step size. | +| `freq` | Double | `1.0` | Frequency of the heater. | +| `icattaneo` | Integer | `1` | Switch for Cattaneo term (`1` = On, `0` = Off/Fourier). | +| `isteady` | Integer | `0` | Steady state switch (`1` = Steady state, `0` = Transient). | +| `heattime` | Integer | `0` | Number of steps for which heating is applied (case 2). | +| `TempDepProp`| Integer | `0` | Flag for temperature dependent properties. | + +#### Boundary & Conditions +| Keyword | Type | Default | Description | +| :--- | :--- | :--- | :--- | +| `iboundary` | Integer | `1` | Boundary condition type. | +| `Periodic` | String | `''` | Periodic boundaries. Contains 'x', 'y', or 'z' (e.g., `'xy'`). | +| `kappaBound` | Double | `0.0` | Global boundary thermal conductivity (sets all planes). | +| `kappaBoundx1`...`z2` | Double | `0.0` | Specific boundary conductivity (e.g., `kappaBoundx1` for x=1 plane). | +| `T_System` | Double | `300.0` | Initial system temperature. | +| `T_Bath` | Double | - | Global bath temperature (sets all boundaries boundaries). | +| `T_Bathx1`...`z2` | Double | `T_Bath` | Specific boundary temperatures. | +| `T_BathCG` | Double | `0.0` | Constant gradient bath temperature. | +| `CG_dir` | String | `' '` | Direction for constant gradient (e.g., `'+x'`, `'-y'`). | +| `T_BathCC` | Logical| `F` | Scale constant gradient with DeltaT. | +| `BR` | Double | `1.0` | Bath Ratio (scaling factor). | + +#### Power +| Keyword | Type | Default | Description | +| :--- | :--- | :--- | :--- | +| `power_in` | Double | `0.0` | Power input for the heater. | + +#### Flags (Logical) +All flags default to `.False.`. Set to `.True.` (or `T`) to enable. +- `_Check_Sparse_Full`: Check if simulation is sparse or full. +- `_Check_Stability`: Perform stability check. +- `_Check_Steady_State`: Check for steady state convergence. +- `_WriteToTxt`: Enable writing output to text files. +- `_Percentage_Completion`: Show progress % in output. +- `_Test_Run`: Flag for test runs. +- `_InputTempDis`: Load initial temperature distribution from file. +- `_FullRestart`: Perform a full restart. + +#### Output Control +Defines the region of the grid to write to output. +| Keyword | Type | Default | Description | +| :--- | :--- | :--- | :--- | +| `write_every` | Integer | `1` | Write output every N steps. | +| `start_ix`, `end_ix` | Integer | `1`..`Nx` | X-range for output. | +| `start_iy`, `end_iy` | Integer | `1`..`Ny` | Y-range for output. | +| `start_iz`, `end_iz` | Integer | `1`..`Nz` | Z-range for output. | + +--- + +### 2. `mat.in` (Material Properties) + +Defines the physical properties for each material index used in the system. The file ends with a line containing `0`. + +**Format:** +``` + +keyword = value +... +0 +``` + +| Keyword | Description | +| :--- | :--- | +| `heat_capacity` | Specific heat capacity. | +| `kappa` | Thermal conductivity. | +| `rho` | Density. | +| `tau` | Relaxation time (for Cattaneo). | +| `em` | Emissivity / Parameter (usage depends on physics context). | +| `vel` | Velocity vector (3 components, e.g., `vel = 1.0 0.0 0.0`). | + +**Example:** +``` +1 +heat_capacity = 4200 +kappa = 0.541 +rho = 997 +tau = 1e-12 +vel = 0.0 0.0 0.0 +0 +``` + +--- + +### 3. `system.in` (Geometry/Mesh) + +Defines the simulation grid and the material distribution. + +**Structure:** +1. **Grid Dimensions**: `nx ny nz` +2. **Physical Dimensions**: `Lx Ly Lz` +3. **Grid Data**: A list of `MaterialID:HeaterID` for every cell. + +The file is read in the order: Z-planes, then Y-rows, then X-columns. +Each line in the file (after header) corresponds to one row (X-direction). + +**Example:** +``` +10 10 1 +0.01 0.01 0.001 + +! Z=1, Y=1 Row +1:0 1:0 1:0 1:0 1:0 1:0 1:0 1:0 1:0 1:0 +! Z=1, Y=2 Row +1:0 1:0 ... +``` +- `1:0` means Material ID 1, Heater ID 0 (no heater). +- `1:1` means Material ID 1, Heater ID 1 (active heater). + +## Execution + +Ensure the `inputs/` directory exists with the three required files. Run the executable from the directory containing `inputs/`. + +```bash +./ThermalFlow.x +``` +or via `fpm`: +```bash +fpm run --profile release +``` diff --git a/src/heatflow/mod_inputs.f90 b/src/heatflow/mod_inputs.f90 index 553e925..d26f629 100644 --- a/src/heatflow/mod_inputs.f90 +++ b/src/heatflow/mod_inputs.f90 @@ -91,7 +91,7 @@ module inputs integer(int12) :: start_ix, end_ix, start_iy, end_iy, start_iz, end_iz, TempDepProp, heated_steps ! flags logical :: Check_Sparse_Full, Check_Stability, Check_Steady_State - logical :: WriteToTxt, LPercentage, InputTempDis + logical :: WriteToTxt, LPercentage, InputTempDis, CompressedOutput logical :: Test_Run = .FALSE., FullRestart = .FALSE. ! Name of simiulation run @@ -204,7 +204,7 @@ end subroutine read_all_files subroutine read_param(unit) implicit none integer:: unit, Reason - integer,dimension(45)::readvar + integer,dimension(46)::readvar character(1024)::buffer readvar(:)=0 @@ -221,6 +221,7 @@ subroutine read_param(unit) RunName = 'default' RunName = trim(adjustl(RunName)) WriteToTxt = .FALSE. + CompressedOutput = .FALSE. ntime = 10 heated_steps = 0 write_every = 1 @@ -325,6 +326,7 @@ subroutine read_param(unit) CALL assignD(buffer,"BR",BR,readvar(43)) CALL assignL(buffer,"T_BathCC",T_BathCC,readvar(44)) CALL assignS(buffer,"CG_dir",CG_dir,readvar(45)) + CALL assignL(buffer,"_CompressedOutput",CompressedOutput,readvar(46)) !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ end do @@ -520,6 +522,10 @@ subroutine check_param(readvar,n) readvar(39) = 1 end if + if (readvar(46) .eq. 0) then + readvar(46) = 1 + end if + if (any(readvar.eq.0)) then write(6,*) write(6,'(A43)') '###############################' @@ -554,6 +560,7 @@ subroutine check_param(readvar,n) write(6,'(A35,L1)') ' _FullRestart = ', FullRestart write(6,'(A35,A)') ' _RunName = ', trim(RunName) write(6,'(A35,L1)') ' _WriteToTxt = ', WriteToTxt + write(6,'(A35,L1)') ' _CompressedOutput = ', CompressedOutput write(6,'(A35,I12)') ' ntime = ', ntime write(6,'(A35,I12)') ' heattime = ', heated_steps write(6,'(A35,I12)') ' write_every = ', write_every diff --git a/src/heatflow/mod_output.f90 b/src/heatflow/mod_output.f90 index 0235e4e..c518761 100644 --- a/src/heatflow/mod_output.f90 +++ b/src/heatflow/mod_output.f90 @@ -44,7 +44,7 @@ module output use constants, only: real12, int12, TINY, fields use inputs, only: nx,ny,nz, time_step, grid, NA, Check_Steady_State, ntime, WriteToTxt - use inputs, only: Test_Run, freq, RunName, FullRestart, IVERB, write_every + use inputs, only: Test_Run, freq, RunName, FullRestart, IVERB, write_every, CompressedOutput use inputs, only: start_ix, end_ix, start_iy, end_iy, start_iz, end_iz use globe_data, only: Temp_p,Temp_pp, heat, heated_volume, logname implicit none @@ -92,11 +92,17 @@ subroutine data_write(itime) ! find most recent log file and open it !--------------------------------------- CALL last_log(logname,outdir) - open(logunit,file=logname) - ! print*, logunit - ! print*, logname - write(logunit,*) real((itime-1)*(time_step)), & - (Temp_cur(start_ix:end_ix, start_iy:end_iy, start_iz:end_iz)) + if (CompressedOutput) then + open(logunit,file=logname, status='unknown', access='stream', position='append') + write(logunit) real((itime-1)*(time_step), kind=real12) + write(logunit) (Temp_cur(start_ix:end_ix, start_iy:end_iy, start_iz:end_iz)) + else + open(logunit,file=logname) + ! print*, logunit + ! print*, logname + write(logunit,*) real((itime-1)*(time_step)), & + (Temp_cur(start_ix:end_ix, start_iy:end_iy, start_iz:end_iz)) + end if close(logunit) !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ end if @@ -110,17 +116,23 @@ subroutine data_write(itime) ! write out to log file !--------------------------------------- if (.not. Test_run) then - if (WriteToTxt) then - if (mod(itime, write_every) .eq. 0) then - write(*, *) 'Writing Temperature difference to file' - ! print*, logunit - ! print*, logname - open(logunit,file=logname, status='old', position='append') - write(logunit,*) real((itime-1)*(time_step)), & - (Temp_cur(start_ix:end_ix, start_iy:end_iy, start_iz:end_iz)) - close(logunit) - end if - endif + if (mod(itime, write_every) .eq. 0) then + if (CompressedOutput) then + write(*, *) 'Writing Temperature (Compressed) to file' + open(logunit,file=logname, status='unknown', access='stream', position='append') + write(logunit) real((itime-1)*(time_step), kind=real12) + write(logunit) (Temp_cur(start_ix:end_ix, start_iy:end_iy, start_iz:end_iz)) + close(logunit) + elseif (WriteToTxt) then + write(*, *) 'Writing Temperature difference to file' + ! print*, logunit + ! print*, logname + open(logunit,file=logname, status='old', position='append') + write(logunit,*) real((itime-1)*(time_step)), & + (Temp_cur(start_ix:end_ix, start_iy:end_iy, start_iz:end_iz)) + close(logunit) + end if + end if end if !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -202,8 +214,13 @@ subroutine last_log(logname,outdir) i = 0 flag=.true. do while (flag) - write(logname, '(A,A,I2.2)') trim(adjustl(outdir)) // 'output_' // & - trim(adjustl(RunName)),'_', i + if (CompressedOutput) then + write(logname, '(A,A,I2.2,A)') trim(adjustl(outdir)) // 'output_' // & + trim(adjustl(RunName)),'_', i, '.bin' + else + write(logname, '(A,A,I2.2)') trim(adjustl(outdir)) // 'output_' // & + trim(adjustl(RunName)),'_', i + endif inquire(file=logname, exist=flag) i = i+1 end do diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90 index 54161af..a65e01d 100644 --- a/src/heatflow/mod_petsc_solver.f90 +++ b/src/heatflow/mod_petsc_solver.f90 @@ -12,7 +12,7 @@ module petsc_solver ! 'GAMG' = Algebraic Multigrid (best for elliptic PDEs, 10-20x faster) ! 'ILU' = Incomplete LU (good general purpose, robust) ! 'LU' = Direct solver (most robust, uses more memory) - character(len=10), parameter :: PRECONDITIONER = 'GAMG' ! <-- Change here! + character(len=10), parameter :: PRECONDITIONER = 'ILU' ! <-- Change here! ! ==================================== ! Persistent PETSc objects (reused across timesteps for memory efficiency) From c6cf9297d21a1163eb47349e5fae93ec68aeccd3 Mon Sep 17 00:00:00 2001 From: HarryMclean Date: Thu, 26 Mar 2026 19:02:06 +0000 Subject: [PATCH 26/28] Make PETsc dynamically discovered --- Makefile | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 43c8a8b..6a0f200 100644 --- a/Makefile +++ b/Makefile @@ -18,10 +18,34 @@ NCORES := $(shell nproc) # Detect conda environment for BLAS/LAPACK (fallback if no system libs) CONDA_PREFIX ?= $(shell conda info --base 2>/dev/null || echo /home/hm556/miniforge3) -# PETSc (system installation) +# PETSc (discover dynamically when possible) +PKG_CONFIG ?= pkg-config +PETSC_PKG_CFLAGS := $(shell $(PKG_CONFIG) --cflags petsc 2>/dev/null || $(PKG_CONFIG) --cflags PETSc 2>/dev/null) +PETSC_PKG_LIBS := $(shell $(PKG_CONFIG) --libs petsc 2>/dev/null || $(PKG_CONFIG) --libs PETSc 2>/dev/null) + +ifdef PETSC_DIR +PETSC_DIR_INC := -I$(PETSC_DIR)/include +ifdef PETSC_ARCH +PETSC_DIR_INC += -I$(PETSC_DIR)/$(PETSC_ARCH)/include +PETSC_DIR_LIB := -L$(PETSC_DIR)/$(PETSC_ARCH)/lib -Wl,-rpath,$(PETSC_DIR)/$(PETSC_ARCH)/lib +endif +endif + +ifeq ($(strip $(PETSC_PKG_CFLAGS)),) +PETSC_INC := $(PETSC_DIR_INC) +PETSC_LIB := $(PETSC_DIR_LIB) -lpetsc +PETSC_NOTE := (PETSc from PETSC_DIR/PETSC_ARCH) +else +PETSC_INC := $(PETSC_PKG_CFLAGS) +PETSC_LIB := $(PETSC_PKG_LIBS) +PETSC_NOTE := (PETSc via pkg-config) +endif + +ifeq ($(strip $(PETSC_INC)),) PETSC_INC := -I/usr/share/petsc/3.15/include -I/usr/lib/petscdir/petsc3.15/x86_64-linux-gnu-real/include PETSC_LIB := -L/usr/lib/petscdir/petsc3.15/x86_64-linux-gnu-real/lib -lpetsc -Wl,-rpath,/usr/lib/petscdir/petsc3.15/x86_64-linux-gnu-real/lib -PETSC_NOTE := (system PETSc 3.15) +PETSC_NOTE := (legacy PETSc 3.15 fallback) +endif # Use OpenBLAS for multi-threaded BLAS/LAPACK (better than reference BLAS/ATLAS) BLAS_FLAGS := -lopenblas -lgomp -lpthread -lm From d922e0eca25724e6f604fa3f6052d3d064a7cca1 Mon Sep 17 00:00:00 2001 From: HarryMclean Date: Thu, 26 Mar 2026 20:19:21 +0000 Subject: [PATCH 27/28] Implement MPI solver run with "OMP_NUM_THREADS=1 OPENBLAS_NUM_THREADS=1 mpiexec -n 2 ../HeatFlow/bin/ThermalFlow.x" --- CHANGELOG | 9 + Makefile | 48 +++- src/heatflow.f90 | 16 +- src/heatflow/mod_inputs.f90 | 198 ++++++++------- src/heatflow/mod_petsc_solver.f90 | 410 +++++++++++++++++------------- src/heatflow/mod_setup.f90 | 52 ++-- 6 files changed, 442 insertions(+), 291 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 794b181..35a47e5 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -12,6 +12,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed ### Security +##[2.0.0] - 2026-03-26 +## Added +-Add Petsc solver +-Add new constant gradient boundary conditions +-Add new compressed output flag + + + + ##[1.0.0] - 2025-07-28 ## Added -Additional boundary conditions diff --git a/Makefile b/Makefile index 6a0f200..d7715eb 100644 --- a/Makefile +++ b/Makefile @@ -9,8 +9,17 @@ SRC_DIR := ./src BUILD_DIR := ./obj BIN_DIR := ./bin -# Compiler (gfortran with OpenMP for threading) -FC := gfortran +# Compiler (prefer system MPI wrapper for PETSc builds, allow user override) +SYSTEM_PATH := PATH=/usr/bin:/bin +ifeq ($(origin FC), default) +ifneq ($(wildcard /usr/bin/mpifort),) +FC := env $(SYSTEM_PATH) /usr/bin/mpifort +else ifneq ($(wildcard /usr/bin/gfortran),) +FC := env $(SYSTEM_PATH) /usr/bin/gfortran +else +FC := gfortran +endif +endif # Core count NCORES := $(shell nproc) @@ -19,7 +28,11 @@ NCORES := $(shell nproc) CONDA_PREFIX ?= $(shell conda info --base 2>/dev/null || echo /home/hm556/miniforge3) # PETSc (discover dynamically when possible) -PKG_CONFIG ?= pkg-config +ifneq ($(wildcard /usr/bin/pkg-config),) +PKG_CONFIG := env $(SYSTEM_PATH) /usr/bin/pkg-config +else +PKG_CONFIG := pkg-config +endif PETSC_PKG_CFLAGS := $(shell $(PKG_CONFIG) --cflags petsc 2>/dev/null || $(PKG_CONFIG) --cflags PETSc 2>/dev/null) PETSC_PKG_LIBS := $(shell $(PKG_CONFIG) --libs petsc 2>/dev/null || $(PKG_CONFIG) --libs PETSc 2>/dev/null) @@ -47,8 +60,22 @@ PETSC_LIB := -L/usr/lib/petscdir/petsc3.15/x86_64-linux-gnu-real/lib -lpetsc -W PETSC_NOTE := (legacy PETSc 3.15 fallback) endif -# Use OpenBLAS for multi-threaded BLAS/LAPACK (better than reference BLAS/ATLAS) -BLAS_FLAGS := -lopenblas -lgomp -lpthread -lm +# BLAS/LAPACK backend +OPENBLAS_LIBS := $(shell $(PKG_CONFIG) --libs openblas 2>/dev/null) +CONDA_BLAS_LIB := $(firstword $(wildcard $(CONDA_PREFIX)/lib/libblas.so.3 $(CONDA_PREFIX)/lib/libblas.so)) +CONDA_LAPACK_LIB := $(firstword $(wildcard $(CONDA_PREFIX)/lib/liblapack.so.3 $(CONDA_PREFIX)/lib/liblapack.so)) +ifeq ($(strip $(OPENBLAS_LIBS)),) +ifneq ($(strip $(CONDA_BLAS_LIB)$(CONDA_LAPACK_LIB)),) +BLAS_FLAGS := -Wl,--disable-new-dtags -Wl,-rpath,$(CONDA_PREFIX)/lib -Wl,--no-as-needed $(CONDA_BLAS_LIB) $(CONDA_LAPACK_LIB) -Wl,--as-needed -lpthread -lm +BLAS_NOTE := (OpenBLAS from CONDA_PREFIX) +else +BLAS_FLAGS := -lblas -llapack -lpthread -lm +BLAS_NOTE := (system BLAS/LAPACK fallback) +endif +else +BLAS_FLAGS := $(OPENBLAS_LIBS) -lpthread -lm +BLAS_NOTE := (OpenBLAS via pkg-config) +endif # Flags OPTFLAGS := -O3 @@ -86,12 +113,14 @@ SRCS := \ OBJS := $(addprefix $(BUILD_DIR)/,$(notdir $(SRCS:.f90=.o))) +.NOTPARALLEL: + .PHONY: all debug clean distclean run help show all: show $(TARGET) show: - @printf 'Building %s %s\n' '$(NAME)' '$(PETSC_NOTE)' + @printf 'Building %s %s %s\n' '$(NAME)' '$(PETSC_NOTE)' '$(BLAS_NOTE)' $(BIN_DIR) $(BUILD_DIR): mkdir -p $@ @@ -112,8 +141,9 @@ debug: FFLAGS = $(DEBUGFLAGS) debug: clean show $(TARGET) run: $(TARGET) - OMP_NUM_THREADS=$(NCORES) \ - OPENBLAS_NUM_THREADS=$(NCORES) \ + mpiexec -n $(NCORES) \ + OMP_NUM_THREADS=1 \ + OPENBLAS_NUM_THREADS=1 \ OMP_PROC_BIND=spread \ OMP_PLACES=cores \ $< $(RUN_ARGS) @@ -130,7 +160,7 @@ help: @echo "Targets:" @echo " make / make all - build optimized" @echo " make debug - debug build" - @echo " make run - run with all cores" + @echo " make run - run distributed across all cores via MPI" @echo " make clean - remove objects/modules" @echo " make distclean - remove executable" @echo "Variables:" diff --git a/src/heatflow.f90 b/src/heatflow.f90 index 6817c67..cc16531 100644 --- a/src/heatflow.f90 +++ b/src/heatflow.f90 @@ -28,7 +28,7 @@ program HEATFLOW_V0_3 use evolution, only: simulate use setup, only: set_global_variables use INITIAL, only: initial_evolve - use petsc_solver, only: petsc_init, petsc_finalize + use petsc_solver, only: petsc_init, petsc_finalize, petsc_is_root implicit none real(real12) :: cpustart, cpuend, cpustart2, progress @@ -47,7 +47,7 @@ program HEATFLOW_V0_3 !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^! ! give feedback to user that code has begun - write(*,*) 'Setup initialising' + if (petsc_is_root()) write(*,*) 'Setup initialising' !-------------------------------------------------------------! ! Read parameters from input file and set global variables ...! @@ -66,7 +66,7 @@ program HEATFLOW_V0_3 ! give feedback to user that main simulation is begining - write(*,*) 'Setup complete, running simulation' + if (petsc_is_root()) write(*,*) 'Setup complete, running simulation' !-------------------------------------------------------------! ! run simulation for 'ntime' time steps ! @@ -74,7 +74,7 @@ program HEATFLOW_V0_3 do itime=1,ntime - if (iverb.eq.0) then + if (petsc_is_root() .and. iverb.eq.0) then if (Lpercentage) then progress = real(itime)/real(ntime)*100.0 write(*,'(A,A,F12.4,A)', advance = 'no') achar(13)& @@ -93,8 +93,8 @@ program HEATFLOW_V0_3 ! Write results - CALL data_write(itime) - if (IVERB.ge.3) CALL final_print + if (petsc_is_root()) CALL data_write(itime) + if (petsc_is_root() .and. IVERB.ge.3) CALL final_print end do CALL petsc_finalize() @@ -105,11 +105,11 @@ program HEATFLOW_V0_3 ! calculate end time and print to user ! !-------------------------------------------------------------! CALL cpu_time(cpuend) - write(*,'(A,F12.6)') ' time=', cpuend-cpustart + if (petsc_is_root()) write(*,'(A,F12.6)') ' time=', cpuend-cpustart !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^! ! give feedback to user that code has ended - write(*,*) 'all done' + if (petsc_is_root()) write(*,*) 'all done' end program HEATFLOW_V0_3 diff --git a/src/heatflow/mod_inputs.f90 b/src/heatflow/mod_inputs.f90 index d26f629..7f1fd9b 100644 --- a/src/heatflow/mod_inputs.f90 +++ b/src/heatflow/mod_inputs.f90 @@ -75,6 +75,7 @@ module inputs use constants, only: real12, int12 use constructions, only: heatblock, material + use mpi implicit none integer :: unit, newunit @@ -367,18 +368,24 @@ end subroutine read_param !!!################################################################################################# subroutine check_param(readvar,n) implicit none - integer::n,i + integer::n,i,ierr,comm_rank integer,dimension(n)::readvar + logical :: is_root ! Not currently in use + call MPI_Comm_rank(MPI_COMM_WORLD, comm_rank, ierr) + is_root = (comm_rank == 0) + if(any(readvar.gt.1)) then - write(6,*) - write(6,'(A43)') '###############################' - write(6,'(A43)') '########## ERROR ##########' - write(6,'(A43)') '###############################' - write(6,*) - write(6,'(A)') ' --- Error in subroutine "check_param" ---' - write(6,'(A)') ' --- ERROR: same KEYWORD apears more than once ---' + if (is_root) then + write(6,*) + write(6,'(A43)') '###############################' + write(6,'(A43)') '########## ERROR ##########' + write(6,'(A43)') '###############################' + write(6,*) + write(6,'(A)') ' --- Error in subroutine "check_param" ---' + write(6,'(A)') ' --- ERROR: same KEYWORD apears more than once ---' + end if stop end if @@ -389,13 +396,15 @@ subroutine check_param(readvar,n) !------------------------------------------------------------------------------------ ErrKB:if (((any(readvar(9:11).eq.0)) .or. any(readvar(29:31).eq.0)) & .and. (readvar(28) .eq. 0) )then - write(6,*) - write(6,'(A43)') '###############################' - write(6,'(A43)') '########## ERORR ##########' - write(6,'(A43)') '###############################' - write(6,*) - write(6,'(A)') ' --- Error in subroutine "check_param" ---' - write(6,'(A)') ' --- ERROR: KappaBoundx,y,z and KappaBound are not set ---' + if (is_root) then + write(6,*) + write(6,'(A43)') '###############################' + write(6,'(A43)') '########## ERORR ##########' + write(6,'(A43)') '###############################' + write(6,*) + write(6,'(A)') ' --- Error in subroutine "check_param" ---' + write(6,'(A)') ' --- ERROR: KappaBoundx,y,z and KappaBound are not set ---' + end if stop end if ErrKB !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -405,13 +414,15 @@ subroutine check_param(readvar,n) WarKBO:if (((all(readvar(9:11).eq.1)) .or. all(readvar(29:31).eq.1)) & .and. (readvar(28) .eq. 0) )then - write(6,*) - write(6,'(A43)') '###############################' - write(6,'(A43)') '########## Warning ##########' - write(6,'(A43)') '###############################' - write(6,*) - write(6,'(A)') ' --- Warning in subroutine "check_param" ---' - write(6,'(A)') ' --- Warning: KappaBound is not set ---' + if (is_root) then + write(6,*) + write(6,'(A43)') '###############################' + write(6,'(A43)') '########## Warning ##########' + write(6,'(A43)') '###############################' + write(6,*) + write(6,'(A)') ' --- Warning in subroutine "check_param" ---' + write(6,'(A)') ' --- Warning: KappaBound is not set ---' + end if readvar(28) = 1 end if WarKBO @@ -421,13 +432,15 @@ subroutine check_param(readvar,n) !------------------------------------------------------------------------------------ WarKB:if (((any(readvar(9:11).eq.0)) .or. any(readvar(29:31).eq.0)) & .and. (readvar(28) .eq. 1) )then - write(6,*) - write(6,'(A43)') '###############################' - write(6,'(A43)') '########## WARNING ##########' - write(6,'(A43)') '###############################' - write(6,*) - write(6,'(A)') ' --- Warning in subroutine "check_param" ---' - write(6,'(A)') ' --- WARNING: KappaBoundx,y,z not set, using KappaBound ---' + if (is_root) then + write(6,*) + write(6,'(A43)') '###############################' + write(6,'(A43)') '########## WARNING ##########' + write(6,'(A43)') '###############################' + write(6,*) + write(6,'(A)') ' --- Warning in subroutine "check_param" ---' + write(6,'(A)') ' --- WARNING: KappaBoundx,y,z not set, using KappaBound ---' + end if kappaBoundx1 = KappaBound kappaBoundy1 = KappaBound kappaBoundz1 = KappaBound @@ -438,39 +451,45 @@ subroutine check_param(readvar,n) !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ elseif (((any(readvar(9:11).eq.0)) .or. any(readvar(29:31).eq.0)) & .or. (readvar(28) .gt. 0) ) then - write(6,*) - write(6,'(A43)') '###############################' - write(6,'(A43)') '########## WARNING ##########' - write(6,'(A43)') '###############################' - write(6,*) - write(6,'(A)') ' --- Warning in subroutine "check_param" ---' - write(6,'(A)') ' --- WARNING: Periodic Boundry set, set Kappa are ignored ---' + if (is_root) then + write(6,*) + write(6,'(A43)') '###############################' + write(6,'(A43)') '########## WARNING ##########' + write(6,'(A43)') '###############################' + write(6,*) + write(6,'(A)') ' --- Warning in subroutine "check_param" ---' + write(6,'(A)') ' --- WARNING: Periodic Boundry set, set Kappa are ignored ---' + end if end if PB !------------------------------------------------------------------------------------ ! warning about missing bath temps. reassine to T_Bath !------------------------------------------------------------------------------------ if ((readvar(42) .eq. 1) .and. (T_BathCG .gt. 0)) then - write(6,*) - write(6,'(A43)') '###############################' - write(6,'(A43)') '########## WARNING ##########' - write(6,'(A43)') '###############################' - write(6,*) - write(6,'(A)') ' --- Warning in subroutine "check_param" ---' - write(6,'(A)') ' --- WARNING: T_BathCG set T_Bath/ T_Bath x,y,z will not be used ---' + if (is_root) then + write(6,*) + write(6,'(A43)') '###############################' + write(6,'(A43)') '########## WARNING ##########' + write(6,'(A43)') '###############################' + write(6,*) + write(6,'(A)') ' --- Warning in subroutine "check_param" ---' + write(6,'(A)') ' --- WARNING: T_BathCG set T_Bath/ T_Bath x,y,z will not be used ---' + end if !set all T_Bath value checks to 1 readvar(20:25) = 1 readvar(27) = 1 end if WarBath:if ( any(readvar(20:25).eq.0) ) then - write(6,*) - write(6,'(A43)') '###############################' - write(6,'(A43)') '########## WARNING ##########' - write(6,'(A43)') '###############################' - write(6,*) - write(6,'(A)') ' --- Warning in subroutine "check_param" ---' - write(6,'(A)') ' --- WARNING: T_Bath x,y,z not set T_Bath will be used ---' + if (is_root) then + write(6,*) + write(6,'(A43)') '###############################' + write(6,'(A43)') '########## WARNING ##########' + write(6,'(A43)') '###############################' + write(6,*) + write(6,'(A)') ' --- Warning in subroutine "check_param" ---' + write(6,'(A)') ' --- WARNING: T_Bath x,y,z not set T_Bath will be used ---' + end if T_Bathx1 = T_Bath T_Bathx2 = T_Bath T_Bathy1 = T_Bath @@ -481,13 +500,15 @@ subroutine check_param(readvar,n) !Check if T_BathCG less than 0 if ((readvar(42) .eq. 1) .and. (T_BathCG .lt. 0.0_real12)) then - write(6,*) - write(6,'(A43)') '###############################' - write(6,'(A43)') '########## WARNING ##########' - write(6,'(A43)') '###############################' - write(6,*) - write(6,'(A)') ' --- Warning in subroutine "check_param" ---' - write(6,'(A)') ' --- WARNING: T_BathCG is negative, are you sure you? ---' + if (is_root) then + write(6,*) + write(6,'(A43)') '###############################' + write(6,'(A43)') '########## WARNING ##########' + write(6,'(A43)') '###############################' + write(6,*) + write(6,'(A)') ' --- Warning in subroutine "check_param" ---' + write(6,'(A)') ' --- WARNING: T_BathCG is negative, are you sure you? ---' + end if end if if (readvar(42) .eq. 0) then T_BathCG = 0 @@ -500,25 +521,29 @@ subroutine check_param(readvar,n) ! Further warnings !------------------------------------------------------------------------------------ if (any(readvar(32:37).eq.0)) then - write(6,*) - write(6,'(A43)') '###############################' - write(6,'(A43)') '########## WARNING ##########' - write(6,'(A43)') '###############################' - write(6,*) - write(6,'(A)') ' --- WARNING in subroutine "check_param" ---' - write(6,'(A)') ' --- WARNING: Some or All output write cells paramters are not defined ---' - write(6,*) ' --- USING: ', 'Start_ix = ', start_ix, ', end_ix = ', end_ix, ', start_iy = ', & + if (is_root) then + write(6,*) + write(6,'(A43)') '###############################' + write(6,'(A43)') '########## WARNING ##########' + write(6,'(A43)') '###############################' + write(6,*) + write(6,'(A)') ' --- WARNING in subroutine "check_param" ---' + write(6,'(A)') ' --- WARNING: Some or All output write cells paramters are not defined ---' + write(6,*) ' --- USING: ', 'Start_ix = ', start_ix, ', end_ix = ', end_ix, ', start_iy = ', & start_iy,', end_iy = ', end_iy, ', start_iz = ', start_iz, ', end_iz = ', end_iz + end if end if if (readvar(39) .eq. 0) then - write(6,*) - write(6,'(A43)') '###############################' - write(6,'(A43)') '########## WARNING ##########' - write(6,'(A43)') '###############################' - write(6,*) - write(6,'(A)') ' --- WARNING in subroutine "check_param" ---' - write(6,'(A)') ' --- WARNING: TempDepProp not set, no action needed ---' + if (is_root) then + write(6,*) + write(6,'(A43)') '###############################' + write(6,'(A43)') '########## WARNING ##########' + write(6,'(A43)') '###############################' + write(6,*) + write(6,'(A)') ' --- WARNING in subroutine "check_param" ---' + write(6,'(A)') ' --- WARNING: TempDepProp not set, no action needed ---' + end if readvar(39) = 1 end if @@ -527,20 +552,21 @@ subroutine check_param(readvar,n) end if if (any(readvar.eq.0)) then - write(6,*) - write(6,'(A43)') '###############################' - write(6,'(A43)') '########## WARNING ##########' - write(6,'(A43)') '###############################' - write(6,*) - write(6,'(A)') ' --- WARNING in subroutine "check_param" ---' - write(6,'(A)') ' --- WARNING: Essential parameters missing ---' - ! Print all indices of readvar that are 0 - do i = 1, size(readvar) - if (readvar(i) == 0) then + if (is_root) then + write(6,*) + write(6,'(A43)') '###############################' + write(6,'(A43)') '########## WARNING ##########' + write(6,'(A43)') '###############################' + write(6,*) + write(6,'(A)') ' --- WARNING in subroutine "check_param" ---' + write(6,'(A)') ' --- WARNING: Essential parameters missing ---' + do i = 1, size(readvar) + if (readvar(i) == 0) then write(6, '(A,I3)') 'Index ', i, ' of readvar is 0' - end if - end do - write(6,*) + end if + end do + write(6,*) + end if end if !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -548,7 +574,7 @@ subroutine check_param(readvar,n) !------------------------------------------------------------------------------------ ! verbos to check for errors !------------------------------------------------------------------------------------ - if(IVERB .ge. 1) then + if(is_root .and. IVERB .ge. 1) then write(6,'(A)') ' vebose printing option' write(6,'(A)') ' running calculation with :' write(6,'(A35,I6)') ' IVERB = ', IVERB diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90 index a65e01d..be8871c 100644 --- a/src/heatflow/mod_petsc_solver.f90 +++ b/src/heatflow/mod_petsc_solver.f90 @@ -2,17 +2,17 @@ module petsc_solver #include "petsc/finclude/petscsys.h" #include "petsc/finclude/petscksp.h" use petscksp - use iso_c_binding + use mpi implicit none private - public :: petsc_init, petsc_finalize, solve_petsc_csr, petsc_cleanup + public :: petsc_init, petsc_finalize, solve_petsc_csr, petsc_cleanup, petsc_is_root, petsc_world_size ! ===== PRECONDITIONER SELECTION ===== ! Change this to switch between preconditioners: ! 'GAMG' = Algebraic Multigrid (best for elliptic PDEs, 10-20x faster) ! 'ILU' = Incomplete LU (good general purpose, robust) ! 'LU' = Direct solver (most robust, uses more memory) - character(len=10), parameter :: PRECONDITIONER = 'ILU' ! <-- Change here! + character(len=10), parameter :: PRECONDITIONER = 'GAMG' ! ==================================== ! Persistent PETSc objects (reused across timesteps for memory efficiency) @@ -22,14 +22,33 @@ module petsc_solver KSP, save :: ksp_saved = PETSC_NULL_KSP logical, save :: initialized = .false. integer, save :: n_saved = 0 + integer, save :: comm_rank_saved = 0 + integer, save :: comm_size_saved = 1 + integer, save :: row_start_saved = 1 + integer, save :: row_end_saved = 0 + PetscInt, allocatable, save :: ia_saved(:), ja_saved(:) + PetscScalar, allocatable, save :: aval_saved(:) + PetscInt, allocatable, save :: diag_nnz_saved(:), offdiag_nnz_saved(:) + integer, allocatable, save :: recvcounts_saved(:), displs_saved(:) + real(8), allocatable, save :: b_local_saved(:), x_local_saved(:) contains subroutine petsc_init() integer :: ierr call PetscInitialize(PETSC_NULL_CHARACTER, ierr) + call MPI_Comm_rank(PETSC_COMM_WORLD, comm_rank_saved, ierr) + call MPI_Comm_size(PETSC_COMM_WORLD, comm_size_saved, ierr) end subroutine petsc_init + logical function petsc_is_root() + petsc_is_root = (comm_rank_saved == 0) + end function petsc_is_root + + integer function petsc_world_size() + petsc_world_size = comm_size_saved + end function petsc_world_size + subroutine petsc_finalize() integer :: ierr call petsc_cleanup() @@ -37,226 +56,273 @@ subroutine petsc_finalize() end subroutine petsc_finalize subroutine petsc_cleanup() - ! Clean up persistent PETSc objects integer :: ierr + if (A_saved /= PETSC_NULL_MAT) call MatDestroy(A_saved, ierr) if (bb_saved /= PETSC_NULL_VEC) call VecDestroy(bb_saved, ierr) if (xx_saved /= PETSC_NULL_VEC) call VecDestroy(xx_saved, ierr) if (ksp_saved /= PETSC_NULL_KSP) call KSPDestroy(ksp_saved, ierr) + if (allocated(ia_saved)) deallocate(ia_saved) + if (allocated(ja_saved)) deallocate(ja_saved) + if (allocated(aval_saved)) deallocate(aval_saved) + if (allocated(diag_nnz_saved)) deallocate(diag_nnz_saved) + if (allocated(offdiag_nnz_saved)) deallocate(offdiag_nnz_saved) + if (allocated(recvcounts_saved)) deallocate(recvcounts_saved) + if (allocated(displs_saved)) deallocate(displs_saved) + if (allocated(b_local_saved)) deallocate(b_local_saved) + if (allocated(x_local_saved)) deallocate(x_local_saved) + A_saved = PETSC_NULL_MAT bb_saved = PETSC_NULL_VEC xx_saved = PETSC_NULL_VEC ksp_saved = PETSC_NULL_KSP initialized = .false. n_saved = 0 + row_start_saved = 1 + row_end_saved = 0 end subroutine petsc_cleanup + subroutine compute_partition(n, rank, nproc, row_start, row_end) + integer, intent(in) :: n, rank, nproc + integer, intent(out) :: row_start, row_end + integer :: base_rows, remainder_rows, local_rows + + base_rows = n / nproc + remainder_rows = mod(n, nproc) + local_rows = base_rows + if (rank < remainder_rows) local_rows = local_rows + 1 + + row_start = rank * base_rows + min(rank, remainder_rows) + 1 + row_end = row_start + local_rows - 1 + end subroutine compute_partition + + subroutine preallocate_local_rows(n, ia, ja) + integer, intent(in) :: n + integer, intent(in) :: ia(:), ja(:) + integer :: local_row, global_row, entry_idx, nlocal + integer :: diag_begin, diag_end + + nlocal = max(0, row_end_saved - row_start_saved + 1) + allocate(diag_nnz_saved(nlocal), offdiag_nnz_saved(nlocal)) + diag_nnz_saved = 0 + offdiag_nnz_saved = 0 + + diag_begin = row_start_saved + diag_end = row_end_saved + do local_row = 1, nlocal + global_row = row_start_saved + local_row - 1 + do entry_idx = ia(global_row), ia(global_row + 1) - 1 + if (ja(entry_idx) >= diag_begin .and. ja(entry_idx) <= diag_end) then + diag_nnz_saved(local_row) = diag_nnz_saved(local_row) + 1 + else + offdiag_nnz_saved(local_row) = offdiag_nnz_saved(local_row) + 1 + end if + end do + end do + end subroutine preallocate_local_rows + + subroutine build_distributed_matrix(n, ia, ja, aval) + integer, intent(in) :: n + integer, intent(in) :: ia(:), ja(:) + real(8), intent(in) :: aval(:) + + PetscInt :: row_idx(1) + PetscInt, allocatable :: cols0(:) + PetscScalar, allocatable :: vals0(:) + integer :: ierr, global_row, local_row, nlocal, row_nnz, max_row_nnz + + call preallocate_local_rows(n, ia, ja) + nlocal = max(0, row_end_saved - row_start_saved + 1) + max_row_nnz = max(1, maxval(ia(2:n + 1) - ia(1:n))) + + call MatCreate(PETSC_COMM_WORLD, A_saved, ierr) + call MatSetSizes(A_saved, nlocal, nlocal, n, n, ierr) + call MatSetType(A_saved, MATAIJ, ierr) + call MatSeqAIJSetPreallocation(A_saved, 0, diag_nnz_saved, ierr) + call MatMPIAIJSetPreallocation(A_saved, 0, diag_nnz_saved, 0, offdiag_nnz_saved, ierr) + + allocate(cols0(max_row_nnz), vals0(max_row_nnz)) + do local_row = 1, nlocal + global_row = row_start_saved + local_row - 1 + row_nnz = ia(global_row + 1) - ia(global_row) + if (row_nnz <= 0) cycle + + row_idx(1) = global_row - 1 + cols0(1:row_nnz) = ja(ia(global_row):ia(global_row + 1) - 1) - 1 + vals0(1:row_nnz) = aval(ia(global_row):ia(global_row + 1) - 1) + call MatSetValues(A_saved, 1, row_idx, row_nnz, cols0, vals0, INSERT_VALUES, ierr) + end do + deallocate(cols0, vals0) + + call MatAssemblyBegin(A_saved, MAT_FINAL_ASSEMBLY, ierr) + call MatAssemblyEnd(A_saved, MAT_FINAL_ASSEMBLY, ierr) + end subroutine build_distributed_matrix + + subroutine update_distributed_matrix(n, ia, ja, aval) + integer, intent(in) :: n + integer, intent(in) :: ia(:), ja(:) + real(8), intent(in) :: aval(:) + + PetscInt :: row_idx(1) + PetscInt, allocatable :: cols0(:) + PetscScalar, allocatable :: vals0(:) + integer :: ierr, global_row, local_row, nlocal, row_nnz, max_row_nnz + + nlocal = max(0, row_end_saved - row_start_saved + 1) + max_row_nnz = max(1, maxval(ia(2:n + 1) - ia(1:n))) + allocate(cols0(max_row_nnz), vals0(max_row_nnz)) + + call MatZeroEntries(A_saved, ierr) + do local_row = 1, nlocal + global_row = row_start_saved + local_row - 1 + row_nnz = ia(global_row + 1) - ia(global_row) + if (row_nnz <= 0) cycle + + row_idx(1) = global_row - 1 + cols0(1:row_nnz) = ja(ia(global_row):ia(global_row + 1) - 1) - 1 + vals0(1:row_nnz) = aval(ia(global_row):ia(global_row + 1) - 1) + call MatSetValues(A_saved, 1, row_idx, row_nnz, cols0, vals0, INSERT_VALUES, ierr) + end do + deallocate(cols0, vals0) + + call MatAssemblyBegin(A_saved, MAT_FINAL_ASSEMBLY, ierr) + call MatAssemblyEnd(A_saved, MAT_FINAL_ASSEMBLY, ierr) + end subroutine update_distributed_matrix + subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) - integer, intent(in) :: n - integer, intent(in) :: ia(:), ja(:) - real(8), intent(in) :: aval(:), b(:) - real(8), intent(inout) :: x(:) - real(8), intent(in) :: rtol - integer, intent(in) :: maxit - - PC :: pc - integer :: ierr, i, row_nz, start_k, its - integer, allocatable :: cols0(:), idx(:), d_nnz(:) - PetscInt :: idx_array(1) - PetscScalar :: val_array(1) - real(8), allocatable :: vals(:) + integer, intent(in) :: n + integer, intent(in) :: ia(:), ja(:) + real(8), intent(in) :: aval(:), b(:) + real(8), intent(inout) :: x(:) + real(8), intent(in) :: rtol + integer, intent(in) :: maxit + + PC :: pc + integer :: ierr, its real(8) :: rnorm logical :: rebuild_needed + PetscScalar, pointer :: vec_array(:) + integer :: nlocal, rank_idx - ! write(*,'(A,I0)') ' [DEBUG] Entered solve_petsc_csr, n=', n - ! call flush(6) - - if (size(ia) /= n+1) stop 'solve_petsc_csr: ia size mismatch' + if (size(ia) /= n + 1) stop 'solve_petsc_csr: ia size mismatch' if (size(b) /= n .or. size(x) /= n) stop 'solve_petsc_csr: vector size mismatch' - ! write(*,'(A)') ' [DEBUG] Size checks passed' - ! call flush(6) - - ! Determine if we need to rebuild the matrix structure rebuild_needed = .false. if (.not. initialized) rebuild_needed = .true. if (n /= n_saved) rebuild_needed = .true. - - ! write(*,'(A,L1)') ' [DEBUG] rebuild_needed=', rebuild_needed - ! call flush(6) - - ! Create PETSc objects on first call or if size changed + + if (.not. rebuild_needed .and. allocated(ia_saved)) then + if (size(ia_saved) /= size(ia) .or. size(ja_saved) /= size(ja) .or. size(aval_saved) /= size(aval)) then + rebuild_needed = .true. + else if (.not. all(ia_saved == ia - 1) .or. .not. all(ja_saved == ja - 1)) then + rebuild_needed = .true. + end if + end if + if (rebuild_needed) then - ! write(*,'(A)') ' [DEBUG] Starting PETSc object creation...' - ! call flush(6) - - ! Clean up old objects if they exist if (initialized) call petsc_cleanup() - - ! write(*,'(A)') ' [DEBUG] Preallocating matrix...' - ! call flush(6) - - ! Preallocate matrix with exact nonzeros per row (saves memory) - allocate(d_nnz(n)) - do i = 1, n - d_nnz(i) = ia(i+1) - ia(i) + + call compute_partition(n, comm_rank_saved, comm_size_saved, row_start_saved, row_end_saved) + nlocal = max(0, row_end_saved - row_start_saved + 1) + + allocate(ia_saved(size(ia)), ja_saved(size(ja)), aval_saved(size(aval))) + ia_saved = ia - 1 + ja_saved = ja - 1 + aval_saved = aval + + call build_distributed_matrix(n, ia, ja, aval) + + allocate(recvcounts_saved(comm_size_saved), displs_saved(comm_size_saved)) + do rank_idx = 0, comm_size_saved - 1 + call compute_partition(n, rank_idx, comm_size_saved, ierr, its) + recvcounts_saved(rank_idx + 1) = max(0, its - ierr + 1) + displs_saved(rank_idx + 1) = ierr - 1 end do - - ! write(*,'(A,I0,A,I0)') ' [DEBUG] Creating matrix: n=', n, ', max_nnz/row=', maxval(d_nnz) - ! call flush(6) - - ! Create matrix with exact preallocation (most memory-efficient) - call MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 0, d_nnz, A_saved, ierr) - if (ierr /= 0) then - write(0,*) "ERROR: MatCreateSeqAIJ failed with ierr=", ierr - write(0,*) " Matrix size may exceed system limits" - write(0,*) " n=", n, ", nnz=", sum(int(d_nnz,8)), ", max_nnz/row=", maxval(d_nnz) - stop - end if - - deallocate(d_nnz) - - ! Create persistent vectors - call VecCreateSeq(PETSC_COMM_SELF, n, bb_saved, ierr) - call VecCreateSeq(PETSC_COMM_SELF, n, xx_saved, ierr) - - ! Create and configure KSP solver (persistent across timesteps) - call KSPCreate(PETSC_COMM_SELF, ksp_saved, ierr) + + allocate(b_local_saved(max(1, nlocal)), x_local_saved(max(1, nlocal))) + + call VecCreateMPI(PETSC_COMM_WORLD, nlocal, n, bb_saved, ierr) + call VecDuplicate(bb_saved, xx_saved, ierr) + + call KSPCreate(PETSC_COMM_WORLD, ksp_saved, ierr) call KSPSetOperators(ksp_saved, A_saved, A_saved, ierr) + call KSPSetInitialGuessNonzero(ksp_saved, PETSC_TRUE, ierr) call KSPGetPC(ksp_saved, pc, ierr) - - ! Select preconditioner based on parameter at top of module + select case (trim(PRECONDITIONER)) case ('GAMG') - ! Algebraic Multigrid - Best for elliptic PDEs with varying coefficients - ! Optimal O(1) iterations, 10-20x faster than ILU for large problems call PCSetType(pc, PCGAMG, ierr) - call KSPSetType(ksp_saved, KSPGMRES, ierr) ! GMRES works well with AMG - write(*,'(A)') ' [Solver] Using GAMG (Algebraic Multigrid) preconditioner with GMRES' - + call KSPSetType(ksp_saved, KSPGMRES, ierr) + if (petsc_is_root()) then + write(*,'(A,I0,A)') ' [Solver] Using GAMG (Algebraic Multigrid) with GMRES across ', & + comm_size_saved, ' MPI ranks' + end if + case ('ILU') - ! Incomplete LU - Good general purpose, robust call PCSetType(pc, PCILU, ierr) - call KSPSetType(ksp_saved, KSPBCGS, ierr) ! BiCGSTAB works well with ILU - write(*,'(A)') ' [Solver] Using ILU preconditioner with BiCGSTAB' - + call KSPSetType(ksp_saved, KSPBCGS, ierr) + if (petsc_is_root()) then + write(*,'(A,I0,A)') ' [Solver] Using ILU preconditioner with BiCGSTAB across ', & + comm_size_saved, ' MPI ranks' + end if + case ('LU') - ! Direct LU - Most robust, more memory intensive call PCSetType(pc, PCLU, ierr) - call KSPSetType(ksp_saved, KSPPREONLY, ierr) ! Direct solve - write(*,'(A)') ' [Solver] Using direct LU solver' - + call KSPSetType(ksp_saved, KSPPREONLY, ierr) + if (petsc_is_root()) then + write(*,'(A,I0,A)') ' [Solver] Using direct LU solver across ', comm_size_saved, ' MPI ranks' + end if + case default - write(*,'(A,A)') ' [Warning] Unknown preconditioner: ', trim(PRECONDITIONER) - write(*,'(A)') ' Defaulting to ILU' + if (petsc_is_root()) then + write(*,'(A,A)') ' [Warning] Unknown preconditioner: ', trim(PRECONDITIONER) + write(*,'(A)') ' Defaulting to ILU' + end if call PCSetType(pc, PCILU, ierr) call KSPSetType(ksp_saved, KSPBCGS, ierr) end select - - call KSPSetTolerances(ksp_saved, rtol, PETSC_DEFAULT_REAL, & - PETSC_DEFAULT_REAL, maxit, ierr) + + call KSPSetTolerances(ksp_saved, rtol, PETSC_DEFAULT_REAL, PETSC_DEFAULT_REAL, maxit, ierr) call KSPSetNormType(ksp_saved, KSP_NORM_UNPRECONDITIONED, ierr) call KSPSetFromOptions(ksp_saved, ierr) - + initialized = .true. n_saved = n + else + if (any(aval_saved /= aval)) then + aval_saved = aval + call update_distributed_matrix(n, ia, ja, aval) + end if end if - ! Update matrix values (always needed each timestep) - call MatZeroEntries(A_saved, ierr) - do i = 1, n - row_nz = ia(i+1) - ia(i) - if (row_nz > 0) then - start_k = ia(i) - allocate(cols0(row_nz), vals(row_nz)) - ! Convert column indices from 1-based to 0-based for PETSc - cols0 = ja(start_k:start_k+row_nz-1) - 1 - vals = aval(start_k:start_k+row_nz-1) - - ! Set row i-1 (0-based) with column indices cols0 (0-based) - call MatSetValues(A_saved, 1, (/i-1/), row_nz, cols0, vals, INSERT_VALUES, ierr) - deallocate(cols0, vals) - end if - end do - call MatAssemblyBegin(A_saved, MAT_FINAL_ASSEMBLY, ierr) - call MatAssemblyEnd(A_saved, MAT_FINAL_ASSEMBLY, ierr) - - ! Optional: Verify matrix assembly (uncomment for debugging) - ! call MatView(A_saved, PETSC_VIEWER_STDOUT_SELF, ierr) - - ! Update RHS vector in batches to avoid memory issues with huge systems - block - integer, parameter :: VEC_CHUNK = 1000000 - integer :: vec_start, vec_end, vec_len, k - integer, allocatable :: idx_vec(:) - - do vec_start = 1, n, VEC_CHUNK - vec_end = min(vec_start + VEC_CHUNK - 1, n) - vec_len = vec_end - vec_start + 1 - - allocate(idx_vec(vec_len)) - idx_vec = [(vec_start + k - 2, k=1,vec_len)] ! 0-based indices - - call VecSetValues(bb_saved, vec_len, idx_vec, b(vec_start:vec_end), INSERT_VALUES, ierr) - deallocate(idx_vec) - end do - end block - call VecAssemblyBegin(bb_saved,ierr); call VecAssemblyEnd(bb_saved,ierr) - - ! Update initial guess in batches - block - integer, parameter :: VEC_CHUNK = 1000000 - integer :: vec_start, vec_end, vec_len, k - integer, allocatable :: idx_vec(:) - - do vec_start = 1, n, VEC_CHUNK - vec_end = min(vec_start + VEC_CHUNK - 1, n) - vec_len = vec_end - vec_start + 1 - - allocate(idx_vec(vec_len)) - idx_vec = [(vec_start + k - 2, k=1,vec_len)] ! 0-based indices - - call VecSetValues(xx_saved, vec_len, idx_vec, x(vec_start:vec_end), INSERT_VALUES, ierr) - deallocate(idx_vec) - end do - end block - call VecAssemblyBegin(xx_saved,ierr); call VecAssemblyEnd(xx_saved,ierr) + nlocal = max(0, row_end_saved - row_start_saved + 1) + if (nlocal > 0) then + b_local_saved(1:nlocal) = b(row_start_saved:row_end_saved) + x_local_saved(1:nlocal) = x(row_start_saved:row_end_saved) + end if + + call VecGetArrayF90(bb_saved, vec_array, ierr) + if (nlocal > 0) vec_array(1:nlocal) = b_local_saved(1:nlocal) + call VecRestoreArrayF90(bb_saved, vec_array, ierr) + + call VecGetArrayF90(xx_saved, vec_array, ierr) + if (nlocal > 0) vec_array(1:nlocal) = x_local_saved(1:nlocal) + call VecRestoreArrayF90(xx_saved, vec_array, ierr) - ! Solve the system call KSPSolve(ksp_saved, bb_saved, xx_saved, ierr) - if (ierr /= 0) then - write(0,*) "ERROR: KSPSolve failed with error code:", ierr - stop + write(0,*) 'ERROR: KSPSolve failed with error code:', ierr + stop end if - + call KSPGetIterationNumber(ksp_saved, its, ierr) call KSPGetResidualNorm(ksp_saved, rnorm, ierr) - ! Extract solution vector using batched VecGetValues - block - integer, parameter :: CHUNK_SIZE = 100000 - PetscInt, allocatable :: idx_batch(:) - PetscScalar, allocatable :: val_batch(:) - integer :: i_start, i_end, chunk_len, j - - do i_start = 1, n, CHUNK_SIZE - i_end = min(i_start + CHUNK_SIZE - 1, n) - chunk_len = i_end - i_start + 1 - - allocate(idx_batch(chunk_len), val_batch(chunk_len)) - - do j = 1, chunk_len - idx_batch(j) = i_start + j - 2 - end do - - call VecGetValues(xx_saved, chunk_len, idx_batch, val_batch, ierr) - x(i_start:i_end) = val_batch(1:chunk_len) - - deallocate(idx_batch, val_batch) - end do - end block + call VecGetArrayF90(xx_saved, vec_array, ierr) + if (nlocal > 0) x_local_saved(1:nlocal) = vec_array(1:nlocal) + call VecRestoreArrayF90(xx_saved, vec_array, ierr) + call MPI_Allgatherv(x_local_saved, nlocal, MPI_DOUBLE_PRECISION, x, recvcounts_saved, displs_saved, & + MPI_DOUBLE_PRECISION, PETSC_COMM_WORLD, ierr) end subroutine solve_petsc_csr - end module petsc_solver \ No newline at end of file diff --git a/src/heatflow/mod_setup.f90 b/src/heatflow/mod_setup.f90 index 2b3ebe6..57cd2f7 100644 --- a/src/heatflow/mod_setup.f90 +++ b/src/heatflow/mod_setup.f90 @@ -16,6 +16,7 @@ !!!################################################################################################# module setup use constants, only: real12, int12, TINY + use mpi use inputs, only: nx, ny, nz, NA, grid, time_step, kappaBoundx1, kappaBoundy1, kappaBoundz1 use inputs, only: Check_Sparse_Full, Check_Stability, ntime,IVERB, Periodicx, Periodicy use inputs, only: Periodicz ! @@ -36,9 +37,14 @@ module setup !!!################################################################################################# subroutine set_global_variables() integer(int12) :: ix,iy,iz,index + integer :: ierr, comm_rank + logical :: is_root real(real12) :: kappa,kappa3D,h_conv,heat_capacity,rho,sound_speed,tau, em real(real12), dimension(3) :: vel + call MPI_Comm_rank(MPI_COMM_WORLD, comm_rank, ierr) + is_root = (comm_rank == 0) + allocate(Temp_cur(nx, ny, nz)) allocate(Temp_p(NA)) allocate(Temp_pp(NA)) @@ -52,12 +58,14 @@ subroutine set_global_variables() ! can be expanded to include more properties at a ! later date !--------------------------------------------------- - write(*,*) "Setting up material properties" - write(*,'(A,I10,A)') " Processing ", NA, " grid cells..." + if (is_root) then + write(*,*) "Setting up material properties" + write(*,'(A,I10,A)') " Processing ", NA, " grid cells..." + end if index = 0 do iz = 1, nz ! Progress reporting every 10% for large grids - if (mod(iz-1, max(1,nz/10)) == 0 .and. iz > 1) then + if (is_root .and. mod(iz-1, max(1,nz/10)) == 0 .and. iz > 1) then write(*,'(A,I3,A)') " Progress: ", int(100.0*real(iz)/real(nz)), "%" end if do iy = 1, ny @@ -81,7 +89,7 @@ subroutine set_global_variables() !--------------------------------------------------- ! Check if the sparse matrix matches the full matrix !--------------------------------------------------- - write(*,*) "Building sparse H matrix..." + if (is_root) write(*,*) "Building sparse H matrix..." if (Check_Sparse_Full) then print*, "CHECK SPARSE FULL" CALL build_Hmatrix() @@ -89,7 +97,7 @@ subroutine set_global_variables() ! Build CSR format directly (acsr, ja, ia are allocated inside sparse_Hmatrix) CALL sparse_Hmatrix() ! No need for COO->CSR conversion anymore, it's already in CSR format! - write(*,*) "Sparse matrix setup complete." + if (is_root) write(*,*) "Sparse matrix setup complete." end if !^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -103,6 +111,8 @@ end subroutine set_global_variables !!!################################################################################################# subroutine sparse_Hmatrix() implicit none + integer :: ierr, comm_rank + logical :: is_root real(real12) :: H0 ! Holds the value of the H matrix integer(int12) :: i, j, count, k, row ! i and j are the row and column of the H matrix integer(int12) :: nnz_estimate @@ -112,14 +122,12 @@ subroutine sparse_Hmatrix() real(real12), allocatable, dimension(:) :: row_vals integer(int12), allocatable, dimension(:) :: row_cols integer(int12) :: row_count, max_row_size + + call MPI_Comm_rank(MPI_COMM_WORLD, comm_rank, ierr) + is_root = (comm_rank == 0) ra%n = NA ! The number of rows in the H matrix - ra%len = len ! The number of non-zero elements in the H matrix - ! Allocate the arrays to hold the H matrix in sparse storage - allocate(ra%val(len), ra%irow(len), ra%jcol(len)) - ra%val(:)=0 - ra%irow(:)=-2 - ra%jcol(:)=-1 + ra%len = 0 addit = [1] ! The values to add to the row to get the column if (Periodicx) addit = [addit, (nx-1)] if (ny .gt. 1) addit = [addit, nx] ! Add the values to add to the row to get the column @@ -127,6 +135,18 @@ subroutine sparse_Hmatrix() if (nz .gt. 1) addit = [addit, nx*ny] ! Add the values to add to the row to get the column if ((Periodicz).and.(nz .gt. 1)) addit = [addit, (nz-1)*ny*nx] + max_row_size = 1 + 2*size(addit,1) + nnz_estimate = NA * max_row_size + + if (allocated(acsr)) deallocate(acsr) + if (allocated(ja)) deallocate(ja) + if (allocated(ia)) deallocate(ia) + allocate(acsr(nnz_estimate), ja(nnz_estimate), ia(NA+1)) + allocate(row_vals(max_row_size), row_cols(max_row_size)) + acsr = 0.0_real12 + ja = 0 + ia = 0 + !write(6,*) NA, nx,ny,nz !write(6,*) addit,size(addit,1) @@ -138,10 +158,10 @@ subroutine sparse_Hmatrix() ia(1) = 1 ! CSR row pointer (1-based for Fortran) ! Build CSR format row-by-row - write(*,'(A)') " Building CSR matrix row-by-row..." + if (is_root) write(*,'(A)') " Building CSR matrix row-by-row..." parent_loop: do row = 1, NA ! Progress reporting every 10% - if (mod(row-1, max(1,NA/10)) == 0 .and. row > 1) then + if (is_root .and. mod(row-1, max(1,NA/10)) == 0 .and. row > 1) then write(*,'(A,I3,A,I12,A)') " Progress: ", int(100.0*real(row)/real(NA)), & "%, nnz=", count, "" end if @@ -200,12 +220,12 @@ subroutine sparse_Hmatrix() ! Trim arrays to actual size if we over-estimated if (count < size(acsr)) then - write(*,'(A,I12,A,I12)') " Trimming arrays from ", size(acsr), " to ", count + if (is_root) write(*,'(A,I12,A,I12)') " Trimming arrays from ", size(acsr), " to ", count call trim_csr_arrays(acsr, ja, count) end if - deallocate(row_vals, row_cols) - write(*,'(A,I12,A)') " CSR matrix built successfully. Actual nonzeros: ", count, "" + deallocate(addit, row_vals, row_cols) + if (is_root) write(*,'(A,I12,A)') " CSR matrix built successfully. Actual nonzeros: ", count, "" end subroutine sparse_Hmatrix !!!################################################################################################# From 5fe5f036a1985576bb0ba25d7afad8ed0f55bd88 Mon Sep 17 00:00:00 2001 From: Ned Taylor <71959356+nedtaylor@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:57:46 +0100 Subject: [PATCH 28/28] Improve fpm compilation --- fpm.toml | 16 +++++++++++++-- src/heatflow/mod_petsc_solver.f90 | 33 ++++++++++++++++++++----------- 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/fpm.toml b/fpm.toml index a1f0b5d..fcc5f3f 100644 --- a/fpm.toml +++ b/fpm.toml @@ -9,12 +9,24 @@ description = "A Fortran executable for modelling Cattaneo heat flow" [preprocess.cpp] suffixes = ["F90", "f90"] +[dependencies] +mpi = "*" + +[features] +petsc.build.external-modules = ["petscksp"] +petsc.build.link = ["petsc"] +petsc.flags = "-I../petsc/include -I../petsc/arch-darwin-c-debug/include" +petsc.link-time-flags = "-L../petsc/arch-darwin-c-debug/lib -Wl,-rpath,../petsc/arch-darwin-c-debug/lib" + +[profiles] +default = ["petsc"] + [library] source-dir="src" [fortran] -implicit-typing = false -implicit-external = false +implicit-typing = true +implicit-external = true source-form = "free" [[executable]] diff --git a/src/heatflow/mod_petsc_solver.f90 b/src/heatflow/mod_petsc_solver.f90 index be8871c..65bbf45 100644 --- a/src/heatflow/mod_petsc_solver.f90 +++ b/src/heatflow/mod_petsc_solver.f90 @@ -27,6 +27,7 @@ module petsc_solver integer, save :: row_start_saved = 1 integer, save :: row_end_saved = 0 PetscInt, allocatable, save :: ia_saved(:), ja_saved(:) + PetscInt, allocatable, save :: local_indices_saved(:) PetscScalar, allocatable, save :: aval_saved(:) PetscInt, allocatable, save :: diag_nnz_saved(:), offdiag_nnz_saved(:) integer, allocatable, save :: recvcounts_saved(:), displs_saved(:) @@ -64,6 +65,7 @@ subroutine petsc_cleanup() if (ksp_saved /= PETSC_NULL_KSP) call KSPDestroy(ksp_saved, ierr) if (allocated(ia_saved)) deallocate(ia_saved) if (allocated(ja_saved)) deallocate(ja_saved) + if (allocated(local_indices_saved)) deallocate(local_indices_saved) if (allocated(aval_saved)) deallocate(aval_saved) if (allocated(diag_nnz_saved)) deallocate(diag_nnz_saved) if (allocated(offdiag_nnz_saved)) deallocate(offdiag_nnz_saved) @@ -201,8 +203,7 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) integer :: ierr, its real(8) :: rnorm logical :: rebuild_needed - PetscScalar, pointer :: vec_array(:) - integer :: nlocal, rank_idx + integer :: nlocal, rank_idx, local_row if (size(ia) /= n + 1) stop 'solve_petsc_csr: ia size mismatch' if (size(b) /= n .or. size(x) /= n) stop 'solve_petsc_csr: vector size mismatch' @@ -230,6 +231,13 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) ja_saved = ja - 1 aval_saved = aval + allocate(local_indices_saved(max(1, nlocal))) + if (nlocal > 0) then + do local_row = 1, nlocal + local_indices_saved(local_row) = row_start_saved + local_row - 2 + end do + end if + call build_distributed_matrix(n, ia, ja, aval) allocate(recvcounts_saved(comm_size_saved), displs_saved(comm_size_saved)) @@ -301,13 +309,16 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) x_local_saved(1:nlocal) = x(row_start_saved:row_end_saved) end if - call VecGetArrayF90(bb_saved, vec_array, ierr) - if (nlocal > 0) vec_array(1:nlocal) = b_local_saved(1:nlocal) - call VecRestoreArrayF90(bb_saved, vec_array, ierr) - - call VecGetArrayF90(xx_saved, vec_array, ierr) - if (nlocal > 0) vec_array(1:nlocal) = x_local_saved(1:nlocal) - call VecRestoreArrayF90(xx_saved, vec_array, ierr) + call VecSet(bb_saved, 0.0d0, ierr) + call VecSet(xx_saved, 0.0d0, ierr) + if (nlocal > 0) then + call VecSetValues(bb_saved, nlocal, local_indices_saved, b_local_saved, INSERT_VALUES, ierr) + call VecSetValues(xx_saved, nlocal, local_indices_saved, x_local_saved, INSERT_VALUES, ierr) + end if + call VecAssemblyBegin(bb_saved, ierr) + call VecAssemblyEnd(bb_saved, ierr) + call VecAssemblyBegin(xx_saved, ierr) + call VecAssemblyEnd(xx_saved, ierr) call KSPSolve(ksp_saved, bb_saved, xx_saved, ierr) if (ierr /= 0) then @@ -318,9 +329,7 @@ subroutine solve_petsc_csr(n, ia, ja, aval, b, x, rtol, maxit) call KSPGetIterationNumber(ksp_saved, its, ierr) call KSPGetResidualNorm(ksp_saved, rnorm, ierr) - call VecGetArrayF90(xx_saved, vec_array, ierr) - if (nlocal > 0) x_local_saved(1:nlocal) = vec_array(1:nlocal) - call VecRestoreArrayF90(xx_saved, vec_array, ierr) + if (nlocal > 0) call VecGetValues(xx_saved, nlocal, local_indices_saved, x_local_saved, ierr) call MPI_Allgatherv(x_local_saved, nlocal, MPI_DOUBLE_PRECISION, x, recvcounts_saved, displs_saved, & MPI_DOUBLE_PRECISION, PETSC_COMM_WORLD, ierr)