| 1 | ! DOUG - Domain decomposition On Unstructured Grids |
|---|
| 2 | ! Copyright (C) 1998-2006 Faculty of Computer Science, University of Tartu and |
|---|
| 3 | ! Department of Mathematics, University of Bath |
|---|
| 4 | ! |
|---|
| 5 | ! This library is free software; you can redistribute it and/or |
|---|
| 6 | ! modify it under the terms of the GNU Lesser General Public |
|---|
| 7 | ! License as published by the Free Software Foundation; either |
|---|
| 8 | ! version 2.1 of the License, or (at your option) any later version. |
|---|
| 9 | ! |
|---|
| 10 | ! This library is distributed in the hope that it will be useful, |
|---|
| 11 | ! but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 12 | ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|---|
| 13 | ! Lesser General Public License for more details. |
|---|
| 14 | ! |
|---|
| 15 | ! You should have received a copy of the GNU Lesser General Public |
|---|
| 16 | ! License along with this library; if not, write to the Free Software |
|---|
| 17 | ! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|---|
| 18 | ! or contact the authors (University of Tartu, Faculty of Computer Science, Chair |
|---|
| 19 | ! of Distributed Systems, Liivi 2, 50409 Tartu, Estonia, http://dougdevel.org, |
|---|
| 20 | ! mailto:info(at)dougdevel.org) |
|---|
| 21 | |
|---|
| 22 | !!---------------------------------- |
|---|
| 23 | !! Preconditioned Conjugate Gradient |
|---|
| 24 | !!---------------------------------- |
|---|
| 25 | module pcg_mod |
|---|
| 26 | |
|---|
| 27 | use ConvInf_mod |
|---|
| 28 | use SpMtx_mods |
|---|
| 29 | use Mesh_class |
|---|
| 30 | use globals |
|---|
| 31 | |
|---|
| 32 | implicit none |
|---|
| 33 | |
|---|
| 34 | #include<doug_config.h> |
|---|
| 35 | |
|---|
| 36 | #ifdef D_COMPLEX |
|---|
| 37 | #define float complex |
|---|
| 38 | #else |
|---|
| 39 | #define float real |
|---|
| 40 | #endif |
|---|
| 41 | |
|---|
| 42 | integer,save :: coarseid=0 |
|---|
| 43 | private :: & |
|---|
| 44 | preconditioner, & |
|---|
| 45 | msolve |
|---|
| 46 | |
|---|
| 47 | ! profiling |
|---|
| 48 | real(kind=rk), private, save :: time_preconditioner = 0 |
|---|
| 49 | |
|---|
| 50 | contains |
|---|
| 51 | |
|---|
| 52 | !-------------------------------------------- |
|---|
| 53 | ! Preconditioned conjugate gradient method |
|---|
| 54 | !-------------------------------------------- |
|---|
| 55 | subroutine pcg (A, b, x, Msh, tol_, maxit_, & |
|---|
| 56 | x0_, solinf, resvects_, CoarseMtx_) |
|---|
| 57 | implicit none |
|---|
| 58 | |
|---|
| 59 | type(SpMtx),intent(in out) :: A ! System matrix (sparse) |
|---|
| 60 | float(kind=rk),dimension(:),pointer :: b ! RHS |
|---|
| 61 | float(kind=rk),dimension(:),pointer :: x ! Solution |
|---|
| 62 | ! Mesh - aux data for Ax operation |
|---|
| 63 | type(Mesh), intent(in) :: Msh |
|---|
| 64 | |
|---|
| 65 | ! Optional arguments |
|---|
| 66 | ! Tolerance of the method |
|---|
| 67 | real(kind=rk), intent(in), optional :: tol_ |
|---|
| 68 | ! Max number of iterations |
|---|
| 69 | integer, intent(in), optional :: maxit_ |
|---|
| 70 | ! Initial guess |
|---|
| 71 | float(kind=rk), dimension(:), intent(in), optional :: x0_ |
|---|
| 72 | ! Solution statistics |
|---|
| 73 | type(ConvInf), intent(in out), optional :: solinf |
|---|
| 74 | ! Fill in the 'resvect' or not |
|---|
| 75 | logical, intent(in), optional :: resvects_ |
|---|
| 76 | type(SpMtx),optional :: CoarseMtx_ ! Coarse matrix |
|---|
| 77 | |
|---|
| 78 | real(kind=rk) :: tol ! Tolerance |
|---|
| 79 | integer :: maxit ! Max number of iterations |
|---|
| 80 | logical :: resvects=.false. |
|---|
| 81 | |
|---|
| 82 | !real(kind=rk), dimension(size(b)) :: r, p, q, m, z, b_k |
|---|
| 83 | real(kind=rk),dimension(:),pointer :: r, p, q, m, z, b_k |
|---|
| 84 | real(kind=rk) :: rho_curr, rho_prev, init_norm, res_norm |
|---|
| 85 | real(kind=rk) :: alpha, beta, tmp, ratio_norm |
|---|
| 86 | integer :: iter,nfreds |
|---|
| 87 | integer :: ierr |
|---|
| 88 | ! + test |
|---|
| 89 | float(kind=rk), dimension(:), pointer :: arr_copy, gv_tmp |
|---|
| 90 | |
|---|
| 91 | !!$ real(kind=rk) :: r_max |
|---|
| 92 | |
|---|
| 93 | write(stream,'(/a)') 'Preconditioned conjugate gradient:' |
|---|
| 94 | |
|---|
| 95 | if (size(b) /= size(x)) & |
|---|
| 96 | call DOUG_abort('[pcg] : SEVERE : size(b) /= size(x)',-1) |
|---|
| 97 | |
|---|
| 98 | !!$ if (present(x0_)) |
|---|
| 99 | !!$ ! use method with initial guess |
|---|
| 100 | |
|---|
| 101 | |
|---|
| 102 | nfreds=size(b) |
|---|
| 103 | allocate(r(nfreds)) |
|---|
| 104 | allocate(p(nfreds)) |
|---|
| 105 | allocate(q(nfreds)) |
|---|
| 106 | allocate(m(nfreds)) |
|---|
| 107 | allocate(z(nfreds)) |
|---|
| 108 | allocate(b_k(nfreds)) |
|---|
| 109 | |
|---|
| 110 | |
|---|
| 111 | tol = sctls%solve_tolerance |
|---|
| 112 | if (present(tol_)) & |
|---|
| 113 | tol = tol_ |
|---|
| 114 | |
|---|
| 115 | maxit = sctls%solve_maxiters |
|---|
| 116 | if (maxit==-1) maxit=100000 |
|---|
| 117 | if (present(maxit_)) & |
|---|
| 118 | maxit = maxit_ |
|---|
| 119 | |
|---|
| 120 | if (present(resvects_).and.(.not.present(solinf))) & |
|---|
| 121 | call DOUG_abort('[pcg] : SEVERE : "resvects_" must be given along'//& |
|---|
| 122 | ' with "solinf".',-1) |
|---|
| 123 | |
|---|
| 124 | ! Allocate convergence info structure |
|---|
| 125 | if (present(solinf).and.(.not.present(resvects_))) then |
|---|
| 126 | call ConvInf_Init(solinf) |
|---|
| 127 | else if (present(solinf)) then |
|---|
| 128 | if (present(resvects_).and.(resvects_.eqv.(.true.))) then |
|---|
| 129 | call ConvInf_Init(solinf, maxit) |
|---|
| 130 | resvects = .true. |
|---|
| 131 | end if |
|---|
| 132 | end if |
|---|
| 133 | |
|---|
| 134 | ! Initialise auxiliary data structures |
|---|
| 135 | ! to assist with pmvm |
|---|
| 136 | call pmvmCommStructs_init(A, Msh) |
|---|
| 137 | |
|---|
| 138 | ! + test |
|---|
| 139 | if (ismaster()) & |
|---|
| 140 | allocate(gv_tmp(Msh%ngf)) |
|---|
| 141 | allocate(arr_copy(Msh%nlf)) |
|---|
| 142 | |
|---|
| 143 | ! Build preconditioner |
|---|
| 144 | ! call preconditioner(A, m, CoarseMtx_) |
|---|
| 145 | !!$ ! + test |
|---|
| 146 | !!$ arr_copy = m |
|---|
| 147 | !!$ call Vect_newToOldPerm(arr_copy) |
|---|
| 148 | !!$ gv_tmp = 0.0_rk |
|---|
| 149 | !!$ call Vect_Gather(arr_copy, gv_tmp, Msh) |
|---|
| 150 | !!$ if (ismaster()) & |
|---|
| 151 | !!$ call Vect_Print(gv_tmp, 'm ::') |
|---|
| 152 | |
|---|
| 153 | !!$ ! + test |
|---|
| 154 | !!$ !b = 1.0_rk |
|---|
| 155 | !!$ arr_copy = b |
|---|
| 156 | !!$ call Vect_newToOldPerm(arr_copy) |
|---|
| 157 | !!$ call Vect_Print(arr_copy, 'b local (original ordering) ::') |
|---|
| 158 | !!$ gv_tmp = 0.0_rk |
|---|
| 159 | !!$ call Vect_Gather(arr_copy, gv_tmp, Msh) |
|---|
| 160 | !!$ if (ismaster()) & |
|---|
| 161 | !!$ call Vect_Print(gv_tmp, 'b global (original ordering) ::') |
|---|
| 162 | |
|---|
| 163 | !!$ ! + test |
|---|
| 164 | !!$ arr_copy = x |
|---|
| 165 | !!$ call Vect_newToOldPerm(arr_copy) |
|---|
| 166 | !!$ call Vect_Print(arr_copy, 'x local (original ordering) ::') |
|---|
| 167 | !!$ gv_tmp = 0.0_rk |
|---|
| 168 | !!$ call Vect_Gather(arr_copy, gv_tmp, Msh) |
|---|
| 169 | !!$ if (ismaster()) & |
|---|
| 170 | !!$ call Vect_Print(gv_tmp, 'x global (original ordering) ::') |
|---|
| 171 | |
|---|
| 172 | !r = b - SpMtx_pmvm(A, x, Msh) |
|---|
| 173 | call SpMtx_pmvm(r,A,x,Msh) |
|---|
| 174 | r = b - r |
|---|
| 175 | !call Vect_Print(r,'initial residual') |
|---|
| 176 | |
|---|
| 177 | ! + test |
|---|
| 178 | !!$ arr_copy = r |
|---|
| 179 | !!$ call Vect_newToOldPerm(arr_copy) |
|---|
| 180 | !!$ gv_tmp = 0.0_rk |
|---|
| 181 | !!$ call Vect_Gather(arr_copy, gv_tmp, Msh) |
|---|
| 182 | !!$ if (ismaster()) & |
|---|
| 183 | !!$ call Vect_Print(gv_tmp, 'r ::') |
|---|
| 184 | |
|---|
| 185 | init_norm = Vect_dot_product(r,r) |
|---|
| 186 | if (init_norm == 0.0) & |
|---|
| 187 | init_norm = 1.0 |
|---|
| 188 | |
|---|
| 189 | write(stream,'(a,i6,a,e8.2)') 'maxit = ',maxit,', tol = ',tol |
|---|
| 190 | write(stream,'(a,e22.15)') 'init_norm = ', dsqrt(init_norm) |
|---|
| 191 | |
|---|
| 192 | iter = 1 |
|---|
| 193 | ratio_norm = 1.0_rk |
|---|
| 194 | do while((ratio_norm > tol*tol).and.(iter <= maxit)) |
|---|
| 195 | call msolve(m,r,z) |
|---|
| 196 | !if (present(CoarseMtx_)) then |
|---|
| 197 | ! call preconditioner(z,A,r,CoarseMtx_) |
|---|
| 198 | ! !z=r |
|---|
| 199 | !else |
|---|
| 200 | ! call preconditioner(z,A,r) |
|---|
| 201 | !endif |
|---|
| 202 | |
|---|
| 203 | !call Vect_Print(z,'preconditioner') |
|---|
| 204 | |
|---|
| 205 | ! compute current rho |
|---|
| 206 | rho_curr = Vect_dot_product(r,z) |
|---|
| 207 | if (iter == 1) then |
|---|
| 208 | p = z |
|---|
| 209 | else |
|---|
| 210 | beta = rho_curr / rho_prev |
|---|
| 211 | p = z + beta * p |
|---|
| 212 | end if |
|---|
| 213 | !q = SpMtx_pmvm(A, p, Msh) |
|---|
| 214 | call SpMtx_pmvm(q,A,p,Msh) |
|---|
| 215 | ! compute alpha |
|---|
| 216 | alpha = rho_curr / Vect_dot_product(p,q) |
|---|
| 217 | x = x + alpha * p |
|---|
| 218 | r = r - alpha * q |
|---|
| 219 | rho_prev = rho_curr |
|---|
| 220 | ! check |
|---|
| 221 | res_norm = Vect_dot_product(r,r) |
|---|
| 222 | ratio_norm = dsqrt(res_norm) / dsqrt(init_norm) |
|---|
| 223 | !!$ ratio_norm = res_norm / init_norm |
|---|
| 224 | if (ismaster()) & |
|---|
| 225 | write(stream, '(i5,a,e22.15)') iter,': res_norm=',dsqrt(res_norm) |
|---|
| 226 | !write(stream, '(i5,a,e22.15,e22.15)') iter,': dsqrt(ratio_norm)=',& |
|---|
| 227 | !dsqrt(ratio_norm), dsqrt(res_norm) |
|---|
| 228 | iter = iter + 1 |
|---|
| 229 | end do |
|---|
| 230 | |
|---|
| 231 | ! Deallocate auxiliary data structures |
|---|
| 232 | ! helped to assist with pmvm |
|---|
| 233 | call pmvmCommStructs_destroy() |
|---|
| 234 | |
|---|
| 235 | deallocate(b_k) |
|---|
| 236 | deallocate(z) |
|---|
| 237 | deallocate(m) |
|---|
| 238 | deallocate(q) |
|---|
| 239 | deallocate(p) |
|---|
| 240 | deallocate(r) |
|---|
| 241 | if (associated(gv_tmp)) deallocate(gv_tmp) |
|---|
| 242 | if (associated(arr_copy)) deallocate(arr_copy) |
|---|
| 243 | end subroutine pcg |
|---|
| 244 | |
|---|
| 245 | subroutine preconditioner_1level(sol,A,rhs,M,res,A_interf_,refactor_) |
|---|
| 246 | use subsolvers |
|---|
| 247 | implicit none |
|---|
| 248 | real(kind=rk),dimension(:),pointer :: sol !< solution |
|---|
| 249 | type(SpMtx) :: A !< sparse system matrix |
|---|
| 250 | real(kind=rk),dimension(:),pointer :: rhs !< right hand side |
|---|
| 251 | type(Mesh),intent(in) :: M !< Mesh |
|---|
| 252 | real(kind=rk),dimension(:),pointer :: res !< residual vector, allocated |
|---|
| 253 | !! here for multiplicative Schwarz |
|---|
| 254 | type(SpMtx),optional :: A_interf_ !< matr@interf. |
|---|
| 255 | logical,intent(inout),optional :: refactor_ |
|---|
| 256 | |
|---|
| 257 | type(SpMtx) :: A_tmp |
|---|
| 258 | |
|---|
| 259 | if (refactor_.and.present(A_interf_)) then!{ |
|---|
| 260 | if (sctls%verbose>9) then |
|---|
| 261 | !call SpMtx_printMat(A) |
|---|
| 262 | call SpMtx_printRaw(A) |
|---|
| 263 | !call SpMtx_printMat(A_interf_) |
|---|
| 264 | call SpMtx_printRaw(A_interf_) |
|---|
| 265 | !stop |
|---|
| 266 | endif |
|---|
| 267 | if (sctls%input_type==DCTL_INPUT_TYPE_ASSEMBLED) then |
|---|
| 268 | ! we need the fully sorted entries but A has block struct... |
|---|
| 269 | ! keep the original values... |
|---|
| 270 | if (associated(A%M_bound)) then |
|---|
| 271 | A_tmp=SpMtx_newInit(nnz=A%nnz,nblocks=A%nblocks,& |
|---|
| 272 | nrows=A%nrows,ncols=A%ncols,& |
|---|
| 273 | indi=A%indi,indj=A%indj,val=A%val,& |
|---|
| 274 | arrange_type=A%arrange_type,M_bound=A%M_bound) |
|---|
| 275 | else |
|---|
| 276 | A_tmp=SpMtx_newInit(nnz=A%nnz,nblocks=A%nblocks,& |
|---|
| 277 | nrows=A%nrows,ncols=A%ncols,& |
|---|
| 278 | indi=A%indi,indj=A%indj,val=A%val,& |
|---|
| 279 | arrange_type=A%arrange_type) |
|---|
| 280 | endif |
|---|
| 281 | A%arrange_type=D_SpMtx_ARRNG_NO |
|---|
| 282 | !A%nrows=max(A%nrows,A_interf_%nrows) |
|---|
| 283 | !A%ncols=max(A%nrows,A_interf_%ncols) |
|---|
| 284 | !if (associated(A%M_bound)) then |
|---|
| 285 | ! deallocate(A%M_bound) |
|---|
| 286 | !endif |
|---|
| 287 | call SpMtx_arrange(A,D_SpMtx_ARRNG_ROWS,sort=.true.) |
|---|
| 288 | call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, & |
|---|
| 289 | A_interf_=A_interf_, & |
|---|
| 290 | refactor=refactor_) !fine solves |
|---|
| 291 | if (sctls%method>1) then ! For multiplicative Schwarz method...: |
|---|
| 292 | refactor_=.false. |
|---|
| 293 | call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, & |
|---|
| 294 | A_interf_=A_interf_, & |
|---|
| 295 | refactor=refactor_) !fine solves |
|---|
| 296 | endif |
|---|
| 297 | ! put the original structure and orders back: |
|---|
| 298 | A%indi=A_tmp%indi |
|---|
| 299 | A%indj=A_tmp%indj |
|---|
| 300 | A%val=A_tmp%val |
|---|
| 301 | if (associated(A_tmp%M_bound)) A%M_bound=A_tmp%M_bound |
|---|
| 302 | A%nrows=A_tmp%nrows |
|---|
| 303 | A%ncols=A_tmp%ncols |
|---|
| 304 | A%arrange_type=A_tmp%arrange_type |
|---|
| 305 | call SpMtx_Destroy(A_tmp) |
|---|
| 306 | else |
|---|
| 307 | call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, & |
|---|
| 308 | A_interf_=A_interf_, & |
|---|
| 309 | refactor=refactor_) !fine solves |
|---|
| 310 | if (sctls%method>1) then ! For multiplicative Schwarz method...: |
|---|
| 311 | refactor_=.false. |
|---|
| 312 | call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, & |
|---|
| 313 | A_interf_=A_interf_, & |
|---|
| 314 | refactor=refactor_) !fine solves |
|---|
| 315 | endif |
|---|
| 316 | endif |
|---|
| 317 | elseif (refactor_.and.sctls%input_type==DCTL_INPUT_TYPE_ASSEMBLED) then!}{ |
|---|
| 318 | if (associated(A%M_bound)) then |
|---|
| 319 | A_tmp=SpMtx_newInit(nnz=size(A%indi),nblocks=A%nblocks,& |
|---|
| 320 | nrows=A%nrows,ncols=A%ncols,& |
|---|
| 321 | indi=A%indi,indj=A%indj,val=A%val,& |
|---|
| 322 | arrange_type=A%arrange_type,M_bound=A%M_bound) |
|---|
| 323 | else |
|---|
| 324 | A_tmp=SpMtx_newInit(nnz=size(A%indi),nblocks=A%nblocks,& |
|---|
| 325 | nrows=A%nrows,ncols=A%ncols,& |
|---|
| 326 | indi=A%indi,indj=A%indj,val=A%val,& |
|---|
| 327 | arrange_type=A%arrange_type) |
|---|
| 328 | endif |
|---|
| 329 | A_tmp%nnz=A%nnz |
|---|
| 330 | A%arrange_type=D_SpMtx_ARRNG_NO |
|---|
| 331 | call SpMtx_arrange(A,D_SpMtx_ARRNG_ROWS,sort=.true.) |
|---|
| 332 | call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, & |
|---|
| 333 | refactor=refactor_) !fine solves |
|---|
| 334 | if (sctls%method>1) then ! For multiplicative Schwarz method...: |
|---|
| 335 | refactor_=.false. |
|---|
| 336 | call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, & |
|---|
| 337 | refactor=refactor_) !fine solves |
|---|
| 338 | endif |
|---|
| 339 | ! put the original structure and orders back: |
|---|
| 340 | A%indi=A_tmp%indi |
|---|
| 341 | A%indj=A_tmp%indj |
|---|
| 342 | A%val=A_tmp%val |
|---|
| 343 | if (associated(A_tmp%M_bound)) A%M_bound=A_tmp%M_bound |
|---|
| 344 | A%nrows=A_tmp%nrows |
|---|
| 345 | A%ncols=A_tmp%ncols |
|---|
| 346 | A%arrange_type=A_tmp%arrange_type |
|---|
| 347 | call SpMtx_Destroy(A_tmp) |
|---|
| 348 | else!}{ |
|---|
| 349 | call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, & |
|---|
| 350 | A_interf_=A_interf_, & |
|---|
| 351 | refactor=refactor_) !fine solves |
|---|
| 352 | if (sctls%method>1) then ! For multiplicative Schwarz method...: |
|---|
| 353 | refactor_=.false. |
|---|
| 354 | call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, & |
|---|
| 355 | A_interf_=A_interf_, & |
|---|
| 356 | refactor=refactor_) !fine solves |
|---|
| 357 | endif |
|---|
| 358 | endif!} |
|---|
| 359 | end subroutine preconditioner_1level |
|---|
| 360 | |
|---|
| 361 | !------------------------------- |
|---|
| 362 | !> Make preconditioner |
|---|
| 363 | !------------------------------- |
|---|
| 364 | subroutine preconditioner(sol,A,rhs,M, & |
|---|
| 365 | A_interf_,CoarseMtx_,Restrict,refactor_,bugtrack_) |
|---|
| 366 | use subsolvers |
|---|
| 367 | use CoarseMtx_mod |
|---|
| 368 | use CoarseAllgathers |
|---|
| 369 | use Vect_mod |
|---|
| 370 | implicit none |
|---|
| 371 | real(kind=rk),dimension(:),pointer :: sol !< solution |
|---|
| 372 | type(SpMtx) :: A !< sparse system matrix |
|---|
| 373 | real(kind=rk),dimension(:),pointer :: rhs !< right hand side |
|---|
| 374 | type(Mesh),intent(in) :: M !< Mesh |
|---|
| 375 | real(kind=rk),dimension(:),pointer :: res !< residual vector, allocated |
|---|
| 376 | !! here for multiplicative Schwarz |
|---|
| 377 | type(SpMtx),optional :: A_interf_ !< matr@interf. |
|---|
| 378 | type(SpMtx),optional :: CoarseMtx_ !< Coarse matrix |
|---|
| 379 | type(SpMtx),optional :: Restrict !< Restriction matrix |
|---|
| 380 | logical,intent(inout),optional :: refactor_ |
|---|
| 381 | logical,optional :: bugtrack_ |
|---|
| 382 | ! ----- local: ------ |
|---|
| 383 | real(kind=rk),dimension(:),pointer,save :: csol,crhs,tmpsol,tmpsol2,clrhs |
|---|
| 384 | integer :: i,ol |
|---|
| 385 | logical :: add,bugtrack |
|---|
| 386 | real(kind=rk) :: t1 |
|---|
| 387 | |
|---|
| 388 | t1 = MPI_WTime() |
|---|
| 389 | |
|---|
| 390 | if (present(bugtrack_)) then |
|---|
| 391 | bugtrack=bugtrack_ |
|---|
| 392 | else |
|---|
| 393 | bugtrack=.false. |
|---|
| 394 | endif |
|---|
| 395 | ! ---------------------------- |
|---|
| 396 | if (sctls%method==0) then |
|---|
| 397 | sol=rhs |
|---|
| 398 | return |
|---|
| 399 | endif |
|---|
| 400 | sol=0.0_rk |
|---|
| 401 | if (sctls%method>1) then ! For multiplicative Schwarz method...: |
|---|
| 402 | allocate(res(size(rhs))) |
|---|
| 403 | endif |
|---|
| 404 | if (present(CoarseMtx_)) then !{ |
|---|
| 405 | if (.not.present(Restrict)) call DOUG_abort("Restriction matrix needs to be passed along with the coarse matrix!") |
|---|
| 406 | if (cdat%active) then |
|---|
| 407 | ! Not the first iteration, so send the coarse vector here |
|---|
| 408 | if (.not.(present(refactor_).and.refactor_)) then |
|---|
| 409 | call SpMtx_Ax(clrhs,Restrict,rhs,dozero=.true.) ! restrict <RA> |
|---|
| 410 | ! And send |
|---|
| 411 | if (bugtrack)write(stream,*) "local coarse RHS is:",clrhs |
|---|
| 412 | if (cdat_vec%active) then |
|---|
| 413 | call AllSendCoarseVector(clrhs,cdat_vec%nprocs,cdat_vec%cdisps,& |
|---|
| 414 | cdat_vec%send,useprev=.true.) |
|---|
| 415 | else |
|---|
| 416 | call AllSendCoarseVector(clrhs,cdat%nprocs,cdat%cdisps,& |
|---|
| 417 | cdat%send,useprev=.true.) |
|---|
| 418 | endif |
|---|
| 419 | else ! First iteration - send matrix |
|---|
| 420 | call AllSendCoarseMtx(cdat%LAC,CoarseMtx_,cdat%lg_cfmap,& |
|---|
| 421 | cdat%ngfc,cdat%nprocs,cdat%send) |
|---|
| 422 | endif |
|---|
| 423 | endif |
|---|
| 424 | |
|---|
| 425 | ol=max(sctls%overlap,sctls%smoothers) |
|---|
| 426 | if (sctls%input_type==DCTL_INPUT_TYPE_ELEMENTAL.or.numprocs>1) then |
|---|
| 427 | call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, & |
|---|
| 428 | A_interf_=A_interf_, & |
|---|
| 429 | refactor=refactor_) !fine solves |
|---|
| 430 | else |
|---|
| 431 | call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, & |
|---|
| 432 | A_interf_=A_interf_,AC=CoarseMtx_, & |
|---|
| 433 | refactor=refactor_,Restrict=Restrict) !fine solves |
|---|
| 434 | if (bugtrack)call Print_Glob_Vect(sol,M,'global sol===') |
|---|
| 435 | endif |
|---|
| 436 | |
|---|
| 437 | if (sctls%levels>1) then |
|---|
| 438 | if (ol==0) then |
|---|
| 439 | add=.false. |
|---|
| 440 | else |
|---|
| 441 | add=.true. |
|---|
| 442 | endif |
|---|
| 443 | if (present(refactor_).and.refactor_) then !{ setup coarse solve: |
|---|
| 444 | ! We have a matrix send pending |
|---|
| 445 | if (cdat%active) then |
|---|
| 446 | !write(stream,*) "Waiting the Matrix!" |
|---|
| 447 | call AllRecvCoarseMtx(CoarseMtx_,cdat%send,add=add) ! Recieve it |
|---|
| 448 | !write(stream,*) "Got Matrix!" |
|---|
| 449 | allocate(clrhs(Restrict%nrows)) ! allocate memory for vector |
|---|
| 450 | call SpMtx_Ax(clrhs,Restrict,rhs,dozero=.true.) ! restrict <RA> |
|---|
| 451 | ! And send |
|---|
| 452 | if (bugtrack)write(stream,*) "(f) local Coarse RHS is:",clrhs |
|---|
| 453 | if (cdat_vec%active) then |
|---|
| 454 | call AllSendCoarseVector(clrhs,cdat_vec%nprocs,& |
|---|
| 455 | cdat_vec%cdisps,cdat_vec%send) |
|---|
| 456 | else |
|---|
| 457 | call AllSendCoarseVector(clrhs,cdat%nprocs,& |
|---|
| 458 | cdat%cdisps,cdat%send) |
|---|
| 459 | endif |
|---|
| 460 | endif |
|---|
| 461 | if (associated(crhs)) then |
|---|
| 462 | if (size(crhs)/=CoarseMtx_%ncols) then |
|---|
| 463 | deallocate(csol) |
|---|
| 464 | deallocate(crhs) |
|---|
| 465 | allocate(crhs(CoarseMtx_%ncols)) |
|---|
| 466 | allocate(csol(CoarseMtx_%nrows)) |
|---|
| 467 | endif |
|---|
| 468 | else |
|---|
| 469 | allocate(crhs(CoarseMtx_%ncols)) |
|---|
| 470 | allocate(csol(CoarseMtx_%nrows)) |
|---|
| 471 | endif |
|---|
| 472 | if (.not.associated(tmpsol)) then |
|---|
| 473 | !allocate(tmpsol(A%nrows)) |
|---|
| 474 | allocate(tmpsol(size(rhs))) |
|---|
| 475 | endif |
|---|
| 476 | |
|---|
| 477 | if (.not.cdat%active) then |
|---|
| 478 | if (sctls%smoothers==-1) then |
|---|
| 479 | allocate(tmpsol2(A%nrows)) |
|---|
| 480 | tmpsol2=0.0_rk |
|---|
| 481 | call exact_sparse_multismoother(tmpsol2,A,rhs) |
|---|
| 482 | call SpMtx_Ax(crhs,Restrict,tmpsol2,dozero=.true.) ! restriction |
|---|
| 483 | else |
|---|
| 484 | if (sctls%method>1.and.sctls%method/=5) then ! multiplicative Schwarz |
|---|
| 485 | call SpMtx_Ax(crhs,Restrict,res,dozero=.true.) ! restriction |
|---|
| 486 | else |
|---|
| 487 | call SpMtx_Ax(crhs,Restrict,rhs,dozero=.true.) ! restriction |
|---|
| 488 | endif |
|---|
| 489 | endif |
|---|
| 490 | endif |
|---|
| 491 | !csol=0.0_rk |
|---|
| 492 | write (stream,*) & |
|---|
| 493 | 'factorising coarse matrix of size',CoarseMtx_%nrows, & |
|---|
| 494 | ' and nnz:',CoarseMtx_%nnz |
|---|
| 495 | call free_spmtx_subsolves(CoarseMtx_) |
|---|
| 496 | allocate(CoarseMtx_%subsolve_ids(1)) |
|---|
| 497 | CoarseMtx_%subsolve_ids=0 |
|---|
| 498 | CoarseMtx_%nsubsolves=1 |
|---|
| 499 | |
|---|
| 500 | !! TODO: force it to do it AFTER factorization, but before solve |
|---|
| 501 | !if (present(cdat)) then |
|---|
| 502 | if (cdat%active) then |
|---|
| 503 | ! Factorise the matrix |
|---|
| 504 | if (sctls%verbose>2) then |
|---|
| 505 | write(stream,*)'Global coarse matrix is:---------' |
|---|
| 506 | do i=1,CoarseMtx_%nnz |
|---|
| 507 | write(stream,*) CoarseMtx_%indi(i),& |
|---|
| 508 | CoarseMtx_%indj(i),CoarseMtx_%val(i) |
|---|
| 509 | enddo |
|---|
| 510 | write(stream,*)'---------------------------------' |
|---|
| 511 | endif |
|---|
| 512 | call factorise(CoarseMtx_%subsolve_ids(1), & |
|---|
| 513 | nfreds=CoarseMtx_%nrows, & |
|---|
| 514 | nnz=CoarseMtx_%nnz, & |
|---|
| 515 | indi=CoarseMtx_%indi, & |
|---|
| 516 | indj=CoarseMtx_%indj, & |
|---|
| 517 | val=CoarseMtx_%val) |
|---|
| 518 | ! Recieve the vector for solve |
|---|
| 519 | if (cdat_vec%active) then |
|---|
| 520 | call AllRecvCoarseVector(crhs,cdat_vec%nprocs,& |
|---|
| 521 | cdat_vec%cdisps,cdat_vec%glg_cfmap,cdat_vec%send) |
|---|
| 522 | else |
|---|
| 523 | call AllRecvCoarseVector(crhs,cdat%nprocs,& |
|---|
| 524 | cdat%cdisps,cdat%glg_cfmap,cdat%send) |
|---|
| 525 | endif |
|---|
| 526 | !write(stream,*) "Got coarse vector!" |
|---|
| 527 | !call MPI_BARRIER(MPI_COMM_WORLD,i) |
|---|
| 528 | endif |
|---|
| 529 | if (bugtrack) then |
|---|
| 530 | if (isnan(sum(crhs))) then |
|---|
| 531 | write(stream,*) "(f) global Coarse RHS is:",crhs |
|---|
| 532 | call DOUG_abort('Got NaN receive fromAllRecvCoarseVector...',838465) |
|---|
| 533 | endif |
|---|
| 534 | !else |
|---|
| 535 | ! write(stream,*) "(f) global Coarse RHS is:",crhs |
|---|
| 536 | endif |
|---|
| 537 | |
|---|
| 538 | |
|---|
| 539 | call sparse_singlesolve(CoarseMtx_%subsolve_ids(1),csol,crhs, & |
|---|
| 540 | nfreds=CoarseMtx_%nrows, & |
|---|
| 541 | nnz=CoarseMtx_%nnz, & |
|---|
| 542 | indi=CoarseMtx_%indi, & |
|---|
| 543 | indj=CoarseMtx_%indj, & |
|---|
| 544 | val=CoarseMtx_%val) |
|---|
| 545 | write (stream,*) 'coarse factorisation done!',CoarseMtx_%subsolve_ids(1) |
|---|
| 546 | if (bugtrack)write(stream,*) "(f) Coarse SOL is:",csol |
|---|
| 547 | |
|---|
| 548 | if (cdat_vec%active) then |
|---|
| 549 | CoarseMtx_%indi=CoarseMtx_%indi+1 |
|---|
| 550 | CoarseMtx_%indj=CoarseMtx_%indj+1 |
|---|
| 551 | call Vect_remap(csol,clrhs,cdat_vec%gl_cfmap,dozero=.true.) |
|---|
| 552 | call SpMtx_Ax(tmpsol,Restrict,clrhs,dozero=.true.,transp=.true.) |
|---|
| 553 | elseif (cdat%active) then |
|---|
| 554 | CoarseMtx_%indi=CoarseMtx_%indi+1 |
|---|
| 555 | CoarseMtx_%indj=CoarseMtx_%indj+1 |
|---|
| 556 | call Vect_remap(csol,clrhs,cdat%gl_cfmap,dozero=.true.) |
|---|
| 557 | call SpMtx_Ax(tmpsol,Restrict,clrhs,dozero=.true.,transp=.true.) |
|---|
| 558 | else |
|---|
| 559 | if (sctls%smoothers==-1) then |
|---|
| 560 | call SpMtx_Ax(tmpsol2,Restrict,csol,dozero=.true.,transp=.true.) ! interpolation |
|---|
| 561 | tmpsol=0.0_rk |
|---|
| 562 | call exact_sparse_multismoother(tmpsol,A,tmpsol2) |
|---|
| 563 | else |
|---|
| 564 | call SpMtx_Ax(tmpsol,Restrict,csol,dozero=.true.,transp=.true.) ! interpolation |
|---|
| 565 | endif |
|---|
| 566 | endif |
|---|
| 567 | if (bugtrack)call Print_Glob_Vect(tmpsol,M,'tmpsol===',chk_endind=M%ninner) |
|---|
| 568 | else !}{ apply coarse solve: |
|---|
| 569 | if (cdat_vec%active) then |
|---|
| 570 | call AllRecvCoarseVector(crhs,cdat_vec%nprocs,cdat_vec%cdisps,& |
|---|
| 571 | cdat_vec%glg_cfmap,cdat_vec%send) |
|---|
| 572 | elseif (cdat%active) then |
|---|
| 573 | call AllRecvCoarseVector(crhs,cdat%nprocs,cdat%cdisps,& |
|---|
| 574 | cdat%glg_cfmap,cdat%send) |
|---|
| 575 | else |
|---|
| 576 | if (sctls%smoothers==-1) then |
|---|
| 577 | tmpsol2=0.0_rk |
|---|
| 578 | call exact_sparse_multismoother(tmpsol2,A,rhs) |
|---|
| 579 | call SpMtx_Ax(crhs,Restrict,tmpsol2,dozero=.true.) ! restriction |
|---|
| 580 | else |
|---|
| 581 | call SpMtx_Ax(crhs,Restrict,rhs,dozero=.true.) ! restriction |
|---|
| 582 | endif |
|---|
| 583 | endif |
|---|
| 584 | call sparse_singlesolve(CoarseMtx_%subsolve_ids(1),csol,crhs, & |
|---|
| 585 | nfreds=CoarseMtx_%nrows) |
|---|
| 586 | if (cdat_vec%active) then |
|---|
| 587 | call Vect_remap(csol,clrhs,cdat_vec%gl_cfmap,dozero=.true.) |
|---|
| 588 | call SpMtx_Ax(tmpsol,Restrict,clrhs,dozero=.true.,transp=.true.) |
|---|
| 589 | elseif (cdat%active) then |
|---|
| 590 | call Vect_remap(csol,clrhs,cdat%gl_cfmap,dozero=.true.) |
|---|
| 591 | call SpMtx_Ax(tmpsol,Restrict,clrhs,dozero=.true.,transp=.true.) |
|---|
| 592 | else |
|---|
| 593 | if (sctls%smoothers==-1) then |
|---|
| 594 | call SpMtx_Ax(tmpsol2,Restrict,csol,dozero=.true.,transp=.true.) ! interpolation |
|---|
| 595 | tmpsol=0.0_rk |
|---|
| 596 | call exact_sparse_multismoother(tmpsol,A,tmpsol2) |
|---|
| 597 | else |
|---|
| 598 | call SpMtx_Ax(tmpsol,Restrict,csol,dozero=.true.,transp=.true.) ! interpolation |
|---|
| 599 | ! call Vect_print(tmpsol,"csol") |
|---|
| 600 | endif |
|---|
| 601 | endif |
|---|
| 602 | endif !} |
|---|
| 603 | if (sctls%method==1) then |
|---|
| 604 | !call Print_Glob_Vect(sol,M,'sol===',chk_endind=M%ninner) |
|---|
| 605 | sol(1:A%nrows)=sol(1:A%nrows)+tmpsol(1:A%nrows) |
|---|
| 606 | elseif (sctls%method==3) then ! fully multiplicative Schwarz |
|---|
| 607 | sol(1:A%nrows)=sol(1:A%nrows)+tmpsol(1:A%nrows) |
|---|
| 608 | ! calculate the residual: |
|---|
| 609 | call SpMtx_Ax(res,A,sol,dozero=.true.) ! |
|---|
| 610 | res=rhs-res |
|---|
| 611 | elseif (sctls%method==2.and.ol>0) then ! fully multiplicative Schwarz |
|---|
| 612 | sol(1:A%nrows)=tmpsol(1:A%nrows) |
|---|
| 613 | ! calculate the residual: |
|---|
| 614 | call SpMtx_Ax(res,A,sol,dozero=.true.) ! |
|---|
| 615 | res=rhs-res |
|---|
| 616 | endif |
|---|
| 617 | endif |
|---|
| 618 | if (sctls%method>1) then ! For multiplicative Schwarz method...: |
|---|
| 619 | refactor_=.false. |
|---|
| 620 | if (sctls%input_type==DCTL_INPUT_TYPE_ELEMENTAL) then |
|---|
| 621 | call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, & |
|---|
| 622 | A_interf_=A_interf_, & |
|---|
| 623 | refactor=refactor_) !fine solves |
|---|
| 624 | else |
|---|
| 625 | call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, & |
|---|
| 626 | A_interf_=A_interf_,AC=CoarseMtx_, & |
|---|
| 627 | refactor=refactor_,Restrict=Restrict) !fine solves |
|---|
| 628 | endif |
|---|
| 629 | endif |
|---|
| 630 | if (((ol==0.and.sctls%method==2).or.sctls%method==5).and.sctls%levels>1) then |
|---|
| 631 | ! multiplicative on fine level, additive with coarse level: |
|---|
| 632 | sol(1:A%nrows)=sol(1:A%nrows)+tmpsol(1:A%nrows) |
|---|
| 633 | endif |
|---|
| 634 | else !}{ |
|---|
| 635 | call preconditioner_1level(sol,A,rhs,M,res,A_interf_,refactor_) |
|---|
| 636 | endif !} |
|---|
| 637 | if (sctls%method>1) then ! For multiplicative Schwarz method...: |
|---|
| 638 | if (associated(res)) deallocate(res) |
|---|
| 639 | endif |
|---|
| 640 | |
|---|
| 641 | time_preconditioner = time_preconditioner + MPI_WTime()-t1 |
|---|
| 642 | |
|---|
| 643 | end subroutine preconditioner |
|---|
| 644 | |
|---|
| 645 | !-------------------------- |
|---|
| 646 | ! Solve |
|---|
| 647 | !-------------------------- |
|---|
| 648 | subroutine msolve (m, r, z) |
|---|
| 649 | implicit none |
|---|
| 650 | real(kind=rk), dimension(:), intent(in) :: m, r |
|---|
| 651 | real(kind=rk), dimension(:), intent(in out) :: z |
|---|
| 652 | integer :: i |
|---|
| 653 | |
|---|
| 654 | do i = 1, size(r) |
|---|
| 655 | z(i) = r(i) !/ m(i) |
|---|
| 656 | end do |
|---|
| 657 | end subroutine msolve |
|---|
| 658 | |
|---|
| 659 | !-------------------------- |
|---|
| 660 | !> Preconditioned conjugent gradient method with eigenvalues |
|---|
| 661 | !-------------------------- |
|---|
| 662 | subroutine pcg_weigs (A,b,x,Msh,it,cond_num,A_interf_,tol_,maxit_, & |
|---|
| 663 | x0_,solinf,resvects_,CoarseMtx_,Restrict,refactor_) |
|---|
| 664 | use CoarseAllgathers |
|---|
| 665 | |
|---|
| 666 | implicit none |
|---|
| 667 | |
|---|
| 668 | type(SpMtx),intent(in out) :: A !< System matrix (sparse) |
|---|
| 669 | float(kind=rk),dimension(:),pointer :: b !< right hand side |
|---|
| 670 | float(kind=rk),dimension(:),pointer :: x !< Solution |
|---|
| 671 | type(Mesh),intent(in) :: Msh !< Mesh - aux data for Ax operation |
|---|
| 672 | |
|---|
| 673 | integer,intent(out) :: it |
|---|
| 674 | real(kind=rk),intent(out) :: cond_num |
|---|
| 675 | |
|---|
| 676 | ! Optional arguments |
|---|
| 677 | type(SpMtx),intent(in out),optional :: A_interf_ !< matr@interf. |
|---|
| 678 | real(kind=rk), intent(in), optional :: tol_ !< Tolerance of the method |
|---|
| 679 | integer, intent(in), optional :: maxit_ !< Max number of iterations |
|---|
| 680 | float(kind=rk), dimension(:), intent(in), optional :: x0_ !< Initial guess |
|---|
| 681 | type(ConvInf), intent(in out), optional :: solinf !< Solution statistics |
|---|
| 682 | logical, intent(in), optional :: resvects_ !< Fill in the 'resvect' or not |
|---|
| 683 | type(SpMtx),optional :: CoarseMtx_ !< Coarse matrix |
|---|
| 684 | type(SpMtx),optional :: Restrict !< Restriction mtx |
|---|
| 685 | logical,intent(in),optional :: refactor_ |
|---|
| 686 | |
|---|
| 687 | ! Local variables |
|---|
| 688 | logical :: refactor |
|---|
| 689 | |
|---|
| 690 | real(kind=rk) :: tol ! Tolerance |
|---|
| 691 | integer :: maxit ! Max number of iterations |
|---|
| 692 | logical :: resvects=.false. |
|---|
| 693 | |
|---|
| 694 | real(kind=rk),dimension(:),pointer,save :: r, p, q, m, z, b_k |
|---|
| 695 | real(kind=rk),dimension(:),pointer,save :: ztmp !TODO remove me |
|---|
| 696 | real(kind=rk) :: rho_curr, rho_prev, init_norm, res_norm |
|---|
| 697 | real(kind=rk) :: tmp,ratio_norm |
|---|
| 698 | integer :: nfreds |
|---|
| 699 | integer :: ierr |
|---|
| 700 | real(kind=rk),dimension(:),pointer,save :: dd,ee,alpha,beta |
|---|
| 701 | !logical,parameter :: bugtrack=.true. |
|---|
| 702 | logical,parameter :: bugtrack=.false. |
|---|
| 703 | ! profiling |
|---|
| 704 | real(8) :: time_iterations, t1 |
|---|
| 705 | |
|---|
| 706 | write(stream,'(/a)') 'Preconditioned conjugate gradient:' |
|---|
| 707 | if (present(refactor_).and.refactor_) then |
|---|
| 708 | refactor=.true. |
|---|
| 709 | else |
|---|
| 710 | refactor=.false. |
|---|
| 711 | endif |
|---|
| 712 | |
|---|
| 713 | if (size(b) /= size(x)) & |
|---|
| 714 | call DOUG_abort('[pcg] : SEVERE : size(b) /= size(x)',-1) |
|---|
| 715 | |
|---|
| 716 | nfreds=size(b) |
|---|
| 717 | if (.not.associated(r)) then |
|---|
| 718 | allocate(r(nfreds)) |
|---|
| 719 | allocate(p(nfreds)) |
|---|
| 720 | allocate(q(nfreds)) |
|---|
| 721 | allocate(m(nfreds)) |
|---|
| 722 | allocate(z(nfreds)) |
|---|
| 723 | allocate(b_k(nfreds)) |
|---|
| 724 | endif |
|---|
| 725 | |
|---|
| 726 | tol = sctls%solve_tolerance |
|---|
| 727 | if (present(tol_)) & |
|---|
| 728 | tol = tol_ |
|---|
| 729 | |
|---|
| 730 | maxit = sctls%solve_maxiters |
|---|
| 731 | if (maxit==-1) maxit=100000 |
|---|
| 732 | if (present(maxit_)) & |
|---|
| 733 | maxit = maxit_ |
|---|
| 734 | |
|---|
| 735 | if (present(resvects_).and.(.not.present(solinf))) & |
|---|
| 736 | call DOUG_abort('[pcg] : SEVERE : "resvects_" must be given along'//& |
|---|
| 737 | ' with "solinf".',-1) |
|---|
| 738 | |
|---|
| 739 | ! Allocate convergence info structure |
|---|
| 740 | if (present(solinf).and.(.not.present(resvects_))) then |
|---|
| 741 | call ConvInf_Init(solinf) |
|---|
| 742 | else if (present(solinf).and.& |
|---|
| 743 | (present(resvects_).and.(resvects_.eqv.(.true.)))) then |
|---|
| 744 | call ConvInf_Init(solinf, maxit) |
|---|
| 745 | resvects = .true. |
|---|
| 746 | end if |
|---|
| 747 | |
|---|
| 748 | |
|---|
| 749 | ! Initialise auxiliary data structures |
|---|
| 750 | ! to assist with pmvm |
|---|
| 751 | call pmvmCommStructs_init(A, Msh) |
|---|
| 752 | |
|---|
| 753 | |
|---|
| 754 | if (bugtrack)call Print_Glob_Vect(x,Msh,'global x===') |
|---|
| 755 | call SpMtx_pmvm(r,A,x,Msh) |
|---|
| 756 | |
|---|
| 757 | r = b - r |
|---|
| 758 | init_norm = Vect_dot_product(r,r) |
|---|
| 759 | if (init_norm == 0.0) & |
|---|
| 760 | init_norm = 1.0 |
|---|
| 761 | |
|---|
| 762 | |
|---|
| 763 | write(stream,'(a,i6,a,e8.2)') 'maxit = ',maxit,', tol = ',tol |
|---|
| 764 | write(stream,'(a,e22.15)') 'init_norm = ', dsqrt(init_norm) |
|---|
| 765 | |
|---|
| 766 | it = 0 |
|---|
| 767 | ratio_norm = 1.0_rk |
|---|
| 768 | ! arrays for eigenvalue calculation: |
|---|
| 769 | if (.not.associated(dd)) then |
|---|
| 770 | allocate(dd(maxit)) |
|---|
| 771 | allocate(ee(maxit)) |
|---|
| 772 | allocate(alpha(maxit)) |
|---|
| 773 | allocate(beta(maxit)) |
|---|
| 774 | endif |
|---|
| 775 | |
|---|
| 776 | time_iterations = 0. |
|---|
| 777 | ! iterations |
|---|
| 778 | do while((ratio_norm > tol*tol).and.(it < maxit)) |
|---|
| 779 | it = it + 1 |
|---|
| 780 | t1 = MPI_WTime() |
|---|
| 781 | |
|---|
| 782 | if (bugtrack)call Print_Glob_Vect(r,Msh,'global r===',chk_endind=Msh%ninner) |
|---|
| 783 | call preconditioner(sol=z, & |
|---|
| 784 | A=A, & |
|---|
| 785 | rhs=r, & |
|---|
| 786 | M=Msh, & |
|---|
| 787 | A_interf_=A_interf_, & |
|---|
| 788 | CoarseMtx_=CoarseMtx_, & |
|---|
| 789 | Restrict=Restrict, & |
|---|
| 790 | refactor_=refactor, & |
|---|
| 791 | bugtrack_=bugtrack) |
|---|
| 792 | |
|---|
| 793 | refactor=.false. |
|---|
| 794 | if (bugtrack)call Print_Glob_Vect(z,Msh,'global bef comm z===',chk_endind=Msh%ninner) |
|---|
| 795 | if (sctls%method/=0) then |
|---|
| 796 | call Add_common_interf(z,A,Msh) |
|---|
| 797 | endif |
|---|
| 798 | !call Print_Glob_Vect(z,Msh,'global aft comm z===',chk_endind=Msh%ninner) |
|---|
| 799 | !call Print_Glob_Vect(z,Msh,'global z===') |
|---|
| 800 | ! compute current rho |
|---|
| 801 | rho_curr = Vect_dot_product(r,z) |
|---|
| 802 | |
|---|
| 803 | if (it == 1) then |
|---|
| 804 | p = z |
|---|
| 805 | else |
|---|
| 806 | beta(it) = rho_curr / rho_prev |
|---|
| 807 | p = z + beta(it) * p |
|---|
| 808 | end if |
|---|
| 809 | call SpMtx_pmvm(q,A, p, Msh) |
|---|
| 810 | if (bugtrack)call Print_Glob_Vect(q,Msh,'global q===') |
|---|
| 811 | ! compute alpha |
|---|
| 812 | alpha(it) = rho_curr / Vect_dot_product(p,q) |
|---|
| 813 | x = x + alpha(it) * p |
|---|
| 814 | if (bugtrack)call Print_Glob_Vect(x,Msh,'global x===') |
|---|
| 815 | r = r - alpha(it) * q |
|---|
| 816 | if (bugtrack)call Print_Glob_Vect(r,Msh,'global r===') |
|---|
| 817 | rho_prev = rho_curr |
|---|
| 818 | ! check |
|---|
| 819 | res_norm = Vect_dot_product(r,r) |
|---|
| 820 | !write (stream,*) "Norm is", res_norm |
|---|
| 821 | ratio_norm = res_norm / init_norm |
|---|
| 822 | |
|---|
| 823 | time_iterations = time_iterations + MPI_WTime()-t1 |
|---|
| 824 | |
|---|
| 825 | if (ismaster()) & |
|---|
| 826 | write(stream, '(i5,a,e22.15)') it,': res_norm=',dsqrt(res_norm) |
|---|
| 827 | end do |
|---|
| 828 | |
|---|
| 829 | if(pstream/=0) then |
|---|
| 830 | write(pstream, "(I0,':pcg iterations:',I0)") myrank, it |
|---|
| 831 | write(pstream, "(I0,':iterations time:',F0.3)") myrank, time_iterations |
|---|
| 832 | write(pstream, "(I0,':preconditioner time:',F0.3)") myrank, time_preconditioner |
|---|
| 833 | end if |
|---|
| 834 | |
|---|
| 835 | if (ismaster()) then |
|---|
| 836 | call CalculateEigenvalues(it,dd,ee,alpha,beta,maxit) |
|---|
| 837 | cond_num=dd(it)/dd(1) |
|---|
| 838 | write(stream,'(a,i3,a,e10.4)') '#it:',it,' Cond#: ',cond_num |
|---|
| 839 | endif |
|---|
| 840 | !deallocate(beta) |
|---|
| 841 | !deallocate(alpha) |
|---|
| 842 | !deallocate(ee) |
|---|
| 843 | !deallocate(dd) |
|---|
| 844 | ! Deallocate auxiliary data structures |
|---|
| 845 | ! helped to assist with pmvm |
|---|
| 846 | ! call pmvmCommStructs_destroy() |
|---|
| 847 | |
|---|
| 848 | !deallocate(b_k) |
|---|
| 849 | !deallocate(z) |
|---|
| 850 | !deallocate(m) |
|---|
| 851 | !deallocate(q) |
|---|
| 852 | !deallocate(p) |
|---|
| 853 | !deallocate(r) |
|---|
| 854 | end subroutine pcg_weigs |
|---|
| 855 | |
|---|
| 856 | |
|---|
| 857 | subroutine CalculateEigenvalues(it,dd,ee,alpha,beta,MaxIt) |
|---|
| 858 | ! Finds eigenvalues using Lanczos connection |
|---|
| 859 | implicit none |
|---|
| 860 | integer :: it,MaxIt |
|---|
| 861 | real(kind=rk),dimension(:),pointer :: dd,ee,alpha,beta |
|---|
| 862 | integer :: i,j,ierr |
|---|
| 863 | |
|---|
| 864 | dd(1) = 1.0_rk/alpha(1) |
|---|
| 865 | ee(1) = 0.0_rk |
|---|
| 866 | do i=2,it |
|---|
| 867 | if (alpha(i)==0.or.beta(i)<0) then |
|---|
| 868 | print *,'Unable to compute eigenvalue number',i,' and ' |
|---|
| 869 | do j=1,i |
|---|
| 870 | print *,j,' alpha:',alpha(j),' beta:',beta |
|---|
| 871 | enddo |
|---|
| 872 | return |
|---|
| 873 | else |
|---|
| 874 | dd(i) = 1.0_rk/alpha(i) + beta(i)/alpha(i-1) |
|---|
| 875 | ee(i) = -dsqrt(beta(i))/alpha(i-1) |
|---|
| 876 | endif |
|---|
| 877 | enddo |
|---|
| 878 | !call tql1(it,dd,ee,MaxIt,ierr) |
|---|
| 879 | call tql1(it,dd,ee,ierr) |
|---|
| 880 | if (ierr > 0) then |
|---|
| 881 | print *,'Cancelled after EigMaxIt while calculating eig',ierr |
|---|
| 882 | endif |
|---|
| 883 | end subroutine CalculateEigenvalues |
|---|
| 884 | |
|---|
| 885 | subroutine tql1(n,d,e,ierr) |
|---|
| 886 | implicit none |
|---|
| 887 | integer :: i,j,l,m,n,ii,l1,l2,mml,ierr |
|---|
| 888 | real(kind=rk) :: d(n),e(n) |
|---|
| 889 | real(kind=rk) :: c,c2,c3,dl1,el1,f,g,h,p,r,s,s2,tst1,tst2 !,pythag |
|---|
| 890 | ! |
|---|
| 891 | ! this subroutine is a translation of the algol procedure tql1, |
|---|
| 892 | ! num. math. 11, 293-306(1968) by bowdler, martin, reinsch, and |
|---|
| 893 | ! wilkinson. |
|---|
| 894 | ! handbook for auto. comp., vol.ii-linear algebra, 227-240(1971). |
|---|
| 895 | ! |
|---|
| 896 | ! this subroutine finds the eigenvalues of a symmetric |
|---|
| 897 | ! tridiagonal matrix by the ql method. |
|---|
| 898 | ! |
|---|
| 899 | ! on input |
|---|
| 900 | ! n is the order of the matrix. |
|---|
| 901 | ! d contains the diagonal elements of the input matrix. |
|---|
| 902 | ! e contains the subdiagonal elements of the input matrix |
|---|
| 903 | ! in its last n-1 positions. e(1) is arbitrary. |
|---|
| 904 | ! |
|---|
| 905 | ! on output |
|---|
| 906 | ! d contains the eigenvalues in ascending order. if an |
|---|
| 907 | ! error exit is made, the eigenvalues are correct and |
|---|
| 908 | ! ordered for indices 1,2,...ierr-1, but may not be |
|---|
| 909 | ! the smallest eigenvalues. |
|---|
| 910 | ! e has been destroyed. |
|---|
| 911 | ! ierr is set to |
|---|
| 912 | ! zero for normal return, |
|---|
| 913 | ! j if the j-th eigenvalue has not been |
|---|
| 914 | ! determined after 30 iterations. |
|---|
| 915 | ! calls pythag for sqrt(a*a + b*b) . |
|---|
| 916 | ! |
|---|
| 917 | ! questions and comments should be directed to burton s. garbow, |
|---|
| 918 | ! mathematics and computer science div, argonne national laboratory |
|---|
| 919 | ! |
|---|
| 920 | ! this version dated august 1983. |
|---|
| 921 | ! |
|---|
| 922 | ! ------------------------------------------------------------------ |
|---|
| 923 | ! |
|---|
| 924 | ierr = 0 |
|---|
| 925 | if (n .eq. 1) go to 1001 |
|---|
| 926 | ! |
|---|
| 927 | do 100 i = 2, n |
|---|
| 928 | 100 e(i-1) = e(i) |
|---|
| 929 | ! |
|---|
| 930 | f = 0.0e0 |
|---|
| 931 | tst1 = 0.0e0 |
|---|
| 932 | e(n) = 0.0e0 |
|---|
| 933 | |
|---|
| 934 | do 290 l = 1, n |
|---|
| 935 | j = 0 |
|---|
| 936 | h = abs(d(l)) + abs(e(l)) |
|---|
| 937 | if (tst1 .lt. h) tst1 = h |
|---|
| 938 | ! .......... look for small sub-diagonal element .......... |
|---|
| 939 | do 110 m = l, n |
|---|
| 940 | tst2 = tst1 + abs(e(m)) |
|---|
| 941 | if (tst2 .eq. tst1) go to 120 |
|---|
| 942 | ! .......... e(n) is always zero, so there is no exit |
|---|
| 943 | ! throu gh the bottom of the loop .......... |
|---|
| 944 | 110 continue |
|---|
| 945 | |
|---|
| 946 | 120 if (m .eq. l) go to 210 |
|---|
| 947 | 130 if (j .eq. 30 ) go to 1000 |
|---|
| 948 | j = j + 1 |
|---|
| 949 | ! .......... form shift .......... |
|---|
| 950 | l1 = l + 1 |
|---|
| 951 | l2 = l1 + 1 |
|---|
| 952 | g = d(l) |
|---|
| 953 | p = (d(l1) - g) / (2.0e0 * e(l)) |
|---|
| 954 | r = pythag(p,1.0e0_rk) |
|---|
| 955 | d(l) = e(l) / (p + sign(r,p)) |
|---|
| 956 | d(l1) = e(l) * (p + sign(r,p)) |
|---|
| 957 | dl1 = d(l1) |
|---|
| 958 | h = g - d(l) |
|---|
| 959 | if (l2 .gt. n) go to 145 |
|---|
| 960 | |
|---|
| 961 | do 140 i = l2, n |
|---|
| 962 | 140 d(i) = d(i) - h |
|---|
| 963 | |
|---|
| 964 | 145 f = f + h |
|---|
| 965 | ! .......... ql transformation .......... |
|---|
| 966 | p = d(m) |
|---|
| 967 | c = 1.0e0 |
|---|
| 968 | c2 = c |
|---|
| 969 | el1 = e(l1) |
|---|
| 970 | s = 0.0e0 |
|---|
| 971 | mml = m - l |
|---|
| 972 | ! .......... for i=m-1 step -1 until l do -- .......... |
|---|
| 973 | do 200 ii = 1, mml |
|---|
| 974 | c3 = c2 |
|---|
| 975 | c2 = c |
|---|
| 976 | s2 = s |
|---|
| 977 | i = m - ii |
|---|
| 978 | g = c * e(i) |
|---|
| 979 | h = c * p |
|---|
| 980 | r = pythag(p,e(i)) |
|---|
| 981 | e(i+1) = s * r |
|---|
| 982 | s = e(i) / r |
|---|
| 983 | c = p / r |
|---|
| 984 | p = c * d(i) - s * g |
|---|
| 985 | d(i+1) = h + s * (c * g + s * d(i)) |
|---|
| 986 | 200 continue |
|---|
| 987 | |
|---|
| 988 | p = -s * s2 * c3 * el1 * e(l) / dl1 |
|---|
| 989 | e(l) = s * p |
|---|
| 990 | d(l) = c * p |
|---|
| 991 | tst2 = tst1 + abs(e(l)) |
|---|
| 992 | if (tst2 .gt. tst1) go to 130 |
|---|
| 993 | 210 p = d(l) + f |
|---|
| 994 | ! .......... order eigenvalues .......... |
|---|
| 995 | if (l .eq. 1) go to 250 |
|---|
| 996 | ! .......... for i=l step -1 until 2 do -- .......... |
|---|
| 997 | do 230 ii = 2, l |
|---|
| 998 | i = l + 2 - ii |
|---|
| 999 | if (p .ge. d(i-1)) go to 270 |
|---|
| 1000 | d(i) = d(i-1) |
|---|
| 1001 | 230 continue |
|---|
| 1002 | |
|---|
| 1003 | 250 i = 1 |
|---|
| 1004 | 270 d(i) = p |
|---|
| 1005 | 290 continue |
|---|
| 1006 | |
|---|
| 1007 | go to 1001 |
|---|
| 1008 | ! .......... set error -- no convergence to an |
|---|
| 1009 | ! eigenvalue after 30 iterations .......... |
|---|
| 1010 | 1000 ierr = l |
|---|
| 1011 | 1001 return |
|---|
| 1012 | end subroutine tql1 |
|---|
| 1013 | |
|---|
| 1014 | function pythag(a,b) result(pyt) |
|---|
| 1015 | ! finds dsqrt(a**2+b**2) without overflow or destructive underflow |
|---|
| 1016 | implicit none |
|---|
| 1017 | real(kind=rk) :: a,b,pyt |
|---|
| 1018 | real(kind=rk) :: p,r,s,t,u |
|---|
| 1019 | |
|---|
| 1020 | p = dmax1(dabs(a),dabs(b)) |
|---|
| 1021 | if (p .eq. 0.0d0) go to 20 |
|---|
| 1022 | r = (dmin1(dabs(a),dabs(b))/p)**2 |
|---|
| 1023 | 10 continue |
|---|
| 1024 | t = 4.0d0 + r |
|---|
| 1025 | if (t .eq. 4.0d0) go to 20 |
|---|
| 1026 | s = r/t |
|---|
| 1027 | u = 1.0d0 + 2.0d0*s |
|---|
| 1028 | p = u*p |
|---|
| 1029 | r = (s/u)**2 * r |
|---|
| 1030 | go to 10 |
|---|
| 1031 | 20 pyt = p |
|---|
| 1032 | return |
|---|
| 1033 | end function pythag |
|---|
| 1034 | |
|---|
| 1035 | |
|---|
| 1036 | end module pcg_mod |
|---|
| 1037 | |
|---|
| 1038 | |
|---|