| 1 | ! DOUG - Domain decomposition On Unstructured Grids |
|---|
| 2 | ! Copyright (C) 1998-2006 Faculty of Computer Science, University of Tartu and |
|---|
| 3 | ! Department of Mathematics, University of Bath |
|---|
| 4 | ! |
|---|
| 5 | ! This library is free software; you can redistribute it and/or |
|---|
| 6 | ! modify it under the terms of the GNU Lesser General Public |
|---|
| 7 | ! License as published by the Free Software Foundation; either |
|---|
| 8 | ! version 2.1 of the License, or (at your option) any later version. |
|---|
| 9 | ! |
|---|
| 10 | ! This library is distributed in the hope that it will be useful, |
|---|
| 11 | ! but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 12 | ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|---|
| 13 | ! Lesser General Public License for more details. |
|---|
| 14 | ! |
|---|
| 15 | ! You should have received a copy of the GNU Lesser General Public |
|---|
| 16 | ! License along with this library; if not, write to the Free Software |
|---|
| 17 | ! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|---|
| 18 | ! or contact the authors (University of Tartu, Faculty of Computer Science, Chair |
|---|
| 19 | ! of Distributed Systems, Liivi 2, 50409 Tartu, Estonia, http://dougdevel.org, |
|---|
| 20 | ! mailto:info(at)dougdevel.org) |
|---|
| 21 | |
|---|
| 22 | !!---------------------------------- |
|---|
| 23 | !! Preconditioned Conjugate Gradient |
|---|
| 24 | !!---------------------------------- |
|---|
| 25 | module pcg_mod |
|---|
| 26 | |
|---|
| 27 | use ConvInf_mod |
|---|
| 28 | use SpMtx_mods |
|---|
| 29 | use Mesh_class |
|---|
| 30 | use globals |
|---|
| 31 | use subsolvers |
|---|
| 32 | |
|---|
| 33 | implicit none |
|---|
| 34 | |
|---|
| 35 | #include<doug_config.h> |
|---|
| 36 | |
|---|
| 37 | #ifdef D_COMPLEX |
|---|
| 38 | #define float complex |
|---|
| 39 | #else |
|---|
| 40 | #define float real |
|---|
| 41 | #endif |
|---|
| 42 | |
|---|
| 43 | private :: & |
|---|
| 44 | preconditioner, & |
|---|
| 45 | msolve |
|---|
| 46 | |
|---|
| 47 | ! profiling |
|---|
| 48 | real(kind=rk), private, save :: time_preconditioner = 0 |
|---|
| 49 | |
|---|
| 50 | contains |
|---|
| 51 | |
|---|
| 52 | subroutine prec1Level(DD,sol,A,rhs,A_ghost,refactor) |
|---|
| 53 | implicit none |
|---|
| 54 | type(Decomposition),intent(inout) :: DD !< domains |
|---|
| 55 | real(kind=rk),dimension(:),pointer :: sol !< solution |
|---|
| 56 | type(SpMtx) :: A !< sparse system matrix |
|---|
| 57 | real(kind=rk),dimension(:),pointer :: rhs !< right hand side |
|---|
| 58 | type(SpMtx),optional :: A_ghost !< matr@interf. |
|---|
| 59 | logical,intent(inout),optional :: refactor |
|---|
| 60 | |
|---|
| 61 | if (refactor) then!{ |
|---|
| 62 | if (sctls%verbose>4) write(stream,*) "Factorizing 1. level" |
|---|
| 63 | call Factorise_subdomains(DD,A,A_ghost) |
|---|
| 64 | refactor=.false. |
|---|
| 65 | end if |
|---|
| 66 | |
|---|
| 67 | ! solve |
|---|
| 68 | if (sctls%verbose>4) write(stream,*) "Solving 1. level" |
|---|
| 69 | call solve_subdomains(sol,DD,rhs) |
|---|
| 70 | |
|---|
| 71 | end subroutine prec1Level |
|---|
| 72 | |
|---|
| 73 | subroutine prec2Level(prepare,A,sol,rhs,res,CoarseMtx_,Restrict,isFirstIter) |
|---|
| 74 | use CoarseAllgathers |
|---|
| 75 | |
|---|
| 76 | logical, intent(in) :: prepare |
|---|
| 77 | type(SpMtx) :: A !< sparse system matrix |
|---|
| 78 | real(kind=rk),dimension(:),pointer :: sol !< solution |
|---|
| 79 | real(kind=rk),dimension(:),pointer :: rhs !< right hand side |
|---|
| 80 | real(kind=rk),dimension(:),pointer :: res !< residual vector |
|---|
| 81 | type(SpMtx) :: CoarseMtx_ !< Coarse matrix |
|---|
| 82 | type(SpMtx) :: Restrict !< Restriction matrix |
|---|
| 83 | logical :: isFirstIter |
|---|
| 84 | |
|---|
| 85 | real(kind=rk),dimension(:),pointer,save :: csol,crhs,clrhs,tmpsol2,tmpsol |
|---|
| 86 | integer :: ol |
|---|
| 87 | |
|---|
| 88 | ol=max(sctls%overlap,sctls%smoothers) |
|---|
| 89 | |
|---|
| 90 | if (prepare) then |
|---|
| 91 | if (sctls%verbose>4) write(stream,*) "Preparing 2. level" |
|---|
| 92 | call prec2Level_prepare() |
|---|
| 93 | else |
|---|
| 94 | if (sctls%verbose>4) write(stream,*) "Solving 2. level" |
|---|
| 95 | call prec2Level_solve() |
|---|
| 96 | end if |
|---|
| 97 | |
|---|
| 98 | contains |
|---|
| 99 | subroutine prec2Level_exchangeMatrix() |
|---|
| 100 | integer :: ol |
|---|
| 101 | logical :: add |
|---|
| 102 | |
|---|
| 103 | if (sctls%verbose>4) write(stream,*) "Exchanging coarse matrix" |
|---|
| 104 | |
|---|
| 105 | ol=max(sctls%overlap,sctls%smoothers) |
|---|
| 106 | if (ol==0) then |
|---|
| 107 | add=.false. |
|---|
| 108 | else |
|---|
| 109 | add=.true. |
|---|
| 110 | endif |
|---|
| 111 | |
|---|
| 112 | call AllSendCoarseMtx(cdat%LAC,CoarseMtx_,cdat%lg_cfmap,& |
|---|
| 113 | cdat%ngfc,cdat%nprocs,cdat%send) |
|---|
| 114 | call AllRecvCoarseMtx(CoarseMtx_,cdat%send,add=add) ! Recieve it |
|---|
| 115 | |
|---|
| 116 | end subroutine prec2Level_exchangeMatrix |
|---|
| 117 | |
|---|
| 118 | subroutine prec2Level_prepare() |
|---|
| 119 | if (isFirstIter) then |
|---|
| 120 | if (cdat%active) then |
|---|
| 121 | ! First iteration - send matrix |
|---|
| 122 | call prec2Level_exchangeMatrix() |
|---|
| 123 | end if |
|---|
| 124 | |
|---|
| 125 | ! after coarse matrix is received allocate coarse vectors |
|---|
| 126 | if (associated(crhs)) then |
|---|
| 127 | if (size(crhs)/=CoarseMtx_%ncols) then |
|---|
| 128 | deallocate(csol) |
|---|
| 129 | deallocate(crhs) |
|---|
| 130 | allocate(crhs(CoarseMtx_%ncols)) |
|---|
| 131 | allocate(csol(CoarseMtx_%nrows)) |
|---|
| 132 | endif |
|---|
| 133 | else |
|---|
| 134 | allocate(crhs(CoarseMtx_%ncols)) |
|---|
| 135 | allocate(csol(CoarseMtx_%nrows)) |
|---|
| 136 | endif |
|---|
| 137 | ! allocate memory for vector |
|---|
| 138 | if (cdat%active) then |
|---|
| 139 | allocate(clrhs(cdat%nlfc)) |
|---|
| 140 | else |
|---|
| 141 | allocate(clrhs(cdat_vec%nlfc)) |
|---|
| 142 | end if |
|---|
| 143 | end if |
|---|
| 144 | |
|---|
| 145 | if (.not.associated(tmpsol)) then |
|---|
| 146 | !allocate(tmpsol(A%nrows)) |
|---|
| 147 | allocate(tmpsol(size(rhs))) |
|---|
| 148 | endif |
|---|
| 149 | |
|---|
| 150 | if (sctls%verbose>6) write(stream,*) "Restricting into local coarse vector", size(clrhs), Restrict%nrows, Restrict%ncols, & |
|---|
| 151 | cdat%nlfc, cdat%active, cdat_vec%nlfc, cdat_vec%active |
|---|
| 152 | if (cdat%active) then |
|---|
| 153 | ! Send coarse vector |
|---|
| 154 | call SpMtx_Ax(clrhs,Restrict,rhs,dozero=.true.) ! restrict <RA> |
|---|
| 155 | if (cdat_vec%active) then |
|---|
| 156 | call AllSendCoarseVector(clrhs,cdat_vec%nprocs,cdat_vec%cdisps,& |
|---|
| 157 | cdat_vec%send,useprev=.not.isFirstIter) |
|---|
| 158 | else |
|---|
| 159 | call AllSendCoarseVector(clrhs,cdat%nprocs,cdat%cdisps,& |
|---|
| 160 | cdat%send,useprev=.not.isFirstIter) |
|---|
| 161 | endif |
|---|
| 162 | end if ! cdat%active |
|---|
| 163 | end subroutine prec2Level_prepare |
|---|
| 164 | |
|---|
| 165 | subroutine prec2Level_solve() |
|---|
| 166 | if (.not.cdat%active) then ! 1 processor case |
|---|
| 167 | if (sctls%method>1.and.sctls%method/=5) then ! multiplicative Schwarz |
|---|
| 168 | call SpMtx_Ax(crhs,Restrict,res,dozero=.true.) ! restriction |
|---|
| 169 | else |
|---|
| 170 | call SpMtx_Ax(crhs,Restrict,rhs,dozero=.true.) ! restriction |
|---|
| 171 | endif |
|---|
| 172 | end if |
|---|
| 173 | |
|---|
| 174 | !csol=0.0_rk |
|---|
| 175 | if (cdat%active) then |
|---|
| 176 | ! Recieve the vector for solve |
|---|
| 177 | if (cdat_vec%active) then |
|---|
| 178 | call AllRecvCoarseVector(crhs,cdat_vec%nprocs,& |
|---|
| 179 | cdat_vec%cdisps,cdat_vec%glg_cfmap,cdat_vec%send) |
|---|
| 180 | else |
|---|
| 181 | call AllRecvCoarseVector(crhs,cdat%nprocs,& |
|---|
| 182 | cdat%cdisps,cdat%glg_cfmap,cdat%send) |
|---|
| 183 | endif |
|---|
| 184 | !call MPI_BARRIER(MPI_COMM_WORLD,i) |
|---|
| 185 | end if |
|---|
| 186 | |
|---|
| 187 | if (isFirstIter) then |
|---|
| 188 | write (stream,*) & |
|---|
| 189 | 'factorising coarse matrix of size',CoarseMtx_%nrows, & |
|---|
| 190 | ' and nnz:',CoarseMtx_%nnz |
|---|
| 191 | CoarseMtx_%subsolve_id=0 |
|---|
| 192 | end if |
|---|
| 193 | |
|---|
| 194 | ! Coarse solve |
|---|
| 195 | call sparse_singlesolve(CoarseMtx_%subsolve_id,csol,crhs,& |
|---|
| 196 | nfreds=CoarseMtx_%nrows, & |
|---|
| 197 | nnz=CoarseMtx_%nnz, & |
|---|
| 198 | indi=CoarseMtx_%indi, & |
|---|
| 199 | indj=CoarseMtx_%indj, & |
|---|
| 200 | val=CoarseMtx_%val) |
|---|
| 201 | if (isFirstIter) then |
|---|
| 202 | CoarseMtx_%indi=CoarseMtx_%indi+1 |
|---|
| 203 | CoarseMtx_%indj=CoarseMtx_%indj+1 |
|---|
| 204 | end if |
|---|
| 205 | |
|---|
| 206 | if (cdat_vec%active) then |
|---|
| 207 | call Vect_remap(csol,clrhs,cdat%gl_cfmap,dozero=.true.) |
|---|
| 208 | call SpMtx_Ax(tmpsol,Restrict,clrhs,dozero=.true.,transp=.true.) |
|---|
| 209 | |
|---|
| 210 | elseif (cdat%active) then |
|---|
| 211 | call Vect_remap(csol,clrhs,cdat%gl_cfmap,dozero=.true.) |
|---|
| 212 | call SpMtx_Ax(tmpsol,Restrict,clrhs,dozero=.true.,transp=.true.) |
|---|
| 213 | else |
|---|
| 214 | call SpMtx_Ax(tmpsol,Restrict,csol,dozero=.true.,transp=.true.) ! interpolation |
|---|
| 215 | endif |
|---|
| 216 | |
|---|
| 217 | if (sctls%method==1) then |
|---|
| 218 | !call Print_Glob_Vect(sol,M,'sol===',chk_endind=M%ninner) |
|---|
| 219 | sol=sol+tmpsol |
|---|
| 220 | elseif (sctls%method==3) then ! fully multiplicative Schwarz |
|---|
| 221 | sol(1:A%nrows)=sol(1:A%nrows)+tmpsol(1:A%nrows) |
|---|
| 222 | ! calculate the residual: |
|---|
| 223 | call SpMtx_Ax(res,A,sol,dozero=.true.) ! |
|---|
| 224 | res=rhs-res |
|---|
| 225 | elseif (sctls%method==2.and.ol>0) then ! fully multiplicative Schwarz |
|---|
| 226 | sol(1:A%nrows)=tmpsol(1:A%nrows) |
|---|
| 227 | ! calculate the residual: |
|---|
| 228 | call SpMtx_Ax(res,A,sol,dozero=.true.) ! |
|---|
| 229 | res=rhs-res |
|---|
| 230 | endif |
|---|
| 231 | if (((ol==0.and.sctls%method==2).or.sctls%method==5).and.sctls%levels>1) then |
|---|
| 232 | ! multiplicative on fine level, additive with coarse level: |
|---|
| 233 | sol(1:A%nrows)=sol(1:A%nrows)+tmpsol(1:A%nrows) |
|---|
| 234 | endif |
|---|
| 235 | end subroutine prec2Level_solve |
|---|
| 236 | |
|---|
| 237 | end subroutine prec2Level |
|---|
| 238 | |
|---|
| 239 | !------------------------------- |
|---|
| 240 | !> Make preconditioner |
|---|
| 241 | !------------------------------- |
|---|
| 242 | subroutine preconditioner(sol,A,rhs,M,DD,& |
|---|
| 243 | A_ghost,CoarseMtx_,Restrict,refactor,bugtrack_) |
|---|
| 244 | use CoarseAllgathers |
|---|
| 245 | use CoarseMtx_mod |
|---|
| 246 | use Vect_mod |
|---|
| 247 | implicit none |
|---|
| 248 | real(kind=rk),dimension(:),pointer :: sol !< solution |
|---|
| 249 | type(SpMtx) :: A !< sparse system matrix |
|---|
| 250 | real(kind=rk),dimension(:),pointer :: rhs !< right hand side |
|---|
| 251 | type(Mesh),intent(in) :: M !< Mesh |
|---|
| 252 | type(Decomposition),intent(inout) :: DD !< domains |
|---|
| 253 | real(kind=rk),dimension(:),pointer :: res !< residual vector, allocated |
|---|
| 254 | !! here for multiplicative Schwarz |
|---|
| 255 | type(SpMtx),optional :: A_ghost !< matr@interf. |
|---|
| 256 | type(SpMtx),optional :: CoarseMtx_ !< Coarse matrix |
|---|
| 257 | type(SpMtx),optional :: Restrict !< Restriction matrix |
|---|
| 258 | logical,intent(inout),optional :: refactor |
|---|
| 259 | logical,optional :: bugtrack_ |
|---|
| 260 | ! ----- local: ------ |
|---|
| 261 | integer :: i |
|---|
| 262 | logical :: add,bugtrack |
|---|
| 263 | real(kind=rk) :: t1 |
|---|
| 264 | logical :: isFirstIter |
|---|
| 265 | |
|---|
| 266 | if (sctls%verbose>4) write(stream,*) "Applying preconditioner" |
|---|
| 267 | t1 = MPI_WTime() |
|---|
| 268 | |
|---|
| 269 | if (present(bugtrack_)) then |
|---|
| 270 | bugtrack=bugtrack_ |
|---|
| 271 | else |
|---|
| 272 | bugtrack=.false. |
|---|
| 273 | endif |
|---|
| 274 | ! ---------------------------- |
|---|
| 275 | isFirstIter = .false. |
|---|
| 276 | if (present(refactor)) isFirstIter = refactor |
|---|
| 277 | |
|---|
| 278 | if (sctls%method==0) then |
|---|
| 279 | sol=rhs |
|---|
| 280 | return |
|---|
| 281 | endif |
|---|
| 282 | |
|---|
| 283 | sol=0.0_rk |
|---|
| 284 | if (sctls%method>1) then ! For multiplicative Schwarz method...: |
|---|
| 285 | allocate(res(size(rhs))) |
|---|
| 286 | endif |
|---|
| 287 | |
|---|
| 288 | if (sctls%levels>1) then |
|---|
| 289 | call prec2Level(.true.,A,sol,rhs,res,CoarseMtx_,Restrict,isFirstIter) |
|---|
| 290 | end if |
|---|
| 291 | |
|---|
| 292 | ! first level prec |
|---|
| 293 | call prec1Level(DD,sol,A,rhs,A_ghost,refactor) |
|---|
| 294 | |
|---|
| 295 | if (sctls%levels>1) then |
|---|
| 296 | call prec2Level(.false.,A,sol,rhs,res,CoarseMtx_,Restrict,isFirstIter) |
|---|
| 297 | end if |
|---|
| 298 | |
|---|
| 299 | time_preconditioner = time_preconditioner + MPI_WTime()-t1 |
|---|
| 300 | |
|---|
| 301 | end subroutine preconditioner |
|---|
| 302 | |
|---|
| 303 | !-------------------------- |
|---|
| 304 | ! Solve |
|---|
| 305 | !-------------------------- |
|---|
| 306 | subroutine msolve (m, r, z) |
|---|
| 307 | implicit none |
|---|
| 308 | real(kind=rk), dimension(:), intent(in) :: m, r |
|---|
| 309 | real(kind=rk), dimension(:), intent(in out) :: z |
|---|
| 310 | integer :: i |
|---|
| 311 | |
|---|
| 312 | do i = 1, size(r) |
|---|
| 313 | z(i) = r(i) !/ m(i) |
|---|
| 314 | end do |
|---|
| 315 | end subroutine msolve |
|---|
| 316 | |
|---|
| 317 | !-------------------------- |
|---|
| 318 | !> Preconditioned conjugent gradient method with eigenvalues |
|---|
| 319 | !-------------------------- |
|---|
| 320 | subroutine pcg_weigs (A,b,x,Msh,DomDec,it,cond_num,A_interf_,tol_,maxit_, & |
|---|
| 321 | x0_,solinf,resvects_,CoarseMtx_,Restrict,refactor_) |
|---|
| 322 | use CoarseAllgathers |
|---|
| 323 | |
|---|
| 324 | implicit none |
|---|
| 325 | |
|---|
| 326 | type(SpMtx),intent(in out) :: A !< System matrix (sparse) |
|---|
| 327 | float(kind=rk),dimension(:),pointer :: b !< right hand side |
|---|
| 328 | float(kind=rk),dimension(:),pointer :: x !< Solution |
|---|
| 329 | type(Mesh),intent(in) :: Msh !< Mesh - aux data for Ax operation |
|---|
| 330 | type(Decomposition),intent(inout) :: DomDec !< Domain decomposition |
|---|
| 331 | |
|---|
| 332 | integer,intent(out) :: it |
|---|
| 333 | real(kind=rk),intent(out) :: cond_num |
|---|
| 334 | |
|---|
| 335 | ! Optional arguments |
|---|
| 336 | type(SpMtx),intent(in out),optional :: A_interf_ !< matr@interf. |
|---|
| 337 | real(kind=rk), intent(in), optional :: tol_ !< Tolerance of the method |
|---|
| 338 | integer, intent(in), optional :: maxit_ !< Max number of iterations |
|---|
| 339 | float(kind=rk), dimension(:), intent(in), optional :: x0_ !< Initial guess |
|---|
| 340 | type(ConvInf), intent(in out), optional :: solinf !< Solution statistics |
|---|
| 341 | logical, intent(in), optional :: resvects_ !< Fill in the 'resvect' or not |
|---|
| 342 | type(SpMtx),optional :: CoarseMtx_ !< Coarse matrix |
|---|
| 343 | type(SpMtx),optional :: Restrict !< Restriction mtx |
|---|
| 344 | logical,intent(in),optional :: refactor_ |
|---|
| 345 | |
|---|
| 346 | ! Local variables |
|---|
| 347 | logical :: refactor |
|---|
| 348 | |
|---|
| 349 | real(kind=rk) :: tol ! Tolerance |
|---|
| 350 | integer :: maxit ! Max number of iterations |
|---|
| 351 | logical :: resvects=.false. |
|---|
| 352 | |
|---|
| 353 | real(kind=rk),dimension(:),pointer,save :: r, p, q, m, z, b_k |
|---|
| 354 | real(kind=rk),dimension(:),pointer,save :: ztmp !TODO remove me |
|---|
| 355 | real(kind=rk) :: rho_curr, rho_prev, init_norm, res_norm |
|---|
| 356 | real(kind=rk) :: tmp,ratio_norm |
|---|
| 357 | integer :: nfreds |
|---|
| 358 | integer :: ierr |
|---|
| 359 | real(kind=rk),dimension(:),pointer,save :: dd,ee,alpha,beta |
|---|
| 360 | !logical,parameter :: bugtrack=.true. |
|---|
| 361 | logical,parameter :: bugtrack=.false. |
|---|
| 362 | ! profiling |
|---|
| 363 | real(8) :: time_iterations, t1 |
|---|
| 364 | |
|---|
| 365 | write(stream,'(/a)') 'Preconditioned conjugate gradient:' |
|---|
| 366 | if (present(refactor_).and.refactor_) then |
|---|
| 367 | refactor=.true. |
|---|
| 368 | else |
|---|
| 369 | refactor=.false. |
|---|
| 370 | endif |
|---|
| 371 | |
|---|
| 372 | if (size(b) /= size(x)) & |
|---|
| 373 | call DOUG_abort('[pcg] : SEVERE : size(b) /= size(x)',-1) |
|---|
| 374 | |
|---|
| 375 | nfreds=size(b) |
|---|
| 376 | if (.not.associated(r)) then |
|---|
| 377 | allocate(r(nfreds)) |
|---|
| 378 | allocate(p(nfreds)) |
|---|
| 379 | allocate(q(nfreds)) |
|---|
| 380 | allocate(m(nfreds)) |
|---|
| 381 | allocate(z(nfreds)) |
|---|
| 382 | allocate(b_k(nfreds)) |
|---|
| 383 | endif |
|---|
| 384 | |
|---|
| 385 | tol = sctls%solve_tolerance |
|---|
| 386 | if (present(tol_)) & |
|---|
| 387 | tol = tol_ |
|---|
| 388 | |
|---|
| 389 | maxit = sctls%solve_maxiters |
|---|
| 390 | if (maxit==-1) maxit=100000 |
|---|
| 391 | if (present(maxit_)) & |
|---|
| 392 | maxit = maxit_ |
|---|
| 393 | |
|---|
| 394 | if (present(resvects_).and.(.not.present(solinf))) & |
|---|
| 395 | call DOUG_abort('[pcg] : SEVERE : "resvects_" must be given along'//& |
|---|
| 396 | ' with "solinf".',-1) |
|---|
| 397 | |
|---|
| 398 | ! Allocate convergence info structure |
|---|
| 399 | if (present(solinf).and.(.not.present(resvects_))) then |
|---|
| 400 | call ConvInf_Init(solinf) |
|---|
| 401 | else if (present(solinf).and.& |
|---|
| 402 | (present(resvects_).and.(resvects_.eqv.(.true.)))) then |
|---|
| 403 | call ConvInf_Init(solinf, maxit) |
|---|
| 404 | resvects = .true. |
|---|
| 405 | end if |
|---|
| 406 | |
|---|
| 407 | |
|---|
| 408 | ! Initialise auxiliary data structures |
|---|
| 409 | ! to assist with pmvm |
|---|
| 410 | call pmvmCommStructs_init(A, Msh) |
|---|
| 411 | |
|---|
| 412 | |
|---|
| 413 | if (bugtrack)call Print_Glob_Vect(x,Msh,'global x===') |
|---|
| 414 | call SpMtx_pmvm(r,A,x,Msh) |
|---|
| 415 | |
|---|
| 416 | r = b - r |
|---|
| 417 | init_norm = Vect_dot_product(r,r) |
|---|
| 418 | if (init_norm == 0.0) & |
|---|
| 419 | init_norm = 1.0 |
|---|
| 420 | |
|---|
| 421 | |
|---|
| 422 | write(stream,'(a,i6,a,e8.2)') 'maxit = ',maxit,', tol = ',tol |
|---|
| 423 | write(stream,'(a,e22.15)') 'init_norm = ', dsqrt(init_norm) |
|---|
| 424 | |
|---|
| 425 | it = 0 |
|---|
| 426 | ratio_norm = 1.0_rk |
|---|
| 427 | ! arrays for eigenvalue calculation: |
|---|
| 428 | if (.not.associated(dd)) then |
|---|
| 429 | allocate(dd(maxit)) |
|---|
| 430 | allocate(ee(maxit)) |
|---|
| 431 | allocate(alpha(maxit)) |
|---|
| 432 | allocate(beta(maxit)) |
|---|
| 433 | endif |
|---|
| 434 | |
|---|
| 435 | time_iterations = 0. |
|---|
| 436 | ! iterations |
|---|
| 437 | do while((ratio_norm > tol*tol).and.(it < maxit)) |
|---|
| 438 | it = it + 1 |
|---|
| 439 | t1 = MPI_WTime() |
|---|
| 440 | |
|---|
| 441 | if (bugtrack)call Print_Glob_Vect(r,Msh,'global r===',chk_endind=Msh%ninner) |
|---|
| 442 | call preconditioner(sol=z, & |
|---|
| 443 | A=A, & |
|---|
| 444 | rhs=r, & |
|---|
| 445 | M=Msh, & |
|---|
| 446 | DD=DomDec, & |
|---|
| 447 | A_ghost=A_interf_, & |
|---|
| 448 | CoarseMtx_=CoarseMtx_, & |
|---|
| 449 | Restrict=Restrict, & |
|---|
| 450 | refactor=refactor, & |
|---|
| 451 | bugtrack_=bugtrack) |
|---|
| 452 | |
|---|
| 453 | refactor=.false. |
|---|
| 454 | if (bugtrack)call Print_Glob_Vect(z,Msh,'global bef comm z===',chk_endind=Msh%ninner) |
|---|
| 455 | if (sctls%method/=0) then |
|---|
| 456 | call Add_common_interf(z,A,Msh) |
|---|
| 457 | endif |
|---|
| 458 | |
|---|
| 459 | !call Print_Glob_Vect(z,Msh,'global aft comm z===',chk_endind=Msh%ninner) |
|---|
| 460 | !call Print_Glob_Vect(z,Msh,'global z===') |
|---|
| 461 | ! compute current rho |
|---|
| 462 | rho_curr = Vect_dot_product(r,z) |
|---|
| 463 | |
|---|
| 464 | if (it == 1) then |
|---|
| 465 | p = z |
|---|
| 466 | else |
|---|
| 467 | beta(it) = rho_curr / rho_prev |
|---|
| 468 | p = z + beta(it) * p |
|---|
| 469 | end if |
|---|
| 470 | call SpMtx_pmvm(q,A, p, Msh) |
|---|
| 471 | if (bugtrack)call Print_Glob_Vect(q,Msh,'global q===') |
|---|
| 472 | ! compute alpha |
|---|
| 473 | alpha(it) = rho_curr / Vect_dot_product(p,q) |
|---|
| 474 | x = x + alpha(it) * p |
|---|
| 475 | if (bugtrack)call Print_Glob_Vect(x,Msh,'global x===') |
|---|
| 476 | r = r - alpha(it) * q |
|---|
| 477 | if (bugtrack)call Print_Glob_Vect(r,Msh,'global r===') |
|---|
| 478 | rho_prev = rho_curr |
|---|
| 479 | ! check |
|---|
| 480 | res_norm = Vect_dot_product(r,r) |
|---|
| 481 | !write (stream,*) "Norm is", res_norm |
|---|
| 482 | ratio_norm = res_norm / init_norm |
|---|
| 483 | |
|---|
| 484 | time_iterations = time_iterations + MPI_WTime()-t1 |
|---|
| 485 | |
|---|
| 486 | if (ismaster()) & |
|---|
| 487 | write(stream, '(i5,a,e22.15)') it,': res_norm=',dsqrt(res_norm) |
|---|
| 488 | end do |
|---|
| 489 | |
|---|
| 490 | if(pstream/=0) then |
|---|
| 491 | write(pstream, "(I0,':pcg iterations:',I0)") myrank, it |
|---|
| 492 | write(pstream, "(I0,':iterations time:',F0.3)") myrank, time_iterations |
|---|
| 493 | write(pstream, "(I0,':preconditioner time:',F0.3)") myrank, time_preconditioner |
|---|
| 494 | end if |
|---|
| 495 | |
|---|
| 496 | if (ismaster()) then |
|---|
| 497 | call CalculateEigenvalues(it,dd,ee,alpha,beta,maxit) |
|---|
| 498 | cond_num=dd(it)/dd(1) |
|---|
| 499 | write(stream,'(a,i3,a,e10.4)') '#it:',it,' Cond#: ',cond_num |
|---|
| 500 | endif |
|---|
| 501 | !deallocate(beta) |
|---|
| 502 | !deallocate(alpha) |
|---|
| 503 | !deallocate(ee) |
|---|
| 504 | !deallocate(dd) |
|---|
| 505 | ! Deallocate auxiliary data structures |
|---|
| 506 | ! helped to assist with pmvm |
|---|
| 507 | ! call pmvmCommStructs_destroy() |
|---|
| 508 | |
|---|
| 509 | !deallocate(b_k) |
|---|
| 510 | !deallocate(z) |
|---|
| 511 | !deallocate(m) |
|---|
| 512 | !deallocate(q) |
|---|
| 513 | !deallocate(p) |
|---|
| 514 | !deallocate(r) |
|---|
| 515 | end subroutine pcg_weigs |
|---|
| 516 | |
|---|
| 517 | |
|---|
| 518 | subroutine CalculateEigenvalues(it,dd,ee,alpha,beta,MaxIt) |
|---|
| 519 | ! Finds eigenvalues using Lanczos connection |
|---|
| 520 | implicit none |
|---|
| 521 | integer :: it,MaxIt |
|---|
| 522 | real(kind=rk),dimension(:),pointer :: dd,ee,alpha,beta |
|---|
| 523 | integer :: i,j,ierr |
|---|
| 524 | |
|---|
| 525 | dd(1) = 1.0_rk/alpha(1) |
|---|
| 526 | ee(1) = 0.0_rk |
|---|
| 527 | do i=2,it |
|---|
| 528 | if (alpha(i)==0.or.beta(i)<0) then |
|---|
| 529 | print *,'Unable to compute eigenvalue number',i,' and ' |
|---|
| 530 | do j=1,i |
|---|
| 531 | print *,j,' alpha:',alpha(j),' beta:',beta |
|---|
| 532 | enddo |
|---|
| 533 | return |
|---|
| 534 | else |
|---|
| 535 | dd(i) = 1.0_rk/alpha(i) + beta(i)/alpha(i-1) |
|---|
| 536 | ee(i) = -dsqrt(beta(i))/alpha(i-1) |
|---|
| 537 | endif |
|---|
| 538 | enddo |
|---|
| 539 | !call tql1(it,dd,ee,MaxIt,ierr) |
|---|
| 540 | call tql1(it,dd,ee,ierr) |
|---|
| 541 | if (ierr > 0) then |
|---|
| 542 | print *,'Cancelled after EigMaxIt while calculating eig',ierr |
|---|
| 543 | endif |
|---|
| 544 | end subroutine CalculateEigenvalues |
|---|
| 545 | |
|---|
| 546 | subroutine tql1(n,d,e,ierr) |
|---|
| 547 | implicit none |
|---|
| 548 | integer :: i,j,l,m,n,ii,l1,l2,mml,ierr |
|---|
| 549 | real(kind=rk) :: d(n),e(n) |
|---|
| 550 | real(kind=rk) :: c,c2,c3,dl1,el1,f,g,h,p,r,s,s2,tst1,tst2 !,pythag |
|---|
| 551 | ! |
|---|
| 552 | ! this subroutine is a translation of the algol procedure tql1, |
|---|
| 553 | ! num. math. 11, 293-306(1968) by bowdler, martin, reinsch, and |
|---|
| 554 | ! wilkinson. |
|---|
| 555 | ! handbook for auto. comp., vol.ii-linear algebra, 227-240(1971). |
|---|
| 556 | ! |
|---|
| 557 | ! this subroutine finds the eigenvalues of a symmetric |
|---|
| 558 | ! tridiagonal matrix by the ql method. |
|---|
| 559 | ! |
|---|
| 560 | ! on input |
|---|
| 561 | ! n is the order of the matrix. |
|---|
| 562 | ! d contains the diagonal elements of the input matrix. |
|---|
| 563 | ! e contains the subdiagonal elements of the input matrix |
|---|
| 564 | ! in its last n-1 positions. e(1) is arbitrary. |
|---|
| 565 | ! |
|---|
| 566 | ! on output |
|---|
| 567 | ! d contains the eigenvalues in ascending order. if an |
|---|
| 568 | ! error exit is made, the eigenvalues are correct and |
|---|
| 569 | ! ordered for indices 1,2,...ierr-1, but may not be |
|---|
| 570 | ! the smallest eigenvalues. |
|---|
| 571 | ! e has been destroyed. |
|---|
| 572 | ! ierr is set to |
|---|
| 573 | ! zero for normal return, |
|---|
| 574 | ! j if the j-th eigenvalue has not been |
|---|
| 575 | ! determined after 30 iterations. |
|---|
| 576 | ! calls pythag for sqrt(a*a + b*b) . |
|---|
| 577 | ! |
|---|
| 578 | ! questions and comments should be directed to burton s. garbow, |
|---|
| 579 | ! mathematics and computer science div, argonne national laboratory |
|---|
| 580 | ! |
|---|
| 581 | ! this version dated august 1983. |
|---|
| 582 | ! |
|---|
| 583 | ! ------------------------------------------------------------------ |
|---|
| 584 | ! |
|---|
| 585 | ierr = 0 |
|---|
| 586 | if (n .eq. 1) go to 1001 |
|---|
| 587 | ! |
|---|
| 588 | do 100 i = 2, n |
|---|
| 589 | 100 e(i-1) = e(i) |
|---|
| 590 | ! |
|---|
| 591 | f = 0.0e0 |
|---|
| 592 | tst1 = 0.0e0 |
|---|
| 593 | e(n) = 0.0e0 |
|---|
| 594 | |
|---|
| 595 | do 290 l = 1, n |
|---|
| 596 | j = 0 |
|---|
| 597 | h = abs(d(l)) + abs(e(l)) |
|---|
| 598 | if (tst1 .lt. h) tst1 = h |
|---|
| 599 | ! .......... look for small sub-diagonal element .......... |
|---|
| 600 | do 110 m = l, n |
|---|
| 601 | tst2 = tst1 + abs(e(m)) |
|---|
| 602 | if (tst2 .eq. tst1) go to 120 |
|---|
| 603 | ! .......... e(n) is always zero, so there is no exit |
|---|
| 604 | ! throu gh the bottom of the loop .......... |
|---|
| 605 | 110 continue |
|---|
| 606 | |
|---|
| 607 | 120 if (m .eq. l) go to 210 |
|---|
| 608 | 130 if (j .eq. 30 ) go to 1000 |
|---|
| 609 | j = j + 1 |
|---|
| 610 | ! .......... form shift .......... |
|---|
| 611 | l1 = l + 1 |
|---|
| 612 | l2 = l1 + 1 |
|---|
| 613 | g = d(l) |
|---|
| 614 | p = (d(l1) - g) / (2.0e0 * e(l)) |
|---|
| 615 | r = pythag(p,1.0e0_rk) |
|---|
| 616 | d(l) = e(l) / (p + sign(r,p)) |
|---|
| 617 | d(l1) = e(l) * (p + sign(r,p)) |
|---|
| 618 | dl1 = d(l1) |
|---|
| 619 | h = g - d(l) |
|---|
| 620 | if (l2 .gt. n) go to 145 |
|---|
| 621 | |
|---|
| 622 | do 140 i = l2, n |
|---|
| 623 | 140 d(i) = d(i) - h |
|---|
| 624 | |
|---|
| 625 | 145 f = f + h |
|---|
| 626 | ! .......... ql transformation .......... |
|---|
| 627 | p = d(m) |
|---|
| 628 | c = 1.0e0 |
|---|
| 629 | c2 = c |
|---|
| 630 | el1 = e(l1) |
|---|
| 631 | s = 0.0e0 |
|---|
| 632 | mml = m - l |
|---|
| 633 | ! .......... for i=m-1 step -1 until l do -- .......... |
|---|
| 634 | do 200 ii = 1, mml |
|---|
| 635 | c3 = c2 |
|---|
| 636 | c2 = c |
|---|
| 637 | s2 = s |
|---|
| 638 | i = m - ii |
|---|
| 639 | g = c * e(i) |
|---|
| 640 | h = c * p |
|---|
| 641 | r = pythag(p,e(i)) |
|---|
| 642 | e(i+1) = s * r |
|---|
| 643 | s = e(i) / r |
|---|
| 644 | c = p / r |
|---|
| 645 | p = c * d(i) - s * g |
|---|
| 646 | d(i+1) = h + s * (c * g + s * d(i)) |
|---|
| 647 | 200 continue |
|---|
| 648 | |
|---|
| 649 | p = -s * s2 * c3 * el1 * e(l) / dl1 |
|---|
| 650 | e(l) = s * p |
|---|
| 651 | d(l) = c * p |
|---|
| 652 | tst2 = tst1 + abs(e(l)) |
|---|
| 653 | if (tst2 .gt. tst1) go to 130 |
|---|
| 654 | 210 p = d(l) + f |
|---|
| 655 | ! .......... order eigenvalues .......... |
|---|
| 656 | if (l .eq. 1) go to 250 |
|---|
| 657 | ! .......... for i=l step -1 until 2 do -- .......... |
|---|
| 658 | do 230 ii = 2, l |
|---|
| 659 | i = l + 2 - ii |
|---|
| 660 | if (p .ge. d(i-1)) go to 270 |
|---|
| 661 | d(i) = d(i-1) |
|---|
| 662 | 230 continue |
|---|
| 663 | |
|---|
| 664 | 250 i = 1 |
|---|
| 665 | 270 d(i) = p |
|---|
| 666 | 290 continue |
|---|
| 667 | |
|---|
| 668 | go to 1001 |
|---|
| 669 | ! .......... set error -- no convergence to an |
|---|
| 670 | ! eigenvalue after 30 iterations .......... |
|---|
| 671 | 1000 ierr = l |
|---|
| 672 | 1001 return |
|---|
| 673 | end subroutine tql1 |
|---|
| 674 | |
|---|
| 675 | function pythag(a,b) result(pyt) |
|---|
| 676 | ! finds dsqrt(a**2+b**2) without overflow or destructive underflow |
|---|
| 677 | implicit none |
|---|
| 678 | real(kind=rk) :: a,b,pyt |
|---|
| 679 | real(kind=rk) :: p,r,s,t,u |
|---|
| 680 | |
|---|
| 681 | p = dmax1(dabs(a),dabs(b)) |
|---|
| 682 | if (p .eq. 0.0d0) go to 20 |
|---|
| 683 | r = (dmin1(dabs(a),dabs(b))/p)**2 |
|---|
| 684 | 10 continue |
|---|
| 685 | t = 4.0d0 + r |
|---|
| 686 | if (t .eq. 4.0d0) go to 20 |
|---|
| 687 | s = r/t |
|---|
| 688 | u = 1.0d0 + 2.0d0*s |
|---|
| 689 | p = u*p |
|---|
| 690 | r = (s/u)**2 * r |
|---|
| 691 | go to 10 |
|---|
| 692 | 20 pyt = p |
|---|
| 693 | return |
|---|
| 694 | end function pythag |
|---|
| 695 | |
|---|
| 696 | |
|---|
| 697 | end module pcg_mod |
|---|
| 698 | |
|---|
| 699 | |
|---|