source: src/solvers/pcg.F90 @ a10a43b

Revision a10a43b, 35.8 KB checked in by Oleg Batrashev <ogbash@…>, 3 years ago (diff)

First level preconditioner (if used without 2. level) is factored out and works from C.

  • Property mode set to 100644
Line 
1! DOUG - Domain decomposition On Unstructured Grids
2! Copyright (C) 1998-2006 Faculty of Computer Science, University of Tartu and
3! Department of Mathematics, University of Bath
4!
5! This library is free software; you can redistribute it and/or
6! modify it under the terms of the GNU Lesser General Public
7! License as published by the Free Software Foundation; either
8! version 2.1 of the License, or (at your option) any later version.
9!
10! This library is distributed in the hope that it will be useful,
11! but WITHOUT ANY WARRANTY; without even the implied warranty of
12! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13! Lesser General Public License for more details.
14!
15! You should have received a copy of the GNU Lesser General Public
16! License along with this library; if not, write to the Free Software
17! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18! or contact the authors (University of Tartu, Faculty of Computer Science, Chair
19! of Distributed Systems, Liivi 2, 50409 Tartu, Estonia, http://dougdevel.org,
20! mailto:info(at)dougdevel.org)
21
22!!----------------------------------
23!! Preconditioned Conjugate Gradient
24!!----------------------------------
25module pcg_mod
26
27  use ConvInf_mod
28  use SpMtx_mods
29  use Mesh_class
30  use globals
31
32  implicit none
33
34#include<doug_config.h>
35
36#ifdef D_COMPLEX
37#define float complex
38#else
39#define float real
40#endif
41
42  integer,save :: coarseid=0
43  private :: &
44       preconditioner, &
45       msolve
46
47  ! profiling
48  real(kind=rk), private, save :: time_preconditioner = 0
49
50contains
51
52  !--------------------------------------------
53  ! Preconditioned conjugate gradient method
54  !--------------------------------------------
55  subroutine pcg (A, b, x, Msh, tol_, maxit_, &
56       x0_, solinf, resvects_, CoarseMtx_)
57    implicit none
58
59    type(SpMtx),intent(in out) :: A ! System matrix (sparse)
60    float(kind=rk),dimension(:),pointer :: b ! RHS
61    float(kind=rk),dimension(:),pointer :: x ! Solution
62    ! Mesh - aux data for Ax operation
63    type(Mesh),                       intent(in) :: Msh
64
65    ! Optional arguments
66    ! Tolerance of the method
67    real(kind=rk),          intent(in), optional :: tol_
68    ! Max number of iterations
69    integer,                intent(in), optional :: maxit_
70    ! Initial guess
71    float(kind=rk), dimension(:), intent(in), optional :: x0_
72    ! Solution statistics
73    type(ConvInf),            intent(in out), optional :: solinf
74    ! Fill in the 'resvect' or not
75    logical,                      intent(in), optional :: resvects_
76    type(SpMtx),optional                         :: CoarseMtx_ ! Coarse matrix
77
78    real(kind=rk) :: tol   ! Tolerance
79    integer       :: maxit ! Max number of iterations
80    logical       :: resvects=.false.
81
82    !real(kind=rk), dimension(size(b)) :: r, p, q, m, z, b_k
83    real(kind=rk),dimension(:),pointer :: r, p, q, m, z, b_k
84    real(kind=rk) :: rho_curr, rho_prev, init_norm, res_norm
85    real(kind=rk) :: alpha, beta, tmp, ratio_norm
86    integer :: iter,nfreds
87    integer :: ierr
88    ! + test
89    float(kind=rk), dimension(:), pointer :: arr_copy, gv_tmp
90
91!!$    real(kind=rk) :: r_max
92
93    write(stream,'(/a)') 'Preconditioned conjugate gradient:'
94
95    if (size(b) /= size(x)) &
96         call DOUG_abort('[pcg] : SEVERE : size(b) /= size(x)',-1)
97
98!!$    if (present(x0_))
99!!$         ! use method with initial guess
100
101
102    nfreds=size(b)
103    allocate(r(nfreds))
104    allocate(p(nfreds))
105    allocate(q(nfreds))
106    allocate(m(nfreds))
107    allocate(z(nfreds))
108    allocate(b_k(nfreds))
109
110
111    tol = sctls%solve_tolerance
112    if (present(tol_)) &
113         tol = tol_
114
115    maxit = sctls%solve_maxiters
116    if (maxit==-1) maxit=100000
117    if (present(maxit_)) &
118         maxit = maxit_
119
120    if (present(resvects_).and.(.not.present(solinf))) &
121         call DOUG_abort('[pcg] : SEVERE : "resvects_" must be given along'//&
122         ' with "solinf".',-1)
123
124    ! Allocate convergence info structure
125    if (present(solinf).and.(.not.present(resvects_))) then
126       call ConvInf_Init(solinf)
127    else if (present(solinf)) then
128       if (present(resvects_).and.(resvects_.eqv.(.true.))) then
129         call ConvInf_Init(solinf, maxit)
130         resvects = .true.
131       end if
132    end if
133
134    ! Initialise auxiliary data structures
135    ! to assist with pmvm
136    call pmvmCommStructs_init(A, Msh)
137
138    ! + test
139    if (ismaster()) &
140         allocate(gv_tmp(Msh%ngf))
141    allocate(arr_copy(Msh%nlf))
142
143    ! Build preconditioner
144    !  call preconditioner(A, m, CoarseMtx_)
145!!$    ! + test
146!!$    arr_copy = m
147!!$    call Vect_newToOldPerm(arr_copy)
148!!$    gv_tmp = 0.0_rk
149!!$    call Vect_Gather(arr_copy, gv_tmp, Msh)
150!!$    if (ismaster()) &
151!!$         call Vect_Print(gv_tmp, 'm ::')
152
153!!$    ! + test
154!!$    !b = 1.0_rk
155!!$    arr_copy = b
156!!$    call Vect_newToOldPerm(arr_copy)
157!!$    call Vect_Print(arr_copy, 'b local (original ordering) ::')
158!!$    gv_tmp = 0.0_rk
159!!$    call Vect_Gather(arr_copy, gv_tmp, Msh)
160!!$    if (ismaster()) &
161!!$         call Vect_Print(gv_tmp, 'b global (original ordering) ::')
162
163!!$    ! + test
164!!$    arr_copy = x
165!!$    call Vect_newToOldPerm(arr_copy)
166!!$    call Vect_Print(arr_copy, 'x local (original ordering) ::')
167!!$    gv_tmp = 0.0_rk
168!!$    call Vect_Gather(arr_copy, gv_tmp, Msh)
169!!$    if (ismaster()) &
170!!$         call Vect_Print(gv_tmp, 'x global (original ordering) ::')
171
172    !r = b - SpMtx_pmvm(A, x, Msh)
173    call SpMtx_pmvm(r,A,x,Msh)
174    r = b - r
175    !call Vect_Print(r,'initial residual')
176
177    ! + test
178!!$    arr_copy = r
179!!$    call Vect_newToOldPerm(arr_copy)
180!!$    gv_tmp = 0.0_rk
181!!$    call Vect_Gather(arr_copy, gv_tmp, Msh)
182!!$    if (ismaster()) &
183!!$         call Vect_Print(gv_tmp, 'r ::')
184
185    init_norm = Vect_dot_product(r,r)
186    if (init_norm == 0.0) &
187         init_norm = 1.0
188
189    write(stream,'(a,i6,a,e8.2)') 'maxit = ',maxit,', tol = ',tol
190    write(stream,'(a,e22.15)') 'init_norm = ', dsqrt(init_norm)
191
192    iter = 1
193    ratio_norm = 1.0_rk
194    do while((ratio_norm > tol*tol).and.(iter <= maxit))
195       call msolve(m,r,z)
196       !if (present(CoarseMtx_)) then
197       !  call preconditioner(z,A,r,CoarseMtx_)
198       !  !z=r
199       !else
200       !  call preconditioner(z,A,r)
201       !endif
202
203       !call Vect_Print(z,'preconditioner')
204
205       ! compute current rho
206       rho_curr = Vect_dot_product(r,z)
207       if (iter == 1) then
208          p = z
209       else
210          beta = rho_curr / rho_prev
211          p = z + beta * p
212       end if
213       !q = SpMtx_pmvm(A, p, Msh)
214       call SpMtx_pmvm(q,A,p,Msh)
215       ! compute alpha
216       alpha = rho_curr / Vect_dot_product(p,q)
217       x = x + alpha * p
218       r = r - alpha * q
219       rho_prev = rho_curr
220       ! check
221       res_norm = Vect_dot_product(r,r)
222       ratio_norm = dsqrt(res_norm) / dsqrt(init_norm)
223!!$       ratio_norm = res_norm / init_norm
224       if (ismaster()) &
225            write(stream, '(i5,a,e22.15)') iter,': res_norm=',dsqrt(res_norm)
226            !write(stream, '(i5,a,e22.15,e22.15)') iter,': dsqrt(ratio_norm)=',&
227            !dsqrt(ratio_norm), dsqrt(res_norm)
228       iter = iter + 1
229    end do
230
231    ! Deallocate auxiliary data structures
232    ! helped to assist with pmvm
233    call pmvmCommStructs_destroy()
234
235    deallocate(b_k)
236    deallocate(z)
237    deallocate(m)
238    deallocate(q)
239    deallocate(p)
240    deallocate(r)
241    if (associated(gv_tmp)) deallocate(gv_tmp)
242    if (associated(arr_copy)) deallocate(arr_copy)
243  end subroutine pcg
244
245  subroutine preconditioner_1level(sol,A,rhs,M,res,A_interf_,refactor_)
246    use subsolvers
247    implicit none
248    real(kind=rk),dimension(:),pointer :: sol !< solution
249    type(SpMtx)                        :: A   !< sparse system matrix
250    real(kind=rk),dimension(:),pointer :: rhs !< right hand side
251    type(Mesh),intent(in)              :: M   !< Mesh
252    real(kind=rk),dimension(:),pointer :: res !< residual vector, allocated
253                                              !! here for multiplicative Schwarz
254    type(SpMtx),optional               :: A_interf_  !< matr@interf.
255    logical,intent(inout),optional :: refactor_
256
257    type(SpMtx)                        :: A_tmp
258
259    if (refactor_.and.present(A_interf_)) then!{
260       if (sctls%verbose>9) then
261          !call SpMtx_printMat(A)
262          call SpMtx_printRaw(A)
263          !call SpMtx_printMat(A_interf_)
264          call SpMtx_printRaw(A_interf_)
265          !stop
266       endif
267       if (sctls%input_type==DCTL_INPUT_TYPE_ASSEMBLED) then
268          ! we need the fully sorted entries but A has block struct...
269          !   keep the original values...
270          if (associated(A%M_bound)) then
271             A_tmp=SpMtx_newInit(nnz=A%nnz,nblocks=A%nblocks,&
272                  nrows=A%nrows,ncols=A%ncols,&
273                  indi=A%indi,indj=A%indj,val=A%val,&
274                  arrange_type=A%arrange_type,M_bound=A%M_bound)
275          else
276             A_tmp=SpMtx_newInit(nnz=A%nnz,nblocks=A%nblocks,&
277                  nrows=A%nrows,ncols=A%ncols,&
278                  indi=A%indi,indj=A%indj,val=A%val,&
279                  arrange_type=A%arrange_type)
280          endif
281          A%arrange_type=D_SpMtx_ARRNG_NO
282          !A%nrows=max(A%nrows,A_interf_%nrows)
283          !A%ncols=max(A%nrows,A_interf_%ncols)
284          !if (associated(A%M_bound)) then
285          !  deallocate(A%M_bound)
286          !endif
287          call SpMtx_arrange(A,D_SpMtx_ARRNG_ROWS,sort=.true.)
288          call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, &
289               A_interf_=A_interf_, &
290               refactor=refactor_) !fine solves
291          if (sctls%method>1) then ! For multiplicative Schwarz method...:
292             refactor_=.false.
293             call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, &
294                  A_interf_=A_interf_, &
295                  refactor=refactor_) !fine solves
296          endif
297          ! put the original structure and orders back:
298          A%indi=A_tmp%indi
299          A%indj=A_tmp%indj
300          A%val=A_tmp%val
301          if (associated(A_tmp%M_bound)) A%M_bound=A_tmp%M_bound
302          A%nrows=A_tmp%nrows
303          A%ncols=A_tmp%ncols
304          A%arrange_type=A_tmp%arrange_type
305          call SpMtx_Destroy(A_tmp)
306       else
307          call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, &
308               A_interf_=A_interf_, &
309               refactor=refactor_) !fine solves
310          if (sctls%method>1) then ! For multiplicative Schwarz method...:
311             refactor_=.false.
312             call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, &
313                  A_interf_=A_interf_, &
314                  refactor=refactor_) !fine solves
315          endif
316       endif
317    elseif (refactor_.and.sctls%input_type==DCTL_INPUT_TYPE_ASSEMBLED) then!}{
318       if (associated(A%M_bound)) then
319          A_tmp=SpMtx_newInit(nnz=size(A%indi),nblocks=A%nblocks,&
320               nrows=A%nrows,ncols=A%ncols,&
321               indi=A%indi,indj=A%indj,val=A%val,&
322               arrange_type=A%arrange_type,M_bound=A%M_bound)
323       else
324          A_tmp=SpMtx_newInit(nnz=size(A%indi),nblocks=A%nblocks,&
325               nrows=A%nrows,ncols=A%ncols,&
326               indi=A%indi,indj=A%indj,val=A%val,&
327               arrange_type=A%arrange_type)
328       endif
329       A_tmp%nnz=A%nnz
330       A%arrange_type=D_SpMtx_ARRNG_NO
331       call SpMtx_arrange(A,D_SpMtx_ARRNG_ROWS,sort=.true.)
332       call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, &
333            refactor=refactor_) !fine solves
334       if (sctls%method>1) then ! For multiplicative Schwarz method...:
335          refactor_=.false.
336          call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, &
337               refactor=refactor_) !fine solves
338       endif
339       ! put the original structure and orders back:
340       A%indi=A_tmp%indi
341       A%indj=A_tmp%indj
342       A%val=A_tmp%val
343       if (associated(A_tmp%M_bound)) A%M_bound=A_tmp%M_bound
344       A%nrows=A_tmp%nrows
345       A%ncols=A_tmp%ncols
346       A%arrange_type=A_tmp%arrange_type
347       call SpMtx_Destroy(A_tmp)
348    else!}{
349       call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, &
350            A_interf_=A_interf_, &
351            refactor=refactor_) !fine solves
352       if (sctls%method>1) then ! For multiplicative Schwarz method...:
353          refactor_=.false.
354          call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, &
355               A_interf_=A_interf_, &
356               refactor=refactor_) !fine solves
357       endif
358    endif!}
359  end subroutine preconditioner_1level
360
361  !-------------------------------
362  !> Make preconditioner
363  !-------------------------------
364  subroutine preconditioner(sol,A,rhs,M, &
365               A_interf_,CoarseMtx_,Restrict,refactor_,bugtrack_)
366    use subsolvers
367    use CoarseMtx_mod
368    use CoarseAllgathers
369    use Vect_mod
370    implicit none
371    real(kind=rk),dimension(:),pointer :: sol !< solution
372    type(SpMtx)                        :: A   !< sparse system matrix
373    real(kind=rk),dimension(:),pointer :: rhs !< right hand side
374    type(Mesh),intent(in)              :: M   !< Mesh
375    real(kind=rk),dimension(:),pointer :: res !< residual vector, allocated
376                                              !! here for multiplicative Schwarz
377    type(SpMtx),optional               :: A_interf_  !< matr@interf.
378    type(SpMtx),optional               :: CoarseMtx_ !< Coarse matrix
379    type(SpMtx),optional               :: Restrict   !< Restriction matrix
380    logical,intent(inout),optional :: refactor_
381    logical,optional                   :: bugtrack_
382    ! ----- local: ------
383    real(kind=rk),dimension(:),pointer,save :: csol,crhs,tmpsol,tmpsol2,clrhs
384    integer :: i,ol
385    logical :: add,bugtrack
386    real(kind=rk) :: t1
387
388    t1 = MPI_WTime()
389
390    if (present(bugtrack_)) then
391      bugtrack=bugtrack_
392    else
393      bugtrack=.false.
394    endif
395    ! ----------------------------
396    if (sctls%method==0) then
397      sol=rhs
398      return
399    endif
400    sol=0.0_rk
401    if (sctls%method>1) then ! For multiplicative Schwarz method...:
402      allocate(res(size(rhs)))
403    endif
404    if (present(CoarseMtx_)) then !{
405      if (.not.present(Restrict)) call DOUG_abort("Restriction matrix needs to be passed along with the coarse matrix!")
406      if (cdat%active) then
407        ! Not the first iteration, so send the coarse vector here
408        if (.not.(present(refactor_).and.refactor_)) then
409          call SpMtx_Ax(clrhs,Restrict,rhs,dozero=.true.) ! restrict <RA>
410         ! And send
411if (bugtrack)write(stream,*) "local coarse RHS is:",clrhs
412          if (cdat_vec%active) then
413            call AllSendCoarseVector(clrhs,cdat_vec%nprocs,cdat_vec%cdisps,&
414                      cdat_vec%send,useprev=.true.)
415          else
416            call AllSendCoarseVector(clrhs,cdat%nprocs,cdat%cdisps,&
417                      cdat%send,useprev=.true.)
418          endif
419        else ! First iteration - send matrix
420          call AllSendCoarseMtx(cdat%LAC,CoarseMtx_,cdat%lg_cfmap,&
421                                cdat%ngfc,cdat%nprocs,cdat%send)
422        endif
423      endif
424
425      ol=max(sctls%overlap,sctls%smoothers)
426      if (sctls%input_type==DCTL_INPUT_TYPE_ELEMENTAL.or.numprocs>1) then
427          call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, &
428                        A_interf_=A_interf_, &
429                        refactor=refactor_) !fine solves
430      else
431          call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, &
432                        A_interf_=A_interf_,AC=CoarseMtx_, &
433                        refactor=refactor_,Restrict=Restrict) !fine solves
434if (bugtrack)call Print_Glob_Vect(sol,M,'global sol===')
435      endif
436
437      if (sctls%levels>1) then
438        if (ol==0) then
439          add=.false.
440        else
441          add=.true.
442        endif
443        if (present(refactor_).and.refactor_) then !{ setup coarse solve:
444          ! We have a matrix send pending
445          if (cdat%active) then
446            !write(stream,*) "Waiting the Matrix!"
447            call AllRecvCoarseMtx(CoarseMtx_,cdat%send,add=add) ! Recieve it
448            !write(stream,*) "Got Matrix!"
449            allocate(clrhs(Restrict%nrows)) ! allocate memory for vector     
450            call SpMtx_Ax(clrhs,Restrict,rhs,dozero=.true.) ! restrict <RA>
451            ! And send
452if (bugtrack)write(stream,*) "(f) local Coarse RHS is:",clrhs
453            if (cdat_vec%active) then
454              call AllSendCoarseVector(clrhs,cdat_vec%nprocs,&
455                     cdat_vec%cdisps,cdat_vec%send)
456            else
457              call AllSendCoarseVector(clrhs,cdat%nprocs,&
458                     cdat%cdisps,cdat%send)
459            endif
460          endif
461          if (associated(crhs)) then
462            if (size(crhs)/=CoarseMtx_%ncols) then
463              deallocate(csol)
464              deallocate(crhs)
465              allocate(crhs(CoarseMtx_%ncols))
466              allocate(csol(CoarseMtx_%nrows))
467            endif
468          else
469            allocate(crhs(CoarseMtx_%ncols))
470            allocate(csol(CoarseMtx_%nrows))
471          endif
472          if (.not.associated(tmpsol)) then
473            !allocate(tmpsol(A%nrows))
474            allocate(tmpsol(size(rhs)))
475          endif
476
477          if (.not.cdat%active) then
478            if (sctls%smoothers==-1) then
479              allocate(tmpsol2(A%nrows))
480              tmpsol2=0.0_rk
481              call exact_sparse_multismoother(tmpsol2,A,rhs)
482              call SpMtx_Ax(crhs,Restrict,tmpsol2,dozero=.true.) ! restriction
483            else
484              if (sctls%method>1.and.sctls%method/=5) then ! multiplicative Schwarz
485                call SpMtx_Ax(crhs,Restrict,res,dozero=.true.) ! restriction
486              else
487                call SpMtx_Ax(crhs,Restrict,rhs,dozero=.true.) ! restriction
488              endif
489            endif
490          endif
491          !csol=0.0_rk
492          write (stream,*) &
493            'factorising coarse matrix of size',CoarseMtx_%nrows, &
494            ' and nnz:',CoarseMtx_%nnz
495          call free_spmtx_subsolves(CoarseMtx_)
496          allocate(CoarseMtx_%subsolve_ids(1))
497          CoarseMtx_%subsolve_ids=0
498          CoarseMtx_%nsubsolves=1
499
500          !! TODO: force it to do it AFTER factorization, but before solve
501          !if (present(cdat)) then
502          if (cdat%active) then
503            ! Factorise the matrix
504            if (sctls%verbose>2) then
505              write(stream,*)'Global coarse matrix is:---------'
506              do i=1,CoarseMtx_%nnz
507                write(stream,*) CoarseMtx_%indi(i),&
508                CoarseMtx_%indj(i),CoarseMtx_%val(i)
509              enddo
510              write(stream,*)'---------------------------------'
511            endif
512            call factorise(CoarseMtx_%subsolve_ids(1), &
513                 nfreds=CoarseMtx_%nrows,   &
514                 nnz=CoarseMtx_%nnz,        &
515                 indi=CoarseMtx_%indi,      &
516                 indj=CoarseMtx_%indj,      &
517                 val=CoarseMtx_%val)
518            ! Recieve the vector for solve
519            if (cdat_vec%active) then
520              call AllRecvCoarseVector(crhs,cdat_vec%nprocs,&
521                     cdat_vec%cdisps,cdat_vec%glg_cfmap,cdat_vec%send)
522            else
523              call AllRecvCoarseVector(crhs,cdat%nprocs,&
524                     cdat%cdisps,cdat%glg_cfmap,cdat%send)
525            endif
526            !write(stream,*) "Got coarse vector!"
527            !call MPI_BARRIER(MPI_COMM_WORLD,i)
528          endif
529if (bugtrack) then
530 if (isnan(sum(crhs))) then
531  write(stream,*) "(f) global Coarse RHS is:",crhs
532  call DOUG_abort('Got NaN receive fromAllRecvCoarseVector...',838465)
533 endif
534!else
535! write(stream,*) "(f) global Coarse RHS is:",crhs
536endif
537
538
539          call sparse_singlesolve(CoarseMtx_%subsolve_ids(1),csol,crhs, &
540                 nfreds=CoarseMtx_%nrows,   &
541                 nnz=CoarseMtx_%nnz,        &
542                 indi=CoarseMtx_%indi,      &
543                 indj=CoarseMtx_%indj,      &
544                 val=CoarseMtx_%val)
545          write (stream,*) 'coarse factorisation done!',CoarseMtx_%subsolve_ids(1)
546if (bugtrack)write(stream,*) "(f) Coarse SOL is:",csol
547
548          if (cdat_vec%active) then
549            CoarseMtx_%indi=CoarseMtx_%indi+1
550            CoarseMtx_%indj=CoarseMtx_%indj+1
551            call Vect_remap(csol,clrhs,cdat_vec%gl_cfmap,dozero=.true.)
552            call SpMtx_Ax(tmpsol,Restrict,clrhs,dozero=.true.,transp=.true.)
553          elseif (cdat%active) then
554            CoarseMtx_%indi=CoarseMtx_%indi+1
555            CoarseMtx_%indj=CoarseMtx_%indj+1
556            call Vect_remap(csol,clrhs,cdat%gl_cfmap,dozero=.true.)
557            call SpMtx_Ax(tmpsol,Restrict,clrhs,dozero=.true.,transp=.true.)
558          else
559            if (sctls%smoothers==-1) then
560              call SpMtx_Ax(tmpsol2,Restrict,csol,dozero=.true.,transp=.true.) ! interpolation
561              tmpsol=0.0_rk
562              call exact_sparse_multismoother(tmpsol,A,tmpsol2)
563            else
564              call SpMtx_Ax(tmpsol,Restrict,csol,dozero=.true.,transp=.true.) ! interpolation
565            endif
566          endif
567if (bugtrack)call Print_Glob_Vect(tmpsol,M,'tmpsol===',chk_endind=M%ninner)
568        else !}{ apply coarse solve:
569          if (cdat_vec%active) then
570            call AllRecvCoarseVector(crhs,cdat_vec%nprocs,cdat_vec%cdisps,&
571                                          cdat_vec%glg_cfmap,cdat_vec%send)
572          elseif (cdat%active) then
573            call AllRecvCoarseVector(crhs,cdat%nprocs,cdat%cdisps,&
574                                          cdat%glg_cfmap,cdat%send)
575          else
576            if (sctls%smoothers==-1) then
577              tmpsol2=0.0_rk
578              call exact_sparse_multismoother(tmpsol2,A,rhs)
579              call SpMtx_Ax(crhs,Restrict,tmpsol2,dozero=.true.) ! restriction
580            else
581              call SpMtx_Ax(crhs,Restrict,rhs,dozero=.true.) ! restriction
582            endif
583          endif
584          call sparse_singlesolve(CoarseMtx_%subsolve_ids(1),csol,crhs, &
585                 nfreds=CoarseMtx_%nrows)
586          if (cdat_vec%active) then
587            call Vect_remap(csol,clrhs,cdat_vec%gl_cfmap,dozero=.true.)
588            call SpMtx_Ax(tmpsol,Restrict,clrhs,dozero=.true.,transp=.true.)
589          elseif (cdat%active) then
590            call Vect_remap(csol,clrhs,cdat%gl_cfmap,dozero=.true.)
591            call SpMtx_Ax(tmpsol,Restrict,clrhs,dozero=.true.,transp=.true.)
592          else
593            if (sctls%smoothers==-1) then
594              call SpMtx_Ax(tmpsol2,Restrict,csol,dozero=.true.,transp=.true.) ! interpolation
595              tmpsol=0.0_rk
596              call exact_sparse_multismoother(tmpsol,A,tmpsol2)
597            else
598              call SpMtx_Ax(tmpsol,Restrict,csol,dozero=.true.,transp=.true.) ! interpolation
599!              call Vect_print(tmpsol,"csol")
600            endif
601          endif
602        endif !}
603        if (sctls%method==1) then
604!call Print_Glob_Vect(sol,M,'sol===',chk_endind=M%ninner)
605          sol(1:A%nrows)=sol(1:A%nrows)+tmpsol(1:A%nrows)
606        elseif (sctls%method==3) then ! fully multiplicative Schwarz
607          sol(1:A%nrows)=sol(1:A%nrows)+tmpsol(1:A%nrows)
608          ! calculate the residual:
609          call SpMtx_Ax(res,A,sol,dozero=.true.) !
610          res=rhs-res
611        elseif (sctls%method==2.and.ol>0) then ! fully multiplicative Schwarz
612          sol(1:A%nrows)=tmpsol(1:A%nrows)
613          ! calculate the residual:
614          call SpMtx_Ax(res,A,sol,dozero=.true.) !
615          res=rhs-res
616        endif
617      endif
618      if (sctls%method>1) then ! For multiplicative Schwarz method...:
619        refactor_=.false.
620        if (sctls%input_type==DCTL_INPUT_TYPE_ELEMENTAL) then
621            call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, &
622                          A_interf_=A_interf_, &
623                          refactor=refactor_) !fine solves
624        else
625            call sparse_multisolve(sol=sol,A=A,M=M,rhs=rhs,res=res, &
626                          A_interf_=A_interf_,AC=CoarseMtx_, &
627                          refactor=refactor_,Restrict=Restrict) !fine solves
628        endif
629      endif
630      if (((ol==0.and.sctls%method==2).or.sctls%method==5).and.sctls%levels>1) then
631        ! multiplicative on fine level, additive with coarse level:
632        sol(1:A%nrows)=sol(1:A%nrows)+tmpsol(1:A%nrows)
633      endif
634    else !}{
635       call preconditioner_1level(sol,A,rhs,M,res,A_interf_,refactor_)
636    endif !}
637    if (sctls%method>1) then ! For multiplicative Schwarz method...:
638      if (associated(res)) deallocate(res)
639    endif
640
641    time_preconditioner = time_preconditioner + MPI_WTime()-t1
642
643  end subroutine preconditioner
644
645  !--------------------------
646  ! Solve
647  !--------------------------
648  subroutine msolve (m, r, z)
649    implicit none
650    real(kind=rk), dimension(:),     intent(in) :: m, r
651    real(kind=rk), dimension(:), intent(in out) :: z
652    integer :: i
653
654    do i = 1, size(r)
655       z(i) = r(i) !/ m(i)
656    end do
657  end subroutine msolve
658
659  !--------------------------
660  !> Preconditioned conjugent gradient method with eigenvalues
661  !--------------------------
662  subroutine pcg_weigs (A,b,x,Msh,it,cond_num,A_interf_,tol_,maxit_, &
663       x0_,solinf,resvects_,CoarseMtx_,Restrict,refactor_)
664    use CoarseAllgathers
665
666    implicit none
667   
668    type(SpMtx),intent(in out)          :: A !< System matrix (sparse)
669    float(kind=rk),dimension(:),pointer :: b !< right hand side
670    float(kind=rk),dimension(:),pointer :: x !< Solution
671    type(Mesh),intent(in)               :: Msh !< Mesh - aux data for Ax operation
672
673    integer,intent(out) :: it
674    real(kind=rk),intent(out) :: cond_num
675   
676    ! Optional arguments
677    type(SpMtx),intent(in out),optional                :: A_interf_ !< matr@interf.
678    real(kind=rk), intent(in), optional                :: tol_ !< Tolerance of the method
679    integer,       intent(in), optional                :: maxit_ !< Max number of iterations
680    float(kind=rk), dimension(:), intent(in), optional :: x0_ !< Initial guess
681    type(ConvInf),            intent(in out), optional :: solinf !< Solution statistics
682    logical,                      intent(in), optional :: resvects_ !< Fill in the 'resvect' or not
683    type(SpMtx),optional                               :: CoarseMtx_ !< Coarse matrix
684    type(SpMtx),optional                               :: Restrict !< Restriction mtx
685    logical,intent(in),optional                        :: refactor_
686   
687    ! Local variables
688    logical :: refactor
689
690    real(kind=rk) :: tol   ! Tolerance
691    integer       :: maxit ! Max number of iterations
692    logical       :: resvects=.false.
693
694    real(kind=rk),dimension(:),pointer,save :: r, p, q, m, z, b_k
695    real(kind=rk),dimension(:),pointer,save :: ztmp !TODO remove me
696    real(kind=rk) :: rho_curr, rho_prev, init_norm, res_norm
697    real(kind=rk) :: tmp,ratio_norm
698    integer :: nfreds
699    integer :: ierr
700    real(kind=rk),dimension(:),pointer,save :: dd,ee,alpha,beta
701    !logical,parameter :: bugtrack=.true.
702    logical,parameter :: bugtrack=.false.
703    ! profiling
704    real(8) :: time_iterations, t1
705
706    write(stream,'(/a)') 'Preconditioned conjugate gradient:'
707    if (present(refactor_).and.refactor_) then
708      refactor=.true.
709    else
710      refactor=.false.
711    endif
712
713    if (size(b) /= size(x)) &
714         call DOUG_abort('[pcg] : SEVERE : size(b) /= size(x)',-1)
715
716    nfreds=size(b)
717    if (.not.associated(r)) then
718      allocate(r(nfreds))
719      allocate(p(nfreds))
720      allocate(q(nfreds))
721      allocate(m(nfreds))
722      allocate(z(nfreds))
723      allocate(b_k(nfreds))
724    endif
725
726    tol = sctls%solve_tolerance
727    if (present(tol_)) &
728         tol = tol_
729
730    maxit = sctls%solve_maxiters
731    if (maxit==-1) maxit=100000
732    if (present(maxit_)) &
733         maxit = maxit_
734
735    if (present(resvects_).and.(.not.present(solinf))) &
736         call DOUG_abort('[pcg] : SEVERE : "resvects_" must be given along'//&
737         ' with "solinf".',-1)
738
739    ! Allocate convergence info structure
740    if (present(solinf).and.(.not.present(resvects_))) then
741       call ConvInf_Init(solinf)
742    else if (present(solinf).and.&
743         (present(resvects_).and.(resvects_.eqv.(.true.)))) then
744       call ConvInf_Init(solinf, maxit)
745       resvects = .true.
746    end if
747
748
749    ! Initialise auxiliary data structures
750    ! to assist with pmvm
751    call pmvmCommStructs_init(A, Msh)
752
753
754if (bugtrack)call Print_Glob_Vect(x,Msh,'global x===')
755    call SpMtx_pmvm(r,A,x,Msh)
756
757    r = b - r
758    init_norm = Vect_dot_product(r,r)
759    if (init_norm == 0.0) &
760         init_norm = 1.0
761
762
763    write(stream,'(a,i6,a,e8.2)') 'maxit = ',maxit,', tol = ',tol
764    write(stream,'(a,e22.15)') 'init_norm = ', dsqrt(init_norm)
765
766    it = 0
767    ratio_norm = 1.0_rk
768    ! arrays for eigenvalue calculation:
769    if (.not.associated(dd)) then
770      allocate(dd(maxit))
771      allocate(ee(maxit))
772      allocate(alpha(maxit))
773      allocate(beta(maxit))
774    endif
775
776    time_iterations = 0.
777    ! iterations
778    do while((ratio_norm > tol*tol).and.(it < maxit))
779      it = it + 1
780      t1 = MPI_WTime()
781
782if (bugtrack)call Print_Glob_Vect(r,Msh,'global r===',chk_endind=Msh%ninner)
783      call preconditioner(sol=z,          &
784                            A=A,          &
785                          rhs=r,          &
786                            M=Msh,        &
787                    A_interf_=A_interf_,  &
788                   CoarseMtx_=CoarseMtx_, &
789                    Restrict=Restrict,    &
790                    refactor_=refactor,   &
791                    bugtrack_=bugtrack)
792
793      refactor=.false.
794if (bugtrack)call Print_Glob_Vect(z,Msh,'global bef comm z===',chk_endind=Msh%ninner)
795      if (sctls%method/=0) then
796        call Add_common_interf(z,A,Msh)
797      endif
798!call Print_Glob_Vect(z,Msh,'global aft comm z===',chk_endind=Msh%ninner)
799!call Print_Glob_Vect(z,Msh,'global z===')
800      ! compute current rho
801      rho_curr = Vect_dot_product(r,z)
802
803      if (it == 1) then
804         p = z
805      else
806         beta(it) = rho_curr / rho_prev
807         p = z + beta(it) * p
808      end if
809      call SpMtx_pmvm(q,A, p, Msh)
810if (bugtrack)call Print_Glob_Vect(q,Msh,'global q===')
811      ! compute alpha
812      alpha(it) = rho_curr / Vect_dot_product(p,q)
813      x = x + alpha(it) * p
814if (bugtrack)call Print_Glob_Vect(x,Msh,'global x===')
815      r = r - alpha(it) * q
816if (bugtrack)call Print_Glob_Vect(r,Msh,'global r===')
817      rho_prev = rho_curr
818      ! check
819      res_norm = Vect_dot_product(r,r)
820      !write (stream,*) "Norm is", res_norm
821      ratio_norm = res_norm / init_norm
822
823      time_iterations = time_iterations + MPI_WTime()-t1
824
825      if (ismaster()) &
826           write(stream, '(i5,a,e22.15)') it,': res_norm=',dsqrt(res_norm)
827    end do
828
829    if(pstream/=0) then
830       write(pstream, "(I0,':pcg iterations:',I0)") myrank, it
831       write(pstream, "(I0,':iterations time:',F0.3)") myrank, time_iterations
832       write(pstream, "(I0,':preconditioner time:',F0.3)") myrank, time_preconditioner
833    end if
834
835    if (ismaster()) then
836      call CalculateEigenvalues(it,dd,ee,alpha,beta,maxit)
837      cond_num=dd(it)/dd(1)
838      write(stream,'(a,i3,a,e10.4)') '#it:',it,' Cond#: ',cond_num
839    endif
840    !deallocate(beta)
841    !deallocate(alpha)
842    !deallocate(ee)
843    !deallocate(dd)
844    ! Deallocate auxiliary data structures
845    ! helped to assist with pmvm
846 !  call pmvmCommStructs_destroy()
847
848    !deallocate(b_k)
849    !deallocate(z)
850    !deallocate(m)
851    !deallocate(q)
852    !deallocate(p)
853    !deallocate(r)
854  end subroutine pcg_weigs
855
856
857  subroutine CalculateEigenvalues(it,dd,ee,alpha,beta,MaxIt)
858    ! Finds eigenvalues using Lanczos connection
859    implicit none
860    integer :: it,MaxIt
861    real(kind=rk),dimension(:),pointer :: dd,ee,alpha,beta
862    integer :: i,j,ierr
863
864    dd(1) = 1.0_rk/alpha(1)
865    ee(1) = 0.0_rk
866    do i=2,it
867      if (alpha(i)==0.or.beta(i)<0) then
868        print *,'Unable to compute eigenvalue number',i,' and '
869        do j=1,i
870          print *,j,' alpha:',alpha(j),' beta:',beta
871        enddo
872        return
873      else
874        dd(i) = 1.0_rk/alpha(i) + beta(i)/alpha(i-1)
875        ee(i) = -dsqrt(beta(i))/alpha(i-1)
876      endif
877    enddo
878    !call tql1(it,dd,ee,MaxIt,ierr)
879    call tql1(it,dd,ee,ierr)
880    if (ierr > 0) then
881      print *,'Cancelled after EigMaxIt while calculating eig',ierr
882    endif
883  end subroutine CalculateEigenvalues
884
885  subroutine tql1(n,d,e,ierr)
886    implicit none
887    integer :: i,j,l,m,n,ii,l1,l2,mml,ierr
888    real(kind=rk) :: d(n),e(n)
889    real(kind=rk) :: c,c2,c3,dl1,el1,f,g,h,p,r,s,s2,tst1,tst2 !,pythag
890!
891!       this subroutine is a translation of the algol procedure tql1,
892!       num. math. 11, 293-306(1968) by bowdler, martin, reinsch, and
893!       wilkinson.
894!       handbook for auto. comp., vol.ii-linear algebra, 227-240(1971).
895!
896!       this subroutine finds the eigenvalues of a symmetric
897!       tridiagonal matrix by the ql method.
898!
899!       on input
900!          n is the order of the matrix.
901!          d contains the diagonal elements of the input matrix.
902!          e contains the subdiagonal elements of the input matrix
903!            in its last n-1 positions.  e(1) is arbitrary.
904!
905!        on output
906!          d contains the eigenvalues in ascending order.  if an
907!            error exit is made, the eigenvalues are correct and
908!            ordered for indices 1,2,...ierr-1, but may not be
909!            the smallest eigenvalues.
910!          e has been destroyed.
911!          ierr is set to
912!            zero       for normal return,
913!            j          if the j-th eigenvalue has not been
914!                       determined after 30 iterations.
915!       calls pythag for  sqrt(a*a + b*b) .
916!
917!       questions and comments should be directed to burton s. garbow,
918!       mathematics and computer science div, argonne national laboratory
919!
920!       this version dated august 1983.
921!
922!       ------------------------------------------------------------------
923!
924        ierr = 0
925        if (n .eq. 1) go to 1001
926!
927        do 100 i = 2, n
928    100 e(i-1) = e(i)
929!
930        f = 0.0e0
931        tst1 = 0.0e0
932        e(n) = 0.0e0
933
934        do 290 l = 1, n
935           j = 0
936           h = abs(d(l)) + abs(e(l))
937           if (tst1 .lt. h) tst1 = h
938!       .......... look for small sub-diagonal element ..........
939           do 110 m = l, n
940              tst2 = tst1 + abs(e(m))
941              if (tst2 .eq. tst1) go to 120
942!       .......... e(n) is always zero, so there is no exit
943!                  throu     gh the bottom of the loop ..........
944    110    continue
945
946    120    if (m .eq. l)      go to 210
947    130    if (j .eq. 30     ) go to 1000
948           j = j + 1
949!       .......... form shift ..........
950           l1 = l + 1
951           l2 = l1 + 1
952           g = d(l)
953           p = (d(l1) - g) / (2.0e0 * e(l))
954           r = pythag(p,1.0e0_rk)
955           d(l) = e(l) / (p + sign(r,p))
956           d(l1) = e(l) * (p + sign(r,p))
957           dl1 = d(l1)
958           h = g - d(l)
959           if (l2 .gt. n) go to 145
960
961           do 140 i = l2, n
962    140    d(i) = d(i) - h
963
964    145    f = f + h
965!       .......... ql transformation ..........
966           p = d(m)
967           c = 1.0e0
968           c2 = c
969           el1 = e(l1)
970           s = 0.0e0
971           mml = m - l
972!       .......... for i=m-1 step -1 until l do -- ..........
973           do 200 ii = 1, mml
974              c3 = c2
975              c2 = c
976              s2 = s
977              i = m - ii
978              g = c * e(i)
979              h = c * p
980              r = pythag(p,e(i))
981              e(i+1) = s * r
982              s = e(i) / r
983              c = p / r
984              p = c * d(i) - s * g
985              d(i+1) = h + s * (c * g + s * d(i))
986    200    continue
987
988           p = -s * s2 * c3 * el1 * e(l) / dl1
989           e(l) = s * p
990           d(l) = c * p
991           tst2 = tst1 + abs(e(l))
992           if (tst2 .gt. tst1) go to 130
993    210    p = d(l) + f
994!       .......... order eigenvalues ..........
995           if (l .eq. 1) go to 250
996!       .......... for i=l step -1 until 2 do -- ..........
997           do 230 ii = 2, l
998              i = l + 2 - ii
999              if (p .ge. d(i-1)) go to 270
1000              d(i) = d(i-1)
1001    230    continue
1002
1003    250    i = 1
1004    270    d(i) = p
1005    290 continue
1006
1007        go to 1001
1008!       .......... set error -- no convergence to an
1009!                  eigenvalue after 30 iterations ..........
1010   1000 ierr = l
1011   1001 return
1012  end subroutine tql1
1013
1014  function pythag(a,b) result(pyt)
1015    ! finds dsqrt(a**2+b**2) without overflow or destructive underflow
1016    implicit none
1017    real(kind=rk) :: a,b,pyt
1018    real(kind=rk) :: p,r,s,t,u
1019
1020    p = dmax1(dabs(a),dabs(b))
1021    if (p .eq. 0.0d0) go to 20
1022    r = (dmin1(dabs(a),dabs(b))/p)**2
1023 10 continue
1024    t = 4.0d0 + r
1025    if (t .eq. 4.0d0) go to 20
1026    s = r/t
1027    u = 1.0d0 + 2.0d0*s
1028    p = u*p
1029    r = (s/u)**2 * r
1030    go to 10
1031 20 pyt = p
1032    return
1033  end function pythag
1034
1035
1036end module pcg_mod
1037
1038
Note: See TracBrowser for help on using the repository browser.