Actual source code: pipecg.c
petsc-3.4.2 2013-07-02
2: #include <petsc-private/kspimpl.h>
4: /*
5: KSPSetUp_PIPECG - Sets up the workspace needed by the PIPECG method.
7: This is called once, usually automatically by KSPSolve() or KSPSetUp()
8: but can be called directly by KSPSetUp()
9: */
12: PetscErrorCode KSPSetUp_PIPECG(KSP ksp)
13: {
17: /* get work vectors needed by PIPECG */
18: KSPSetWorkVecs(ksp,9);
19: return(0);
20: }
22: /*
23: KSPSolve_PIPECG - This routine actually applies the pipelined conjugate gradient method
25: Input Parameter:
26: . ksp - the Krylov space object that was set to use conjugate gradient, by, for
27: example, KSPCreate(MPI_Comm,KSP *ksp); KSPSetType(ksp,KSPCG);
28: */
31: PetscErrorCode KSPSolve_PIPECG(KSP ksp)
32: {
34: PetscInt i;
35: PetscScalar alpha = 0.0,beta = 0.0,gamma = 0.0,gammaold = 0.0,delta = 0.0;
36: PetscReal dp = 0.0;
37: Vec X,B,Z,P,W,Q,U,M,N,R,S;
38: Mat Amat,Pmat;
39: MatStructure pflag;
40: PetscBool diagonalscale;
43: PCGetDiagonalScale(ksp->pc,&diagonalscale);
44: if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);
46: X = ksp->vec_sol;
47: B = ksp->vec_rhs;
48: M = ksp->work[0];
49: Z = ksp->work[1];
50: P = ksp->work[2];
51: N = ksp->work[3];
52: W = ksp->work[4];
53: Q = ksp->work[5];
54: U = ksp->work[6];
55: R = ksp->work[7];
56: S = ksp->work[8];
58: PCGetOperators(ksp->pc,&Amat,&Pmat,&pflag);
60: ksp->its = 0;
61: if (!ksp->guess_zero) {
62: KSP_MatMult(ksp,Amat,X,R); /* r <- b - Ax */
63: VecAYPX(R,-1.0,B);
64: } else {
65: VecCopy(B,R); /* r <- b (x is 0) */
66: }
68: KSP_PCApply(ksp,R,U); /* u <- Br */
70: switch (ksp->normtype) {
71: case KSP_NORM_PRECONDITIONED:
72: VecNormBegin(U,NORM_2,&dp); /* dp <- u'*u = e'*A'*B'*B*A'*e' */
73: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
74: KSP_MatMult(ksp,Amat,U,W); /* w <- Au */
75: VecNormEnd(U,NORM_2,&dp);
76: break;
77: case KSP_NORM_UNPRECONDITIONED:
78: VecNormBegin(R,NORM_2,&dp); /* dp <- r'*r = e'*A'*A*e */
79: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
80: KSP_MatMult(ksp,Amat,U,W); /* w <- Au */
81: VecNormEnd(R,NORM_2,&dp);
82: break;
83: case KSP_NORM_NATURAL:
84: VecDotBegin(R,U,&gamma); /* gamma <- u'*r */
85: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
86: KSP_MatMult(ksp,Amat,U,W); /* w <- Au */
87: VecDotEnd(R,U,&gamma);
88: if (PetscIsInfOrNanScalar(gamma)) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_FP,"Infinite or not-a-number generated in dot product");
89: dp = PetscSqrtReal(PetscAbsScalar(gamma)); /* dp <- r'*u = r'*B*r = e'*A'*B*A*e */
90: break;
91: case KSP_NORM_NONE:
92: KSP_MatMult(ksp,Amat,U,W);
93: dp = 0.0;
94: break;
95: default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
96: }
97: KSPLogResidualHistory(ksp,dp);
98: KSPMonitor(ksp,0,dp);
99: ksp->rnorm = dp;
100: (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP); /* test for convergence */
101: if (ksp->reason) return(0);
103: i = 0;
104: do {
105: if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
106: VecNormBegin(R,NORM_2,&dp);
107: } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
108: VecNormBegin(U,NORM_2,&dp);
109: }
110: if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) {
111: VecDotBegin(R,U,&gamma);
112: }
113: VecDotBegin(W,U,&delta);
114: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
116: KSP_PCApply(ksp,W,M); /* m <- Bw */
117: KSP_MatMult(ksp,Amat,M,N); /* n <- Am */
119: if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
120: VecNormEnd(R,NORM_2,&dp);
121: } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
122: VecNormEnd(U,NORM_2,&dp);
123: }
124: if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) {
125: VecDotEnd(R,U,&gamma);
126: }
127: VecDotEnd(W,U,&delta);
129: if (i > 0) {
130: if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
131: else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
133: ksp->rnorm = dp;
134: KSPLogResidualHistory(ksp,dp);
135: KSPMonitor(ksp,i,dp);
136: (*ksp->converged)(ksp,i,dp,&ksp->reason,ksp->cnvP);
137: if (ksp->reason) break;
138: }
140: if (i == 0) {
141: alpha = gamma / delta;
142: VecCopy(N,Z); /* z <- n */
143: VecCopy(M,Q); /* q <- m */
144: VecCopy(U,P); /* p <- u */
145: VecCopy(W,S); /* s <- w */
146: } else {
147: beta = gamma / gammaold;
148: alpha = gamma / (delta - beta / alpha * gamma);
149: VecAYPX(Z,beta,N); /* z <- n + beta * z */
150: VecAYPX(Q,beta,M); /* q <- m + beta * q */
151: VecAYPX(P,beta,U); /* p <- u + beta * p */
152: VecAYPX(S,beta,W); /* s <- w + beta * s */
153: }
154: VecAXPY(X, alpha,P); /* x <- x + alpha * p */
155: VecAXPY(U,-alpha,Q); /* u <- u - alpha * q */
156: VecAXPY(W,-alpha,Z); /* w <- w - alpha * z */
157: VecAXPY(R,-alpha,S); /* r <- r - alpha * s */
158: gammaold = gamma;
159: i++;
160: ksp->its = i;
162: /* if (i%50 == 0) { */
163: /* KSP_MatMult(ksp,Amat,X,R); /\* w <- b - Ax *\/ */
164: /* VecAYPX(R,-1.0,B); */
165: /* KSP_PCApply(ksp,R,U); */
166: /* KSP_MatMult(ksp,Amat,U,W); */
167: /* } */
169: } while (i<ksp->max_it);
170: if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
171: return(0);
172: }
175: /*MC
176: KSPPIPECG - Pipelined conjugate gradient method.
178: There method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard CG. The
179: non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.
181: See also KSPPIPECR, where the reduction is only overlapped with the matrix-vector product.
183: Level:
184: beginner
186: Notes:
187: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
188: See the FAQ on the PETSc website for details.
190: Contributed by:
191: Pieter Ghysels, Universiteit Antwerpen, Intel Exascience lab Flanders
193: Reference:
194: P. Ghysels and W. Vanroose, "Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm",
195: Submitted to Parallel Computing, 2012.
197: .seealso: KSPCreate(), KSPSetType(), KSPPIPECR, KSPGROPPCG, KSPPGMRES, KSPCG, KSPCGUseSingleReduction()
198: M*/
201: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECG(KSP ksp)
202: {
206: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_LEFT,1);
207: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,1);
208: KSPSetSupportedNorm(ksp,KSP_NORM_NATURAL,PC_LEFT,1);
209: KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_LEFT,1);
211: ksp->ops->setup = KSPSetUp_PIPECG;
212: ksp->ops->solve = KSPSolve_PIPECG;
213: ksp->ops->destroy = KSPDestroyDefault;
214: ksp->ops->view = 0;
215: ksp->ops->setfromoptions = 0;
216: ksp->ops->buildsolution = KSPBuildSolutionDefault;
217: ksp->ops->buildresidual = KSPBuildResidualDefault;
218: return(0);
219: }