Actual source code: matptap.c
petsc-3.4.2 2013-07-02
2: /*
3: Defines projective product routines where A is a SeqAIJ matrix
4: C = P^T * A * P
5: */
7: #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
8: #include <../src/mat/utils/freespace.h>
9: #include <petscbt.h>
10: #include <petsctime.h>
14: PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
15: {
19: if (scall == MAT_INITIAL_MATRIX) {
20: PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);
21: MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);
22: PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);
23: }
24: PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);
25: (*(*C)->ops->ptapnumeric)(A,P,*C);
26: PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);
27: return(0);
28: }
32: PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
33: {
35: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
36: Mat_PtAP *ptap = a->ptap;
39: PetscFree(ptap->apa);
40: PetscFree(ptap->api);
41: PetscFree(ptap->apj);
42: (ptap->destroy)(A);
43: PetscFree(ptap);
44: return(0);
45: }
49: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(Mat A,Mat P,PetscReal fill,Mat *C)
50: {
51: PetscErrorCode ierr;
52: PetscFreeSpaceList free_space=NULL,current_space=NULL;
53: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
54: PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
55: PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj,nspacedouble=0;
56: PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N;
57: PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
58: MatScalar *ca;
59: PetscBT lnkbt;
60: PetscReal afill;
63: /* Get ij structure of P^T */
64: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
65: ptJ = ptj;
67: /* Allocate ci array, arrays for fill computation and */
68: /* free space for accumulating nonzero column info */
69: PetscMalloc((pn+1)*sizeof(PetscInt),&ci);
70: ci[0] = 0;
72: PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);
73: PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));
74: ptasparserow = ptadenserow + an;
76: /* create and initialize a linked list */
77: nlnk = pn+1;
78: PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);
80: /* Set initial free space to be fill*(nnz(A)+ nnz(P)) */
81: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+pi[pm])),&free_space);
82: current_space = free_space;
84: /* Determine symbolic info for each row of C: */
85: for (i=0; i<pn; i++) {
86: ptnzi = pti[i+1] - pti[i];
87: ptanzi = 0;
88: /* Determine symbolic row of PtA: */
89: for (j=0; j<ptnzi; j++) {
90: arow = *ptJ++;
91: anzj = ai[arow+1] - ai[arow];
92: ajj = aj + ai[arow];
93: for (k=0; k<anzj; k++) {
94: if (!ptadenserow[ajj[k]]) {
95: ptadenserow[ajj[k]] = -1;
96: ptasparserow[ptanzi++] = ajj[k];
97: }
98: }
99: }
100: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
101: ptaj = ptasparserow;
102: cnzi = 0;
103: for (j=0; j<ptanzi; j++) {
104: prow = *ptaj++;
105: pnzj = pi[prow+1] - pi[prow];
106: pjj = pj + pi[prow];
107: /* add non-zero cols of P into the sorted linked list lnk */
108: PetscLLAddSorted(pnzj,pjj,pn,nlnk,lnk,lnkbt);
109: cnzi += nlnk;
110: }
112: /* If free space is not available, make more free space */
113: /* Double the amount of total space in the list */
114: if (current_space->local_remaining<cnzi) {
115: PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);
116: nspacedouble++;
117: }
119: /* Copy data into free space, and zero out denserows */
120: PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);
122: current_space->array += cnzi;
123: current_space->local_used += cnzi;
124: current_space->local_remaining -= cnzi;
126: for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
128: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
129: /* For now, we will recompute what is needed. */
130: ci[i+1] = ci[i] + cnzi;
131: }
132: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
133: /* Allocate space for cj, initialize cj, and */
134: /* destroy list of free space and other temporary array(s) */
135: PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);
136: PetscFreeSpaceContiguous(&free_space,cj);
137: PetscFree(ptadenserow);
138: PetscLLDestroy(lnk,lnkbt);
140: /* Allocate space for ca */
141: PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);
142: PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));
144: /* put together the new matrix */
145: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),pn,pn,ci,cj,ca,C);
147: (*C)->rmap->bs = P->cmap->bs;
148: (*C)->cmap->bs = P->cmap->bs;
150: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
151: /* Since these are PETSc arrays, change flags to free them as necessary. */
152: c = (Mat_SeqAIJ*)((*C)->data);
153: c->free_a = PETSC_TRUE;
154: c->free_ij = PETSC_TRUE;
155: c->nonew = 0;
156: (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ_SparseAxpy;
158: /* set MatInfo */
159: afill = (PetscReal)ci[pn]/(ai[am]+pi[pm] + 1.e-5);
160: if (afill < 1.0) afill = 1.0;
161: c->maxnz = ci[pn];
162: c->nz = ci[pn];
163: (*C)->info.mallocs = nspacedouble;
164: (*C)->info.fill_ratio_given = fill;
165: (*C)->info.fill_ratio_needed = afill;
167: /* Clean up. */
168: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
169: #if defined(PETSC_USE_INFO)
170: if (ci[pn] != 0) {
171: PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);
172: PetscInfo1((*C),"Use MatPtAP(A,P,MatReuse,%G,&C) for best performance.\n",afill);
173: } else {
174: PetscInfo((*C),"Empty matrix product\n");
175: }
176: #endif
177: return(0);
178: }
182: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_SparseAxpy(Mat A,Mat P,Mat C)
183: {
185: Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data;
186: Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data;
187: Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data;
188: PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
189: PetscInt *ci=c->i,*cj=c->j,*cjj;
190: PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N;
191: PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
192: MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
195: /* Allocate temporary array for storage of one row of A*P (cn: non-scalable) */
196: PetscMalloc(cn*(sizeof(MatScalar)+sizeof(PetscInt))+c->rmax*sizeof(PetscInt),&apa);
198: apjdense = (PetscInt*)(apa + cn);
199: apj = apjdense + cn;
200: PetscMemzero(apa,cn*(sizeof(MatScalar)+sizeof(PetscInt)));
202: /* Clear old values in C */
203: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
205: for (i=0; i<am; i++) {
206: /* Form sparse row of A*P */
207: anzi = ai[i+1] - ai[i];
208: apnzj = 0;
209: for (j=0; j<anzi; j++) {
210: prow = *aj++;
211: pnzj = pi[prow+1] - pi[prow];
212: pjj = pj + pi[prow];
213: paj = pa + pi[prow];
214: for (k=0; k<pnzj; k++) {
215: if (!apjdense[pjj[k]]) {
216: apjdense[pjj[k]] = -1;
217: apj[apnzj++] = pjj[k];
218: }
219: apa[pjj[k]] += (*aa)*paj[k];
220: }
221: PetscLogFlops(2.0*pnzj);
222: aa++;
223: }
225: /* Sort the j index array for quick sparse axpy. */
226: /* Note: a array does not need sorting as it is in dense storage locations. */
227: PetscSortInt(apnzj,apj);
229: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
230: pnzi = pi[i+1] - pi[i];
231: for (j=0; j<pnzi; j++) {
232: nextap = 0;
233: crow = *pJ++;
234: cjj = cj + ci[crow];
235: caj = ca + ci[crow];
236: /* Perform sparse axpy operation. Note cjj includes apj. */
237: for (k=0; nextap<apnzj; k++) {
238: #if defined(PETSC_USE_DEBUG)
239: if (k >= ci[crow+1] - ci[crow]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"k too large k %d, crow %d",k,crow);
240: #endif
241: if (cjj[k]==apj[nextap]) {
242: caj[k] += (*pA)*apa[apj[nextap++]];
243: }
244: }
245: PetscLogFlops(2.0*apnzj);
246: pA++;
247: }
249: /* Zero the current row info for A*P */
250: for (j=0; j<apnzj; j++) {
251: apa[apj[j]] = 0.;
252: apjdense[apj[j]] = 0;
253: }
254: }
256: /* Assemble the final matrix and clean up */
257: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
258: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
260: PetscFree(apa);
261: return(0);
262: }
266: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
267: {
269: Mat_SeqAIJ *ap,*c;
270: PetscInt *api,*apj,*ci,pn=P->cmap->N;
271: MatScalar *ca;
272: Mat_PtAP *ptap;
273: Mat Pt,AP;
274: PetscBool sparse_axpy=PETSC_TRUE;
277: PetscObjectOptionsBegin((PetscObject)A);
278: /* flag 'sparse_axpy' determines which implementations to be used:
279: 0: do dense axpy in MatPtAPNumeric() - fastest, but requires storage of struct A*P;
280: 1: do two sparse axpy in MatPtAPNumeric() - slowest, does not store structure of A*P. */
281: PetscOptionsBool("-matptap_scalable","Use sparse axpy but slower MatPtAPNumeric()","",sparse_axpy,&sparse_axpy,NULL);
282: PetscOptionsEnd();
283: if (sparse_axpy) {
284: MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A,P,fill,C);
285: return(0);
286: }
288: /* Get symbolic Pt = P^T */
289: MatTransposeSymbolic_SeqAIJ(P,&Pt);
291: /* Get symbolic AP = A*P */
292: MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,P,fill,&AP);
294: ap = (Mat_SeqAIJ*)AP->data;
295: api = ap->i;
296: apj = ap->j;
297: ap->free_ij = PETSC_FALSE; /* api and apj are kept in struct ptap, cannot be destroyed with AP */
299: /* Get C = Pt*AP */
300: MatMatMultSymbolic_SeqAIJ_SeqAIJ(Pt,AP,fill,C);
302: c = (Mat_SeqAIJ*)(*C)->data;
303: ci = c->i;
304: PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);
305: PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));
306: c->a = ca;
307: c->free_a = PETSC_TRUE;
309: /* Create a supporting struct for reuse by MatPtAPNumeric() */
310: PetscNew(Mat_PtAP,&ptap);
312: c->ptap = ptap;
313: ptap->destroy = (*C)->ops->destroy;
314: (*C)->ops->destroy = MatDestroy_SeqAIJ_PtAP;
316: /* Allocate temporary array for storage of one row of A*P */
317: PetscMalloc((pn+1)*sizeof(PetscScalar),&ptap->apa);
318: PetscMemzero(ptap->apa,(pn+1)*sizeof(PetscScalar));
320: (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ;
322: ptap->api = api;
323: ptap->apj = apj;
325: /* Clean up. */
326: MatDestroy(&Pt);
327: MatDestroy(&AP);
328: #if defined(PETSC_USE_INFO)
329: PetscInfo2((*C),"given fill %G, use scalable %d\n",fill,sparse_axpy);
330: #endif
331: return(0);
332: }
334: /* #define PROFILE_MatPtAPNumeric */
337: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
338: {
339: PetscErrorCode ierr;
340: Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data;
341: Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data;
342: Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data;
343: const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*ci=c->i,*cj=c->j;
344: const PetscScalar *aa=a->a,*pa=p->a,*pval;
345: const PetscInt *apj,*pcol,*cjj;
346: const PetscInt am=A->rmap->N,cm=C->rmap->N;
347: PetscInt i,j,k,anz,apnz,pnz,prow,crow,cnz;
348: PetscScalar *apa,*ca=c->a,*caj,pvalj;
349: Mat_PtAP *ptap = c->ptap;
350: #if defined(PROFILE_MatPtAPNumeric)
351: PetscLogDouble t0,tf,time_Cseq0=0.0,time_Cseq1=0.0;
352: PetscInt flops0=0,flops1=0;
353: #endif
356: /* Get temporary array for storage of one row of A*P */
357: apa = ptap->apa;
359: /* Clear old values in C */
360: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
362: for (i=0; i<am; i++) {
363: /* Form sparse row of AP[i,:] = A[i,:]*P */
364: #if defined(PROFILE_MatPtAPNumeric)
365: PetscTime(&t0);
366: #endif
367: anz = ai[i+1] - ai[i];
368: apnz = 0;
369: for (j=0; j<anz; j++) {
370: prow = aj[j];
371: pnz = pi[prow+1] - pi[prow];
372: pcol = pj + pi[prow];
373: pval = pa + pi[prow];
374: for (k=0; k<pnz; k++) {
375: apa[pcol[k]] += aa[j]*pval[k];
376: }
377: PetscLogFlops(2.0*pnz);
378: #if defined(PROFILE_MatPtAPNumeric)
379: flops0 += 2.0*pnz;
380: #endif
381: }
382: aj += anz; aa += anz;
383: #if defined(PROFILE_MatPtAPNumeric)
384: PetscTime(&tf);
386: time_Cseq0 += tf - t0;
387: #endif
389: /* Compute P^T*A*P using outer product P[i,:]^T*AP[i,:]. */
390: #if defined(PROFILE_MatPtAPNumeric)
391: PetscTime(&t0);
392: #endif
393: apj = ptap->apj + ptap->api[i];
394: apnz = ptap->api[i+1] - ptap->api[i];
395: pnz = pi[i+1] - pi[i];
396: pcol = pj + pi[i];
397: pval = pa + pi[i];
399: /* Perform dense axpy */
400: for (j=0; j<pnz; j++) {
401: crow = pcol[j];
402: cjj = cj + ci[crow];
403: caj = ca + ci[crow];
404: pvalj = pval[j];
405: cnz = ci[crow+1] - ci[crow];
406: for (k=0; k<cnz; k++) caj[k] += pvalj*apa[cjj[k]];
407: PetscLogFlops(2.0*cnz);
408: #if defined(PROFILE_MatPtAPNumeric)
409: flops1 += 2.0*cnz;
410: #endif
411: }
412: #if defined(PROFILE_MatPtAPNumeric)
413: PetscTime(&tf);
414: time_Cseq1 += tf - t0;
415: #endif
417: /* Zero the current row info for A*P */
418: for (j=0; j<apnz; j++) apa[apj[j]] = 0.0;
419: }
421: /* Assemble the final matrix and clean up */
422: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
423: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
424: #if defined(PROFILE_MatPtAPNumeric)
425: printf("PtAPNumeric_SeqAIJ time %g + %g, flops %d %d\n",time_Cseq0,time_Cseq1,flops0,flops1);
426: #endif
427: return(0);
428: }