Actual source code: matmatmult.c
petsc-3.4.2 2013-07-02
2: /*
3: Defines matrix-matrix product routines for pairs of SeqAIJ matrices
4: C = A * B
5: */
7: #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
8: #include <../src/mat/utils/freespace.h>
9: #include <../src/mat/utils/petscheap.h>
10: #include <petscbt.h>
11: #include <../src/mat/impls/dense/seq/dense.h>
13: static PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat,Mat,PetscReal,Mat*);
17: PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
18: {
20: PetscBool scalable=PETSC_FALSE,scalable_fast=PETSC_FALSE,heap = PETSC_FALSE,btheap = PETSC_FALSE,llcondensed = PETSC_FALSE;
23: if (scall == MAT_INITIAL_MATRIX) {
24: PetscObjectOptionsBegin((PetscObject)A);
25: PetscOptionsBool("-matmatmult_scalable","Use a scalable but slower C=A*B","",scalable,&scalable,NULL);
26: PetscOptionsBool("-matmatmult_scalable_fast","Use a scalable but slower C=A*B","",scalable_fast,&scalable_fast,NULL);
27: PetscOptionsBool("-matmatmult_heap","Use heap implementation of symbolic factorization C=A*B","",heap,&heap,NULL);
28: PetscOptionsBool("-matmatmult_btheap","Use btheap implementation of symbolic factorization C=A*B","",btheap,&btheap,NULL);
29: PetscOptionsBool("-matmatmult_llcondensed","Use LLCondensed to for symbolic C=A*B","",llcondensed,&llcondensed,NULL);
30: PetscOptionsEnd();
31: PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);
32: if (scalable_fast) {
33: MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A,B,fill,C);
34: } else if (scalable) {
35: MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C);
36: } else if (heap) {
37: MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A,B,fill,C);
38: } else if (btheap) {
39: MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A,B,fill,C);
40: } else if (llcondensed) {
41: MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(A,B,fill,C);
42: } else {
43: MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);
44: }
45: PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);
46: }
48: PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);
49: (*(*C)->ops->matmultnumeric)(A,B,*C);
50: PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);
51: return(0);
52: }
56: static PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat A,Mat B,PetscReal fill,Mat *C)
57: {
58: PetscErrorCode ierr;
59: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
60: PetscInt *ai=a->i,*bi=b->i,*ci,*cj;
61: PetscInt am =A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
62: PetscReal afill;
63: PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,nlnk_max,*lnk,ndouble=0;
64: PetscBT lnkbt;
65: PetscFreeSpaceList free_space=NULL,current_space=NULL;
68: /* Get ci and cj */
69: /*---------------*/
70: /* Allocate ci array, arrays for fill computation and */
71: /* free space for accumulating nonzero column info */
72: PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);
73: ci[0] = 0;
75: /* create and initialize a linked list */
76: nlnk_max = a->rmax*b->rmax;
77: if (!nlnk_max || nlnk_max > bn) nlnk_max = bn;
78: PetscLLCondensedCreate(nlnk_max,bn,&lnk,&lnkbt);
80: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
81: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);
83: current_space = free_space;
85: /* Determine ci and cj */
86: for (i=0; i<am; i++) {
87: anzi = ai[i+1] - ai[i];
88: aj = a->j + ai[i];
89: for (j=0; j<anzi; j++) {
90: brow = aj[j];
91: bnzj = bi[brow+1] - bi[brow];
92: bj = b->j + bi[brow];
93: /* add non-zero cols of B into the sorted linked list lnk */
94: PetscLLCondensedAddSorted(bnzj,bj,lnk,lnkbt);
95: }
96: cnzi = lnk[0];
98: /* If free space is not available, make more free space */
99: /* Double the amount of total space in the list */
100: if (current_space->local_remaining<cnzi) {
101: PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);
102: ndouble++;
103: }
105: /* Copy data into free space, then initialize lnk */
106: PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt);
108: current_space->array += cnzi;
109: current_space->local_used += cnzi;
110: current_space->local_remaining -= cnzi;
112: ci[i+1] = ci[i] + cnzi;
113: }
115: /* Column indices are in the list of free space */
116: /* Allocate space for cj, initialize cj, and */
117: /* destroy list of free space and other temporary array(s) */
118: PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);
119: PetscFreeSpaceContiguous(&free_space,cj);
120: PetscLLCondensedDestroy(lnk,lnkbt);
122: /* put together the new symbolic matrix */
123: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,C);
125: (*C)->rmap->bs = A->rmap->bs;
126: (*C)->cmap->bs = B->cmap->bs;
128: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
129: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
130: c = (Mat_SeqAIJ*)((*C)->data);
131: c->free_a = PETSC_FALSE;
132: c->free_ij = PETSC_TRUE;
133: c->nonew = 0;
134: (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; /* fast, needs non-scalable O(bn) array 'abdense' */
136: /* set MatInfo */
137: afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
138: if (afill < 1.0) afill = 1.0;
139: c->maxnz = ci[am];
140: c->nz = ci[am];
141: (*C)->info.mallocs = ndouble;
142: (*C)->info.fill_ratio_given = fill;
143: (*C)->info.fill_ratio_needed = afill;
145: #if defined(PETSC_USE_INFO)
146: if (ci[am]) {
147: PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);
148: PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);
149: } else {
150: PetscInfo((*C),"Empty matrix product\n");
151: }
152: #endif
153: return(0);
154: }
158: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
159: {
161: PetscLogDouble flops=0.0;
162: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
163: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
164: Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
165: PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
166: PetscInt am =A->rmap->n,cm=C->rmap->n;
167: PetscInt i,j,k,anzi,bnzi,cnzi,brow;
168: PetscScalar *aa=a->a,*ba=b->a,*baj,*ca,valtmp;
169: PetscScalar *ab_dense;
172: /* printf("MatMatMultNumeric_SeqAIJ_SeqAIJ...ca %p\n",c->a); */
173: if (!c->a) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */
174: PetscMalloc((ci[cm]+1)*sizeof(MatScalar),&ca);
175: c->a = ca;
176: c->free_a = PETSC_TRUE;
178: PetscMalloc(B->cmap->N*sizeof(PetscScalar),&ab_dense);
179: PetscMemzero(ab_dense,B->cmap->N*sizeof(PetscScalar));
181: c->matmult_abdense = ab_dense;
182: } else {
183: ca = c->a;
184: ab_dense = c->matmult_abdense;
185: }
187: /* clean old values in C */
188: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
189: /* Traverse A row-wise. */
190: /* Build the ith row in C by summing over nonzero columns in A, */
191: /* the rows of B corresponding to nonzeros of A. */
192: for (i=0; i<am; i++) {
193: anzi = ai[i+1] - ai[i];
194: for (j=0; j<anzi; j++) {
195: brow = aj[j];
196: bnzi = bi[brow+1] - bi[brow];
197: bjj = bj + bi[brow];
198: baj = ba + bi[brow];
199: /* perform dense axpy */
200: valtmp = aa[j];
201: for (k=0; k<bnzi; k++) {
202: ab_dense[bjj[k]] += valtmp*baj[k];
203: }
204: flops += 2*bnzi;
205: }
206: aj += anzi; aa += anzi;
208: cnzi = ci[i+1] - ci[i];
209: for (k=0; k<cnzi; k++) {
210: ca[k] += ab_dense[cj[k]];
211: ab_dense[cj[k]] = 0.0; /* zero ab_dense */
212: }
213: flops += cnzi;
214: cj += cnzi; ca += cnzi;
215: }
216: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
217: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
218: PetscLogFlops(flops);
219: return(0);
220: }
224: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C)
225: {
227: PetscLogDouble flops=0.0;
228: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
229: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
230: Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
231: PetscInt *ai = a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
232: PetscInt am = A->rmap->N,cm=C->rmap->N;
233: PetscInt i,j,k,anzi,bnzi,cnzi,brow;
234: PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp;
235: PetscInt nextb;
238: /* clean old values in C */
239: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
240: /* Traverse A row-wise. */
241: /* Build the ith row in C by summing over nonzero columns in A, */
242: /* the rows of B corresponding to nonzeros of A. */
243: for (i=0; i<am; i++) {
244: anzi = ai[i+1] - ai[i];
245: cnzi = ci[i+1] - ci[i];
246: for (j=0; j<anzi; j++) {
247: brow = aj[j];
248: bnzi = bi[brow+1] - bi[brow];
249: bjj = bj + bi[brow];
250: baj = ba + bi[brow];
251: /* perform sparse axpy */
252: valtmp = aa[j];
253: nextb = 0;
254: for (k=0; nextb<bnzi; k++) {
255: if (cj[k] == bjj[nextb]) { /* ccol == bcol */
256: ca[k] += valtmp*baj[nextb++];
257: }
258: }
259: flops += 2*bnzi;
260: }
261: aj += anzi; aa += anzi;
262: cj += cnzi; ca += cnzi;
263: }
265: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
266: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
267: PetscLogFlops(flops);
268: return(0);
269: }
273: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A,Mat B,PetscReal fill,Mat *C)
274: {
275: PetscErrorCode ierr;
276: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
277: PetscInt *ai = a->i,*bi=b->i,*ci,*cj;
278: PetscInt am = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
279: MatScalar *ca;
280: PetscReal afill;
281: PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,nlnk_max,*lnk,ndouble=0;
282: PetscFreeSpaceList free_space=NULL,current_space=NULL;
285: /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */
286: /*-----------------------------------------------------------------------------------------*/
287: /* Allocate arrays for fill computation and free space for accumulating nonzero column */
288: PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);
289: ci[0] = 0;
291: /* create and initialize a linked list */
292: nlnk_max = a->rmax*b->rmax;
293: if (!nlnk_max || nlnk_max > bn) nlnk_max = bn; /* in case rmax is not defined for A or B */
294: PetscLLCondensedCreate_fast(nlnk_max,&lnk);
296: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
297: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);
298: current_space = free_space;
300: /* Determine ci and cj */
301: for (i=0; i<am; i++) {
302: anzi = ai[i+1] - ai[i];
303: aj = a->j + ai[i];
304: for (j=0; j<anzi; j++) {
305: brow = aj[j];
306: bnzj = bi[brow+1] - bi[brow];
307: bj = b->j + bi[brow];
308: /* add non-zero cols of B into the sorted linked list lnk */
309: PetscLLCondensedAddSorted_fast(bnzj,bj,lnk);
310: }
311: cnzi = lnk[1];
313: /* If free space is not available, make more free space */
314: /* Double the amount of total space in the list */
315: if (current_space->local_remaining<cnzi) {
316: PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);
317: ndouble++;
318: }
320: /* Copy data into free space, then initialize lnk */
321: PetscLLCondensedClean_fast(cnzi,current_space->array,lnk);
323: current_space->array += cnzi;
324: current_space->local_used += cnzi;
325: current_space->local_remaining -= cnzi;
327: ci[i+1] = ci[i] + cnzi;
328: }
330: /* Column indices are in the list of free space */
331: /* Allocate space for cj, initialize cj, and */
332: /* destroy list of free space and other temporary array(s) */
333: PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);
334: PetscFreeSpaceContiguous(&free_space,cj);
335: PetscLLCondensedDestroy_fast(lnk);
337: /* Allocate space for ca */
338: PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);
339: PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));
341: /* put together the new symbolic matrix */
342: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,C);
344: (*C)->rmap->bs = A->rmap->bs;
345: (*C)->cmap->bs = B->cmap->bs;
347: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
348: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
349: c = (Mat_SeqAIJ*)((*C)->data);
350: c->free_a = PETSC_TRUE;
351: c->free_ij = PETSC_TRUE;
352: c->nonew = 0;
354: (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */
356: /* set MatInfo */
357: afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
358: if (afill < 1.0) afill = 1.0;
359: c->maxnz = ci[am];
360: c->nz = ci[am];
361: (*C)->info.mallocs = ndouble;
362: (*C)->info.fill_ratio_given = fill;
363: (*C)->info.fill_ratio_needed = afill;
365: #if defined(PETSC_USE_INFO)
366: if (ci[am]) {
367: PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);
368: PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);
369: } else {
370: PetscInfo((*C),"Empty matrix product\n");
371: }
372: #endif
373: return(0);
374: }
379: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat *C)
380: {
381: PetscErrorCode ierr;
382: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
383: PetscInt *ai = a->i,*bi=b->i,*ci,*cj;
384: PetscInt am = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
385: MatScalar *ca;
386: PetscReal afill;
387: PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,nlnk_max,*lnk,ndouble=0;
388: PetscFreeSpaceList free_space=NULL,current_space=NULL;
391: /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */
392: /*---------------------------------------------------------------------------------------------*/
393: /* Allocate arrays for fill computation and free space for accumulating nonzero column */
394: PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);
395: ci[0] = 0;
397: /* create and initialize a linked list */
398: nlnk_max = a->rmax*b->rmax;
399: if (!nlnk_max || nlnk_max > bn) nlnk_max = bn; /* in case rmax is not defined for A or B */
400: PetscLLCondensedCreate_Scalable(nlnk_max,&lnk);
402: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
403: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);
404: current_space = free_space;
406: /* Determine ci and cj */
407: for (i=0; i<am; i++) {
408: anzi = ai[i+1] - ai[i];
409: aj = a->j + ai[i];
410: for (j=0; j<anzi; j++) {
411: brow = aj[j];
412: bnzj = bi[brow+1] - bi[brow];
413: bj = b->j + bi[brow];
414: /* add non-zero cols of B into the sorted linked list lnk */
415: PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk);
416: }
417: cnzi = lnk[0];
419: /* If free space is not available, make more free space */
420: /* Double the amount of total space in the list */
421: if (current_space->local_remaining<cnzi) {
422: PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);
423: ndouble++;
424: }
426: /* Copy data into free space, then initialize lnk */
427: PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk);
429: current_space->array += cnzi;
430: current_space->local_used += cnzi;
431: current_space->local_remaining -= cnzi;
433: ci[i+1] = ci[i] + cnzi;
434: }
436: /* Column indices are in the list of free space */
437: /* Allocate space for cj, initialize cj, and */
438: /* destroy list of free space and other temporary array(s) */
439: PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);
440: PetscFreeSpaceContiguous(&free_space,cj);
441: PetscLLCondensedDestroy_Scalable(lnk);
443: /* Allocate space for ca */
444: /*-----------------------*/
445: PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);
446: PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));
448: /* put together the new symbolic matrix */
449: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,C);
451: (*C)->rmap->bs = A->rmap->bs;
452: (*C)->cmap->bs = B->cmap->bs;
454: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
455: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
456: c = (Mat_SeqAIJ*)((*C)->data);
457: c->free_a = PETSC_TRUE;
458: c->free_ij = PETSC_TRUE;
459: c->nonew = 0;
461: (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */
463: /* set MatInfo */
464: afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
465: if (afill < 1.0) afill = 1.0;
466: c->maxnz = ci[am];
467: c->nz = ci[am];
468: (*C)->info.mallocs = ndouble;
469: (*C)->info.fill_ratio_given = fill;
470: (*C)->info.fill_ratio_needed = afill;
472: #if defined(PETSC_USE_INFO)
473: if (ci[am]) {
474: PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);
475: PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);
476: } else {
477: PetscInfo((*C),"Empty matrix product\n");
478: }
479: #endif
480: return(0);
481: }
485: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A,Mat B,PetscReal fill,Mat *C)
486: {
487: PetscErrorCode ierr;
488: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
489: const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j;
490: PetscInt *ci,*cj,*bb;
491: PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
492: PetscReal afill;
493: PetscInt i,j,col,ndouble = 0;
494: PetscFreeSpaceList free_space=NULL,current_space=NULL;
495: PetscHeap h;
498: /* Get ci and cj - by merging sorted rows using a heap */
499: /*---------------------------------------------------------------------------------------------*/
500: /* Allocate arrays for fill computation and free space for accumulating nonzero column */
501: PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);
502: ci[0] = 0;
504: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
505: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);
506: current_space = free_space;
508: PetscHeapCreate(a->rmax,&h);
509: PetscMalloc(a->rmax*sizeof(PetscInt),&bb);
511: /* Determine ci and cj */
512: for (i=0; i<am; i++) {
513: const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
514: const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
515: ci[i+1] = ci[i];
516: /* Populate the min heap */
517: for (j=0; j<anzi; j++) {
518: bb[j] = bi[acol[j]]; /* bb points at the start of the row */
519: if (bb[j] < bi[acol[j]+1]) { /* Add if row is nonempty */
520: PetscHeapAdd(h,j,bj[bb[j]++]);
521: }
522: }
523: /* Pick off the min element, adding it to free space */
524: PetscHeapPop(h,&j,&col);
525: while (j >= 0) {
526: if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */
527: PetscFreeSpaceGet(PetscMin(2*current_space->total_array_size,16 << 20),¤t_space);
528: ndouble++;
529: }
530: *(current_space->array++) = col;
531: current_space->local_used++;
532: current_space->local_remaining--;
533: ci[i+1]++;
535: /* stash if anything else remains in this row of B */
536: if (bb[j] < bi[acol[j]+1]) {PetscHeapStash(h,j,bj[bb[j]++]);}
537: while (1) { /* pop and stash any other rows of B that also had an entry in this column */
538: PetscInt j2,col2;
539: PetscHeapPeek(h,&j2,&col2);
540: if (col2 != col) break;
541: PetscHeapPop(h,&j2,&col2);
542: if (bb[j2] < bi[acol[j2]+1]) {PetscHeapStash(h,j2,bj[bb[j2]++]);}
543: }
544: /* Put any stashed elements back into the min heap */
545: PetscHeapUnstash(h);
546: PetscHeapPop(h,&j,&col);
547: }
548: }
549: PetscFree(bb);
550: PetscHeapDestroy(&h);
552: /* Column indices are in the list of free space */
553: /* Allocate space for cj, initialize cj, and */
554: /* destroy list of free space and other temporary array(s) */
555: PetscMalloc(ci[am]*sizeof(PetscInt),&cj);
556: PetscFreeSpaceContiguous(&free_space,cj);
558: /* put together the new symbolic matrix */
559: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,C);
561: (*C)->rmap->bs = A->rmap->bs;
562: (*C)->cmap->bs = B->cmap->bs;
564: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
565: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
566: c = (Mat_SeqAIJ*)((*C)->data);
567: c->free_a = PETSC_TRUE;
568: c->free_ij = PETSC_TRUE;
569: c->nonew = 0;
571: (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ;
573: /* set MatInfo */
574: afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
575: if (afill < 1.0) afill = 1.0;
576: c->maxnz = ci[am];
577: c->nz = ci[am];
578: (*C)->info.mallocs = ndouble;
579: (*C)->info.fill_ratio_given = fill;
580: (*C)->info.fill_ratio_needed = afill;
582: #if defined(PETSC_USE_INFO)
583: if (ci[am]) {
584: PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);
585: PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);
586: } else {
587: PetscInfo((*C),"Empty matrix product\n");
588: }
589: #endif
590: return(0);
591: }
595: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A,Mat B,PetscReal fill,Mat *C)
596: {
597: PetscErrorCode ierr;
598: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
599: const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j;
600: PetscInt *ci,*cj,*bb;
601: PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
602: PetscReal afill;
603: PetscInt i,j,col,ndouble = 0;
604: PetscFreeSpaceList free_space=NULL,current_space=NULL;
605: PetscHeap h;
606: PetscBT bt;
609: /* Get ci and cj - using a heap for the sorted rows, but use BT so that each index is only added once */
610: /*---------------------------------------------------------------------------------------------*/
611: /* Allocate arrays for fill computation and free space for accumulating nonzero column */
612: PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);
613: ci[0] = 0;
615: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
616: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);
618: current_space = free_space;
620: PetscHeapCreate(a->rmax,&h);
621: PetscMalloc(a->rmax*sizeof(PetscInt),&bb);
622: PetscBTCreate(bn,&bt);
624: /* Determine ci and cj */
625: for (i=0; i<am; i++) {
626: const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
627: const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
628: const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */
629: ci[i+1] = ci[i];
630: /* Populate the min heap */
631: for (j=0; j<anzi; j++) {
632: PetscInt brow = acol[j];
633: for (bb[j] = bi[brow]; bb[j] < bi[brow+1]; bb[j]++) {
634: PetscInt bcol = bj[bb[j]];
635: if (!PetscBTLookupSet(bt,bcol)) { /* new entry */
636: PetscHeapAdd(h,j,bcol);
637: bb[j]++;
638: break;
639: }
640: }
641: }
642: /* Pick off the min element, adding it to free space */
643: PetscHeapPop(h,&j,&col);
644: while (j >= 0) {
645: if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */
646: fptr = NULL; /* need PetscBTMemzero */
647: PetscFreeSpaceGet(PetscMin(2*current_space->total_array_size,16 << 20),¤t_space);
648: ndouble++;
649: }
650: *(current_space->array++) = col;
651: current_space->local_used++;
652: current_space->local_remaining--;
653: ci[i+1]++;
655: /* stash if anything else remains in this row of B */
656: for (; bb[j] < bi[acol[j]+1]; bb[j]++) {
657: PetscInt bcol = bj[bb[j]];
658: if (!PetscBTLookupSet(bt,bcol)) { /* new entry */
659: PetscHeapAdd(h,j,bcol);
660: bb[j]++;
661: break;
662: }
663: }
664: PetscHeapPop(h,&j,&col);
665: }
666: if (fptr) { /* Clear the bits for this row */
667: for (; fptr<current_space->array; fptr++) {PetscBTClear(bt,*fptr);}
668: } else { /* We reallocated so we don't remember (easily) how to clear only the bits we changed */
669: PetscBTMemzero(bn,bt);
670: }
671: }
672: PetscFree(bb);
673: PetscHeapDestroy(&h);
674: PetscBTDestroy(&bt);
676: /* Column indices are in the list of free space */
677: /* Allocate space for cj, initialize cj, and */
678: /* destroy list of free space and other temporary array(s) */
679: PetscMalloc(ci[am]*sizeof(PetscInt),&cj);
680: PetscFreeSpaceContiguous(&free_space,cj);
682: /* put together the new symbolic matrix */
683: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,C);
685: (*C)->rmap->bs = A->rmap->bs;
686: (*C)->cmap->bs = B->cmap->bs;
688: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
689: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
690: c = (Mat_SeqAIJ*)((*C)->data);
691: c->free_a = PETSC_TRUE;
692: c->free_ij = PETSC_TRUE;
693: c->nonew = 0;
695: (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ;
697: /* set MatInfo */
698: afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
699: if (afill < 1.0) afill = 1.0;
700: c->maxnz = ci[am];
701: c->nz = ci[am];
702: (*C)->info.mallocs = ndouble;
703: (*C)->info.fill_ratio_given = fill;
704: (*C)->info.fill_ratio_needed = afill;
706: #if defined(PETSC_USE_INFO)
707: if (ci[am]) {
708: PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);
709: PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);
710: } else {
711: PetscInfo((*C),"Empty matrix product\n");
712: }
713: #endif
714: return(0);
715: }
719: /* concatenate unique entries and then sort */
720: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
721: {
722: PetscErrorCode ierr;
723: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
724: const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j;
725: PetscInt *ci,*cj;
726: PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
727: PetscReal afill;
728: PetscInt i,j,ndouble = 0;
729: PetscSegBuffer seg,segrow;
730: char *seen;
733: PetscMalloc((am+1)*sizeof(PetscInt),&ci);
734: ci[0] = 0;
736: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
737: PetscSegBufferCreate(sizeof(PetscInt),(PetscInt)(fill*(ai[am]+bi[bm])),&seg);
738: PetscSegBufferCreate(sizeof(PetscInt),100,&segrow);
739: PetscMalloc(bn*sizeof(char),&seen);
740: PetscMemzero(seen,bn*sizeof(char));
742: /* Determine ci and cj */
743: for (i=0; i<am; i++) {
744: const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
745: const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
746: PetscInt packlen = 0,*PETSC_RESTRICT crow;
747: /* Pack segrow */
748: for (j=0; j<anzi; j++) {
749: PetscInt brow = acol[j],bjstart = bi[brow],bjend = bi[brow+1],k;
750: for (k=bjstart; k<bjend; k++) {
751: PetscInt bcol = bj[k];
752: if (!seen[bcol]) { /* new entry */
753: PetscInt *PETSC_RESTRICT slot;
754: PetscSegBufferGetInts(segrow,1,&slot);
755: *slot = bcol;
756: seen[bcol] = 1;
757: packlen++;
758: }
759: }
760: }
761: PetscSegBufferGetInts(seg,packlen,&crow);
762: PetscSegBufferExtractTo(segrow,crow);
763: PetscSortInt(packlen,crow);
764: ci[i+1] = ci[i] + packlen;
765: for (j=0; j<packlen; j++) seen[crow[j]] = 0;
766: }
767: PetscSegBufferDestroy(&segrow);
768: PetscFree(seen);
770: /* Column indices are in the segmented buffer */
771: PetscSegBufferExtractAlloc(seg,&cj);
772: PetscSegBufferDestroy(&seg);
774: /* put together the new symbolic matrix */
775: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,C);
777: (*C)->rmap->bs = A->rmap->bs;
778: (*C)->cmap->bs = B->cmap->bs;
780: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
781: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
782: c = (Mat_SeqAIJ*)((*C)->data);
783: c->free_a = PETSC_TRUE;
784: c->free_ij = PETSC_TRUE;
785: c->nonew = 0;
787: (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ;
789: /* set MatInfo */
790: afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
791: if (afill < 1.0) afill = 1.0;
792: c->maxnz = ci[am];
793: c->nz = ci[am];
794: (*C)->info.mallocs = ndouble;
795: (*C)->info.fill_ratio_given = fill;
796: (*C)->info.fill_ratio_needed = afill;
798: #if defined(PETSC_USE_INFO)
799: if (ci[am]) {
800: PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);
801: PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);
802: } else {
803: PetscInfo((*C),"Empty matrix product\n");
804: }
805: #endif
806: return(0);
807: }
809: /* This routine is not used. Should be removed! */
812: PetscErrorCode MatMatTransposeMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
813: {
817: if (scall == MAT_INITIAL_MATRIX) {
818: PetscLogEventBegin(MAT_MatTransposeMultSymbolic,A,B,0,0);
819: MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);
820: PetscLogEventEnd(MAT_MatTransposeMultSymbolic,A,B,0,0);
821: }
822: PetscLogEventBegin(MAT_MatTransposeMultNumeric,A,B,0,0);
823: MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);
824: PetscLogEventEnd(MAT_MatTransposeMultNumeric,A,B,0,0);
825: return(0);
826: }
830: PetscErrorCode PetscContainerDestroy_Mat_MatMatTransMult(void *ptr)
831: {
832: PetscErrorCode ierr;
833: Mat_MatMatTransMult *multtrans=(Mat_MatMatTransMult*)ptr;
836: MatTransposeColoringDestroy(&multtrans->matcoloring);
837: MatDestroy(&multtrans->Bt_den);
838: MatDestroy(&multtrans->ABt_den);
839: PetscFree(multtrans);
840: return(0);
841: }
845: PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(Mat A)
846: {
847: PetscErrorCode ierr;
848: PetscContainer container;
849: Mat_MatMatTransMult *multtrans=NULL;
852: PetscObjectQuery((PetscObject)A,"Mat_MatMatTransMult",(PetscObject*)&container);
853: if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
854: PetscContainerGetPointer(container,(void**)&multtrans);
856: A->ops->destroy = multtrans->destroy;
857: if (A->ops->destroy) {
858: (*A->ops->destroy)(A);
859: }
860: PetscObjectCompose((PetscObject)A,"Mat_MatMatTransMult",0);
861: return(0);
862: }
866: PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
867: {
868: PetscErrorCode ierr;
869: Mat Bt;
870: PetscInt *bti,*btj;
871: Mat_MatMatTransMult *multtrans;
872: PetscContainer container;
875: /* create symbolic Bt */
876: MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);
877: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,NULL,&Bt);
879: Bt->rmap->bs = A->cmap->bs;
880: Bt->cmap->bs = B->cmap->bs;
882: /* get symbolic C=A*Bt */
883: MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);
885: /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */
886: PetscNew(Mat_MatMatTransMult,&multtrans);
888: /* attach the supporting struct to C */
889: PetscContainerCreate(PETSC_COMM_SELF,&container);
890: PetscContainerSetPointer(container,multtrans);
891: PetscContainerSetUserDestroy(container,PetscContainerDestroy_Mat_MatMatTransMult);
892: PetscObjectCompose((PetscObject)(*C),"Mat_MatMatTransMult",(PetscObject)container);
893: PetscContainerDestroy(&container);
895: multtrans->usecoloring = PETSC_FALSE;
896: multtrans->destroy = (*C)->ops->destroy;
897: (*C)->ops->destroy = MatDestroy_SeqAIJ_MatMatMultTrans;
899: PetscOptionsGetBool(NULL,"-matmattransmult_color",&multtrans->usecoloring,NULL);
900: if (multtrans->usecoloring) {
901: /* Create MatTransposeColoring from symbolic C=A*B^T */
902: MatTransposeColoring matcoloring;
903: ISColoring iscoloring;
904: Mat Bt_dense,C_dense;
906: MatGetColoring(*C,MATCOLORINGLF,&iscoloring);
907: MatTransposeColoringCreate(*C,iscoloring,&matcoloring);
909: multtrans->matcoloring = matcoloring;
911: ISColoringDestroy(&iscoloring);
913: /* Create Bt_dense and C_dense = A*Bt_dense */
914: MatCreate(PETSC_COMM_SELF,&Bt_dense);
915: MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);
916: MatSetType(Bt_dense,MATSEQDENSE);
917: MatSeqDenseSetPreallocation(Bt_dense,NULL);
919: Bt_dense->assembled = PETSC_TRUE;
920: multtrans->Bt_den = Bt_dense;
922: MatCreate(PETSC_COMM_SELF,&C_dense);
923: MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);
924: MatSetType(C_dense,MATSEQDENSE);
925: MatSeqDenseSetPreallocation(C_dense,NULL);
927: Bt_dense->assembled = PETSC_TRUE;
928: multtrans->ABt_den = C_dense;
930: #if defined(PETSC_USE_INFO)
931: {
932: Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*C)->data;
933: PetscInfo5(*C,"Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",A->cmap->n,matcoloring->ncolors,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors));
934: }
935: #endif
936: }
937: /* clean up */
938: MatDestroy(&Bt);
939: MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);
943: #if defined(INEFFICIENT_ALGORITHM)
944: /* The algorithm below computes am*bm sparse inner-product - inefficient! It will be deleted later. */
945: PetscFreeSpaceList free_space=NULL,current_space=NULL;
946: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
947: PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ci,*cj,*acol,*bcol;
948: PetscInt am =A->rmap->N,bm=B->rmap->N;
949: PetscInt i,j,anzi,bnzj,cnzi,nlnk,*lnk,nspacedouble=0,ka,kb,index[1];
950: MatScalar *ca;
951: PetscBT lnkbt;
952: PetscReal afill;
954: /* Allocate row pointer array ci */
955: PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);
956: ci[0] = 0;
958: /* Create and initialize a linked list for C columns */
959: nlnk = bm+1;
960: PetscLLCreate(bm,bm,nlnk,lnk,lnkbt);
962: /* Initial FreeSpace with size fill*(nnz(A)+nnz(B)) */
963: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);
964: current_space = free_space;
966: /* Determine symbolic info for each row of the product A*B^T: */
967: for (i=0; i<am; i++) {
968: anzi = ai[i+1] - ai[i];
969: cnzi = 0;
970: acol = aj + ai[i];
971: for (j=0; j<bm; j++) {
972: bnzj = bi[j+1] - bi[j];
973: bcol = bj + bi[j];
974: /* sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */
975: ka = 0; kb = 0;
976: while (ka < anzi && kb < bnzj) {
977: while (acol[ka] < bcol[kb] && ka < anzi) ka++;
978: if (ka == anzi) break;
979: while (acol[ka] > bcol[kb] && kb < bnzj) kb++;
980: if (kb == bnzj) break;
981: if (acol[ka] == bcol[kb]) { /* add nonzero c(i,j) to lnk */
982: index[0] = j;
983: PetscLLAdd(1,index,bm,nlnk,lnk,lnkbt);
984: cnzi++;
985: break;
986: }
987: }
988: }
990: /* If free space is not available, make more free space */
991: /* Double the amount of total space in the list */
992: if (current_space->local_remaining<cnzi) {
993: PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);
994: nspacedouble++;
995: }
997: /* Copy data into free space, then initialize lnk */
998: PetscLLClean(bm,bm,cnzi,lnk,current_space->array,lnkbt);
1000: current_space->array += cnzi;
1001: current_space->local_used += cnzi;
1002: current_space->local_remaining -= cnzi;
1004: ci[i+1] = ci[i] + cnzi;
1005: }
1008: /* Column indices are in the list of free space.
1009: Allocate array cj, copy column indices to cj, and destroy list of free space */
1010: PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);
1011: PetscFreeSpaceContiguous(&free_space,cj);
1012: PetscLLDestroy(lnk,lnkbt);
1014: /* Allocate space for ca */
1015: PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);
1016: PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));
1018: /* put together the new symbolic matrix */
1019: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bm,ci,cj,ca,C);
1021: (*C)->rmap->bs = A->cmap->bs;
1022: (*C)->cmap->bs = B->cmap->bs;
1024: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1025: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
1026: c = (Mat_SeqAIJ*)((*C)->data);
1027: c->free_a = PETSC_TRUE;
1028: c->free_ij = PETSC_TRUE;
1029: c->nonew = 0;
1031: /* set MatInfo */
1032: afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
1033: if (afill < 1.0) afill = 1.0;
1034: c->maxnz = ci[am];
1035: c->nz = ci[am];
1036: (*C)->info.mallocs = nspacedouble;
1037: (*C)->info.fill_ratio_given = fill;
1038: (*C)->info.fill_ratio_needed = afill;
1040: #if defined(PETSC_USE_INFO)
1041: if (ci[am]) {
1042: PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);
1043: PetscInfo1((*C),"Use MatMatTransposeMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);
1044: } else {
1045: PetscInfo((*C),"Empty matrix product\n");
1046: }
1047: #endif
1048: #endif
1049: return(0);
1050: }
1052: /* #define USE_ARRAY - for sparse dot product. Slower than !USE_ARRAY */
1055: PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
1056: {
1057: PetscErrorCode ierr;
1058: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
1059: PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow;
1060: PetscInt cm =C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol;
1061: PetscLogDouble flops=0.0;
1062: MatScalar *aa =a->a,*aval,*ba=b->a,*bval,*ca,*cval;
1063: Mat_MatMatTransMult *multtrans;
1064: PetscContainer container;
1065: #if defined(USE_ARRAY)
1066: MatScalar *spdot;
1067: #endif
1070: /* clear old values in C */
1071: if (!c->a) {
1072: PetscMalloc((ci[cm]+1)*sizeof(MatScalar),&ca);
1073: c->a = ca;
1074: c->free_a = PETSC_TRUE;
1075: } else {
1076: ca = c->a;
1077: }
1078: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
1080: PetscObjectQuery((PetscObject)C,"Mat_MatMatTransMult",(PetscObject*)&container);
1081: if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
1082: PetscContainerGetPointer(container,(void**)&multtrans);
1083: if (multtrans->usecoloring) {
1084: MatTransposeColoring matcoloring = multtrans->matcoloring;
1085: Mat Bt_dense;
1086: PetscInt m,n;
1087: Mat C_dense = multtrans->ABt_den;
1089: Bt_dense = multtrans->Bt_den;
1090: MatGetLocalSize(Bt_dense,&m,&n);
1092: /* Get Bt_dense by Apply MatTransposeColoring to B */
1093: MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);
1095: /* C_dense = A*Bt_dense */
1096: MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);
1098: /* Recover C from C_dense */
1099: MatTransColoringApplyDenToSp(matcoloring,C_dense,C);
1100: return(0);
1101: }
1103: #if defined(USE_ARRAY)
1104: /* allocate an array for implementing sparse inner-product */
1105: PetscMalloc((A->cmap->n+1)*sizeof(MatScalar),&spdot);
1106: PetscMemzero(spdot,(A->cmap->n+1)*sizeof(MatScalar));
1107: #endif
1109: for (i=0; i<cm; i++) {
1110: anzi = ai[i+1] - ai[i];
1111: acol = aj + ai[i];
1112: aval = aa + ai[i];
1113: cnzi = ci[i+1] - ci[i];
1114: ccol = cj + ci[i];
1115: cval = ca + ci[i];
1116: for (j=0; j<cnzi; j++) {
1117: brow = ccol[j];
1118: bnzj = bi[brow+1] - bi[brow];
1119: bcol = bj + bi[brow];
1120: bval = ba + bi[brow];
1122: /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */
1123: #if defined(USE_ARRAY)
1124: /* put ba to spdot array */
1125: for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = bval[nextb];
1126: /* c(i,j)=A[i,:]*B[j,:]^T */
1127: for (nexta=0; nexta<anzi; nexta++) {
1128: cval[j] += spdot[acol[nexta]]*aval[nexta];
1129: }
1130: /* zero spdot array */
1131: for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = 0.0;
1132: #else
1133: nexta = 0; nextb = 0;
1134: while (nexta<anzi && nextb<bnzj) {
1135: while (nexta < anzi && acol[nexta] < bcol[nextb]) nexta++;
1136: if (nexta == anzi) break;
1137: while (nextb < bnzj && acol[nexta] > bcol[nextb]) nextb++;
1138: if (nextb == bnzj) break;
1139: if (acol[nexta] == bcol[nextb]) {
1140: cval[j] += aval[nexta]*bval[nextb];
1141: nexta++; nextb++;
1142: flops += 2;
1143: }
1144: }
1145: #endif
1146: }
1147: }
1148: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1149: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1150: PetscLogFlops(flops);
1151: #if defined(USE_ARRAY)
1152: PetscFree(spdot);
1153: #endif
1154: return(0);
1155: }
1159: PetscErrorCode MatTransposeMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1160: {
1164: if (scall == MAT_INITIAL_MATRIX) {
1165: MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);
1166: }
1167: MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);
1168: return(0);
1169: }
1173: PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
1174: {
1176: Mat At;
1177: PetscInt *ati,*atj;
1180: /* create symbolic At */
1181: MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);
1182: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,NULL,&At);
1184: At->rmap->bs = A->cmap->bs;
1185: At->cmap->bs = B->cmap->bs;
1187: /* get symbolic C=At*B */
1188: MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);
1190: /* clean up */
1191: MatDestroy(&At);
1192: MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);
1193: return(0);
1194: }
1198: PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
1199: {
1201: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
1202: PetscInt am =A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb;
1203: PetscInt cm =C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k;
1204: PetscLogDouble flops=0.0;
1205: MatScalar *aa =a->a,*ba,*ca,*caj;
1208: if (!c->a) {
1209: PetscMalloc((ci[cm]+1)*sizeof(MatScalar),&ca);
1211: c->a = ca;
1212: c->free_a = PETSC_TRUE;
1213: } else {
1214: ca = c->a;
1215: }
1216: /* clear old values in C */
1217: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
1219: /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
1220: for (i=0; i<am; i++) {
1221: bj = b->j + bi[i];
1222: ba = b->a + bi[i];
1223: bnzi = bi[i+1] - bi[i];
1224: anzi = ai[i+1] - ai[i];
1225: for (j=0; j<anzi; j++) {
1226: nextb = 0;
1227: crow = *aj++;
1228: cjj = cj + ci[crow];
1229: caj = ca + ci[crow];
1230: /* perform sparse axpy operation. Note cjj includes bj. */
1231: for (k=0; nextb<bnzi; k++) {
1232: if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */
1233: caj[k] += (*aa)*(*(ba+nextb));
1234: nextb++;
1235: }
1236: }
1237: flops += 2*bnzi;
1238: aa++;
1239: }
1240: }
1242: /* Assemble the final matrix and clean up */
1243: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1244: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1245: PetscLogFlops(flops);
1246: return(0);
1247: }
1251: PetscErrorCode MatMatMult_SeqAIJ_SeqDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1252: {
1256: if (scall == MAT_INITIAL_MATRIX) {
1257: PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);
1258: MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,fill,C);
1259: PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);
1260: }
1261: PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);
1262: MatMatMultNumeric_SeqAIJ_SeqDense(A,B,*C);
1263: PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);
1264: return(0);
1265: }
1269: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat *C)
1270: {
1274: MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);
1276: (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqDense;
1277: return(0);
1278: }
1282: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C)
1283: {
1284: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
1286: PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4;
1287: MatScalar *aa;
1288: PetscInt cm = C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n;
1289: PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam;
1292: if (!cm || !cn) return(0);
1293: if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm);
1294: if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n);
1295: if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n);
1296: MatDenseGetArray(B,&b);
1297: MatDenseGetArray(C,&c);
1298: b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm;
1299: for (col=0; col<cn-4; col += 4) { /* over columns of C */
1300: colam = col*am;
1301: for (i=0; i<am; i++) { /* over rows of C in those columns */
1302: r1 = r2 = r3 = r4 = 0.0;
1303: n = a->i[i+1] - a->i[i];
1304: aj = a->j + a->i[i];
1305: aa = a->a + a->i[i];
1306: for (j=0; j<n; j++) {
1307: r1 += (*aa)*b1[*aj];
1308: r2 += (*aa)*b2[*aj];
1309: r3 += (*aa)*b3[*aj];
1310: r4 += (*aa++)*b4[*aj++];
1311: }
1312: c[colam + i] = r1;
1313: c[colam + am + i] = r2;
1314: c[colam + am2 + i] = r3;
1315: c[colam + am3 + i] = r4;
1316: }
1317: b1 += bm4;
1318: b2 += bm4;
1319: b3 += bm4;
1320: b4 += bm4;
1321: }
1322: for (; col<cn; col++) { /* over extra columns of C */
1323: for (i=0; i<am; i++) { /* over rows of C in those columns */
1324: r1 = 0.0;
1325: n = a->i[i+1] - a->i[i];
1326: aj = a->j + a->i[i];
1327: aa = a->a + a->i[i];
1329: for (j=0; j<n; j++) {
1330: r1 += (*aa++)*b1[*aj++];
1331: }
1332: c[col*am + i] = r1;
1333: }
1334: b1 += bm;
1335: }
1336: PetscLogFlops(cn*(2.0*a->nz));
1337: MatDenseRestoreArray(B,&b);
1338: MatDenseRestoreArray(C,&c);
1339: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1340: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1341: return(0);
1342: }
1344: /*
1345: Note very similar to MatMult_SeqAIJ(), should generate both codes from same base
1346: */
1349: PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C)
1350: {
1351: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
1353: PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4;
1354: MatScalar *aa;
1355: PetscInt cm = C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n,*ii,arm;
1356: PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam,*ridx;
1359: if (!cm || !cn) return(0);
1360: MatDenseGetArray(B,&b);
1361: MatDenseGetArray(C,&c);
1362: b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm;
1364: if (a->compressedrow.use) { /* use compressed row format */
1365: for (col=0; col<cn-4; col += 4) { /* over columns of C */
1366: colam = col*am;
1367: arm = a->compressedrow.nrows;
1368: ii = a->compressedrow.i;
1369: ridx = a->compressedrow.rindex;
1370: for (i=0; i<arm; i++) { /* over rows of C in those columns */
1371: r1 = r2 = r3 = r4 = 0.0;
1372: n = ii[i+1] - ii[i];
1373: aj = a->j + ii[i];
1374: aa = a->a + ii[i];
1375: for (j=0; j<n; j++) {
1376: r1 += (*aa)*b1[*aj];
1377: r2 += (*aa)*b2[*aj];
1378: r3 += (*aa)*b3[*aj];
1379: r4 += (*aa++)*b4[*aj++];
1380: }
1381: c[colam + ridx[i]] += r1;
1382: c[colam + am + ridx[i]] += r2;
1383: c[colam + am2 + ridx[i]] += r3;
1384: c[colam + am3 + ridx[i]] += r4;
1385: }
1386: b1 += bm4;
1387: b2 += bm4;
1388: b3 += bm4;
1389: b4 += bm4;
1390: }
1391: for (; col<cn; col++) { /* over extra columns of C */
1392: colam = col*am;
1393: arm = a->compressedrow.nrows;
1394: ii = a->compressedrow.i;
1395: ridx = a->compressedrow.rindex;
1396: for (i=0; i<arm; i++) { /* over rows of C in those columns */
1397: r1 = 0.0;
1398: n = ii[i+1] - ii[i];
1399: aj = a->j + ii[i];
1400: aa = a->a + ii[i];
1402: for (j=0; j<n; j++) {
1403: r1 += (*aa++)*b1[*aj++];
1404: }
1405: c[colam + ridx[i]] += r1;
1406: }
1407: b1 += bm;
1408: }
1409: } else {
1410: for (col=0; col<cn-4; col += 4) { /* over columns of C */
1411: colam = col*am;
1412: for (i=0; i<am; i++) { /* over rows of C in those columns */
1413: r1 = r2 = r3 = r4 = 0.0;
1414: n = a->i[i+1] - a->i[i];
1415: aj = a->j + a->i[i];
1416: aa = a->a + a->i[i];
1417: for (j=0; j<n; j++) {
1418: r1 += (*aa)*b1[*aj];
1419: r2 += (*aa)*b2[*aj];
1420: r3 += (*aa)*b3[*aj];
1421: r4 += (*aa++)*b4[*aj++];
1422: }
1423: c[colam + i] += r1;
1424: c[colam + am + i] += r2;
1425: c[colam + am2 + i] += r3;
1426: c[colam + am3 + i] += r4;
1427: }
1428: b1 += bm4;
1429: b2 += bm4;
1430: b3 += bm4;
1431: b4 += bm4;
1432: }
1433: for (; col<cn; col++) { /* over extra columns of C */
1434: colam = col*am;
1435: for (i=0; i<am; i++) { /* over rows of C in those columns */
1436: r1 = 0.0;
1437: n = a->i[i+1] - a->i[i];
1438: aj = a->j + a->i[i];
1439: aa = a->a + a->i[i];
1441: for (j=0; j<n; j++) {
1442: r1 += (*aa++)*b1[*aj++];
1443: }
1444: c[colam + i] += r1;
1445: }
1446: b1 += bm;
1447: }
1448: }
1449: PetscLogFlops(cn*2.0*a->nz);
1450: MatDenseRestoreArray(B,&b);
1451: MatDenseRestoreArray(C,&c);
1452: return(0);
1453: }
1457: PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense)
1458: {
1460: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
1461: Mat_SeqDense *btdense = (Mat_SeqDense*)Btdense->data;
1462: PetscInt *bi = b->i,*bj=b->j;
1463: PetscInt m = Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns;
1464: MatScalar *btval,*btval_den,*ba=b->a;
1465: PetscInt *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors;
1468: btval_den=btdense->v;
1469: PetscMemzero(btval_den,(m*n)*sizeof(MatScalar));
1470: for (k=0; k<ncolors; k++) {
1471: ncolumns = coloring->ncolumns[k];
1472: for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */
1473: col = *(columns + colorforcol[k] + l);
1474: btcol = bj + bi[col];
1475: btval = ba + bi[col];
1476: anz = bi[col+1] - bi[col];
1477: for (j=0; j<anz; j++) {
1478: brow = btcol[j];
1479: btval_den[brow] = btval[j];
1480: }
1481: }
1482: btval_den += m;
1483: }
1484: return(0);
1485: }
1489: PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp)
1490: {
1492: Mat_SeqAIJ *csp = (Mat_SeqAIJ*)Csp->data;
1493: PetscInt k,l,*row,*idx,m,ncolors=matcoloring->ncolors,nrows;
1494: PetscScalar *ca_den,*cp_den,*ca=csp->a;
1495: PetscInt *rows=matcoloring->rows,*spidx=matcoloring->columnsforspidx,*colorforrow=matcoloring->colorforrow;
1498: MatGetLocalSize(Csp,&m,NULL);
1499: MatDenseGetArray(Cden,&ca_den);
1500: cp_den = ca_den;
1501: for (k=0; k<ncolors; k++) {
1502: nrows = matcoloring->nrows[k];
1503: row = rows + colorforrow[k];
1504: idx = spidx + colorforrow[k];
1505: for (l=0; l<nrows; l++) {
1506: ca[idx[l]] = cp_den[row[l]];
1507: }
1508: cp_den += m;
1509: }
1510: MatDenseRestoreArray(Cden,&ca_den);
1511: return(0);
1512: }
1514: /*
1515: MatGetColumnIJ_SeqAIJ_Color() and MatRestoreColumnIJ_SeqAIJ_Color() are customized from
1516: MatGetColumnIJ_SeqAIJ() and MatRestoreColumnIJ_SeqAIJ() by adding an output
1517: spidx[], index of a->j, to be used for setting 'columnsforspidx' in MatTransposeColoringCreate_SeqAIJ().
1518: */
1521: PetscErrorCode MatGetColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done)
1522: {
1523: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1525: PetscInt i,*collengths,*cia,*cja,n = A->cmap->n,m = A->rmap->n;
1526: PetscInt nz = a->i[m],row,*jj,mr,col;
1527: PetscInt *cspidx;
1530: *nn = n;
1531: if (!ia) return(0);
1532: if (symmetric) {
1533: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatGetColumnIJ_SeqAIJ_Color() not supported for the case symmetric");
1534: MatToSymmetricIJ_SeqAIJ(A->rmap->n,a->i,a->j,0,oshift,(PetscInt**)ia,(PetscInt**)ja);
1535: } else {
1536: PetscMalloc((n+1)*sizeof(PetscInt),&collengths);
1537: PetscMemzero(collengths,n*sizeof(PetscInt));
1538: PetscMalloc((n+1)*sizeof(PetscInt),&cia);
1539: PetscMalloc((nz+1)*sizeof(PetscInt),&cja);
1540: PetscMalloc((nz+1)*sizeof(PetscInt),&cspidx);
1541: jj = a->j;
1542: for (i=0; i<nz; i++) {
1543: collengths[jj[i]]++;
1544: }
1545: cia[0] = oshift;
1546: for (i=0; i<n; i++) {
1547: cia[i+1] = cia[i] + collengths[i];
1548: }
1549: PetscMemzero(collengths,n*sizeof(PetscInt));
1550: jj = a->j;
1551: for (row=0; row<m; row++) {
1552: mr = a->i[row+1] - a->i[row];
1553: for (i=0; i<mr; i++) {
1554: col = *jj++;
1556: cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
1557: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
1558: }
1559: }
1560: PetscFree(collengths);
1561: *ia = cia; *ja = cja;
1562: *spidx = cspidx;
1563: }
1564: return(0);
1565: }
1569: PetscErrorCode MatRestoreColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done)
1570: {
1574: if (!ia) return(0);
1576: PetscFree(*ia);
1577: PetscFree(*ja);
1578: PetscFree(*spidx);
1579: return(0);
1580: }
1584: PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c)
1585: {
1587: PetscInt i,n,nrows,N,j,k,m,ncols,col,cm;
1588: const PetscInt *is,*ci,*cj,*row_idx;
1589: PetscInt nis = iscoloring->n,*rowhit,bs = 1;
1590: IS *isa;
1591: PetscBool flg1,flg2;
1592: Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data;
1593: PetscInt *colorforrow,*rows,*rows_i,*columnsforspidx,*columnsforspidx_i,*idxhit,*spidx;
1594: PetscInt *colorforcol,*columns,*columns_i;
1597: ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);
1599: /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */
1600: PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);
1601: PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);
1602: if (flg1 || flg2) {
1603: MatGetBlockSize(mat,&bs);
1604: }
1606: N = mat->cmap->N/bs;
1607: c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */
1608: c->N = mat->cmap->N/bs;
1609: c->m = mat->rmap->N/bs;
1610: c->rstart = 0;
1612: c->ncolors = nis;
1613: PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);
1614: PetscMalloc(nis*sizeof(PetscInt),&c->nrows);
1615: PetscMalloc2(csp->nz+1,PetscInt,&rows,csp->nz+1,PetscInt,&columnsforspidx);
1616: PetscMalloc((nis+1)*sizeof(PetscInt),&colorforrow);
1618: colorforrow[0] = 0;
1619: rows_i = rows;
1620: columnsforspidx_i = columnsforspidx;
1622: PetscMalloc((nis+1)*sizeof(PetscInt),&colorforcol);
1623: PetscMalloc((N+1)*sizeof(PetscInt),&columns);
1625: colorforcol[0] = 0;
1626: columns_i = columns;
1628: MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL); /* column-wise storage of mat */
1630: cm = c->m;
1631: PetscMalloc((cm+1)*sizeof(PetscInt),&rowhit);
1632: PetscMalloc((cm+1)*sizeof(PetscInt),&idxhit);
1633: for (i=0; i<nis; i++) {
1634: ISGetLocalSize(isa[i],&n);
1635: ISGetIndices(isa[i],&is);
1637: c->ncolumns[i] = n;
1638: if (n) {
1639: PetscMemcpy(columns_i,is,n*sizeof(PetscInt));
1640: }
1641: colorforcol[i+1] = colorforcol[i] + n;
1642: columns_i += n;
1644: /* fast, crude version requires O(N*N) work */
1645: PetscMemzero(rowhit,cm*sizeof(PetscInt));
1647: /* loop over columns*/
1648: for (j=0; j<n; j++) {
1649: col = is[j];
1650: row_idx = cj + ci[col];
1651: m = ci[col+1] - ci[col];
1652: /* loop over columns marking them in rowhit */
1653: for (k=0; k<m; k++) {
1654: idxhit[*row_idx] = spidx[ci[col] + k];
1655: rowhit[*row_idx++] = col + 1;
1656: }
1657: }
1658: /* count the number of hits */
1659: nrows = 0;
1660: for (j=0; j<cm; j++) {
1661: if (rowhit[j]) nrows++;
1662: }
1663: c->nrows[i] = nrows;
1664: colorforrow[i+1] = colorforrow[i] + nrows;
1666: nrows = 0;
1667: for (j=0; j<cm; j++) {
1668: if (rowhit[j]) {
1669: rows_i[nrows] = j;
1670: columnsforspidx_i[nrows] = idxhit[j];
1671: nrows++;
1672: }
1673: }
1674: ISRestoreIndices(isa[i],&is);
1675: rows_i += nrows; columnsforspidx_i += nrows;
1676: }
1677: MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);
1678: PetscFree(rowhit);
1679: ISColoringRestoreIS(iscoloring,&isa);
1680: #if defined(PETSC_USE_DEBUG)
1681: if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]);
1682: #endif
1684: c->colorforrow = colorforrow;
1685: c->rows = rows;
1686: c->columnsforspidx = columnsforspidx;
1687: c->colorforcol = colorforcol;
1688: c->columns = columns;
1690: PetscFree(idxhit);
1691: return(0);
1692: }