Actual source code: matmatmult.c

petsc-3.4.2 2013-07-02
  2: /*
  3:   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
  4:           C = A * B
  5: */

  7: #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
  8: #include <../src/mat/utils/freespace.h>
  9: #include <../src/mat/utils/petscheap.h>
 10: #include <petscbt.h>
 11: #include <../src/mat/impls/dense/seq/dense.h>

 13: static PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat,Mat,PetscReal,Mat*); /* file-local; defined below and selected by MatMatMult_SeqAIJ_SeqAIJ */

 17: PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
 18: {
       /*
          Driver for the product C = A*B of two SeqAIJ matrices.

          A, B  - the factors
          scall - with MAT_INITIAL_MATRIX the symbolic phase runs first and creates *C;
                  with any other value *C must already provide a matmultnumeric operation
          fill  - fill estimate forwarded unchanged to the symbolic routine
          C     - the product; structure comes from the symbolic phase, values from the numeric phase

          The symbolic algorithm is chosen from the options database. When several options are
          set, the first one in the order scalable_fast, scalable, heap, btheap, llcondensed wins;
          with none set MatMatMultSymbolic_SeqAIJ_SeqAIJ is used.
       */
 20:   PetscBool      scalable=PETSC_FALSE,scalable_fast=PETSC_FALSE,heap = PETSC_FALSE,btheap = PETSC_FALSE,llcondensed = PETSC_FALSE;

 23:   if (scall == MAT_INITIAL_MATRIX) {
 24:     PetscObjectOptionsBegin((PetscObject)A);
 25:     PetscOptionsBool("-matmatmult_scalable","Use a scalable but slower C=A*B","",scalable,&scalable,NULL);
         /* NOTE(review): the help text below is identical to -matmatmult_scalable; confirm the intended wording */
 26:     PetscOptionsBool("-matmatmult_scalable_fast","Use a scalable but slower C=A*B","",scalable_fast,&scalable_fast,NULL);
 27:     PetscOptionsBool("-matmatmult_heap","Use heap implementation of symbolic product C=A*B","",heap,&heap,NULL);
 28:     PetscOptionsBool("-matmatmult_btheap","Use btheap implementation of symbolic product C=A*B","",btheap,&btheap,NULL);
 29:     PetscOptionsBool("-matmatmult_llcondensed","Use LLCondensed for symbolic C=A*B","",llcondensed,&llcondensed,NULL);
 30:     PetscOptionsEnd();
 31:     PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);
 32:     if (scalable_fast) {
 33:       MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A,B,fill,C);
 34:     } else if (scalable) {
 35:       MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C);
 36:     } else if (heap) {
 37:       MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A,B,fill,C);
 38:     } else if (btheap) {
 39:       MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A,B,fill,C);
 40:     } else if (llcondensed) {
 41:       MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(A,B,fill,C);
 42:     } else {
 43:       MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);
 44:     }
 45:     PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);
 46:   }

       /* The numeric routine is whichever one the symbolic phase installed on *C */
 48:   PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);
 49:   (*(*C)->ops->matmultnumeric)(A,B,*C);
 50:   PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);
 51:   return(0);
 52: }

 56: static PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat A,Mat B,PetscReal fill,Mat *C)
 57: {
       /*
          Symbolic phase of C = A*B built on the PetscLLCondensed list plus a bit table.

          Computes the row offsets ci and column indices cj of the product and wraps them in a
          new SeqAIJ matrix *C. No value array is created here (NULL is passed for it); the
          numeric routine installed below, MatMatMultNumeric_SeqAIJ_SeqAIJ, allocates it on its
          first call.

          fill - caller's estimate of nnz(C)/(nnz(A)+nnz(B)); sizes the first free-space chunk.
       */
 58:   PetscErrorCode     ierr;
 59:   Mat_SeqAIJ         *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
 60:   PetscInt           *ai=a->i,*bi=b->i,*ci,*cj;
 61:   PetscInt           am =A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
 62:   PetscReal          afill;
 63:   PetscInt           i,j,anzi,brow,bnzj,cnzi,*bj,*aj,nlnk_max,*lnk,ndouble=0;
 64:   PetscBT            lnkbt;
 65:   PetscFreeSpaceList free_space=NULL,current_space=NULL;

 68:   /* Get ci and cj */
 69:   /*---------------*/
 70:   /* Allocate ci array, arrays for fill computation and */
 71:   /* free space for accumulating nonzero column info */
 72:   PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);
 73:   ci[0] = 0;

 75:   /* create and initialize a linked list */
       /* a row of C can hold at most rmax(A)*rmax(B) entries and never more than bn; fall back to bn when the product is 0 */
 76:   nlnk_max = a->rmax*b->rmax;
 77:   if (!nlnk_max || nlnk_max > bn) nlnk_max = bn;
 78:   PetscLLCondensedCreate(nlnk_max,bn,&lnk,&lnkbt);

 80:   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
 81:   PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);

 83:   current_space = free_space;

 85:   /* Determine ci and cj */
 86:   for (i=0; i<am; i++) {
 87:     anzi = ai[i+1] - ai[i];
 88:     aj   = a->j + ai[i];
 89:     for (j=0; j<anzi; j++) {
 90:       brow = aj[j];
 91:       bnzj = bi[brow+1] - bi[brow];
 92:       bj   = b->j + bi[brow];
 93:       /* add non-zero cols of B into the sorted linked list lnk */
 94:       PetscLLCondensedAddSorted(bnzj,bj,lnk,lnkbt);
 95:     }
         /* lnk[0] holds the number of entries collected for this row */
 96:     cnzi = lnk[0];

 98:     /* If free space is not available, make more free space */
 99:     /* Double the amount of total space in the list */
100:     if (current_space->local_remaining<cnzi) {
101:       PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);
102:       ndouble++;
103:     }

105:     /* Copy data into free space, then initialize lnk */
106:     PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt);

108:     current_space->array           += cnzi;
109:     current_space->local_used      += cnzi;
110:     current_space->local_remaining -= cnzi;

112:     ci[i+1] = ci[i] + cnzi;
113:   }

115:   /* Column indices are in the list of free space */
116:   /* Allocate space for cj, initialize cj, and */
117:   /* destroy list of free space and other temporary array(s) */
118:   PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);
119:   PetscFreeSpaceContiguous(&free_space,cj);
120:   PetscLLCondensedDestroy(lnk,lnkbt);

122:   /* put together the new symbolic matrix */
123:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,C);

125:   (*C)->rmap->bs = A->rmap->bs;
126:   (*C)->cmap->bs = B->cmap->bs;

128:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
129:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
130:   c                         = (Mat_SeqAIJ*)((*C)->data);
131:   c->free_a                 = PETSC_FALSE;
132:   c->free_ij                = PETSC_TRUE;
133:   c->nonew                  = 0;
134:   (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; /* fast, needs non-scalable O(bn) array 'abdense' */

136:   /* set MatInfo */
       /* guard the ratio: with nnz(A)+nnz(B) == 0 the division is 0/0 and the NaN would slip past the < 1.0 clamp */
137:   afill = (ai[am]+bi[bm]) ? (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5 : 1.0;
138:   if (afill < 1.0) afill = 1.0;
139:   c->maxnz                     = ci[am];
140:   c->nz                        = ci[am];
141:   (*C)->info.mallocs           = ndouble;
142:   (*C)->info.fill_ratio_given  = fill;
143:   (*C)->info.fill_ratio_needed = afill;

145: #if defined(PETSC_USE_INFO)
146:   if (ci[am]) {
147:     PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);
148:     PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);
149:   } else {
150:     PetscInfo((*C),"Empty matrix product\n");
151:   }
152: #endif
153:   return(0);
154: }

158: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
159: {
       /*
          Numeric phase of C = A*B for SeqAIJ matrices, using a dense work row.

          C must already carry the nonzero structure (c->i, c->j) of the product. Each row of
          the product is accumulated into ab_dense, a work array of length B->cmap->N, and then
          gathered into C's value array at the column positions listed in cj.

          On the first call (c->a is NULL) the value array and the work row are allocated and
          stored on C as c->a and c->matmult_abdense.
       */
161:   PetscLogDouble flops=0.0;
162:   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)A->data;
163:   Mat_SeqAIJ     *b   = (Mat_SeqAIJ*)B->data;
164:   Mat_SeqAIJ     *c   = (Mat_SeqAIJ*)C->data;
165:   PetscInt       *ai  =a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
166:   PetscInt       am   =A->rmap->n,cm=C->rmap->n;
167:   PetscInt       i,j,k,anzi,bnzi,cnzi,brow;
168:   PetscScalar    *aa=a->a,*ba=b->a,*baj,*ca,valtmp;
169:   PetscScalar    *ab_dense;

172:   /* printf("MatMatMultNumeric_SeqAIJ_SeqAIJ...ca %p\n",c->a); */
173:   if (!c->a) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */
174:     PetscMalloc((ci[cm]+1)*sizeof(MatScalar),&ca);
175:     c->a      = ca;
176:     c->free_a = PETSC_TRUE;

         /* the work row starts zeroed; the gather loop below zeroes it again after every row */
178:     PetscMalloc(B->cmap->N*sizeof(PetscScalar),&ab_dense);
179:     PetscMemzero(ab_dense,B->cmap->N*sizeof(PetscScalar));

181:     c->matmult_abdense = ab_dense;
182:   } else {
         /* NOTE(review): this branch assumes matmult_abdense was set whenever c->a is non-NULL - confirm for matrices whose values were allocated elsewhere */
183:     ca       = c->a;
184:     ab_dense = c->matmult_abdense;
185:   }

187:   /* clean old values in C */
188:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
189:   /* Traverse A row-wise. */
190:   /* Build the ith row in C by summing over nonzero columns in A, */
191:   /* the rows of B corresponding to nonzeros of A. */
192:   for (i=0; i<am; i++) {
193:     anzi = ai[i+1] - ai[i];
194:     for (j=0; j<anzi; j++) {
195:       brow = aj[j];
196:       bnzi = bi[brow+1] - bi[brow];
197:       bjj  = bj + bi[brow];
198:       baj  = ba + bi[brow];
199:       /* perform dense axpy */
200:       valtmp = aa[j];
201:       for (k=0; k<bnzi; k++) {
202:         ab_dense[bjj[k]] += valtmp*baj[k];
203:       }
204:       flops += 2*bnzi;
205:     }
         /* aj and aa are walked forward so that index 0 is always the start of the current row of A */
206:     aj += anzi; aa += anzi;

         /* gather the accumulated row into C and reset the touched work entries */
208:     cnzi = ci[i+1] - ci[i];
209:     for (k=0; k<cnzi; k++) {
210:       ca[k]          += ab_dense[cj[k]];
211:       ab_dense[cj[k]] = 0.0; /* zero ab_dense */
212:     }
213:     flops += cnzi;
         /* likewise cj and ca now point at the next row of C */
214:     cj    += cnzi; ca += cnzi;
215:   }
216:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
217:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
218:   PetscLogFlops(flops);
219:   return(0);
220: }

224: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C)
225: {
       /*
          Numeric phase of C = A*B for SeqAIJ matrices without a dense work row.

          C must already carry both its nonzero structure and an allocated value array: c->a is
          used directly and is never allocated here. For every nonzero of a row of A, the
          matching row of B is added into the current row of C by scanning C's column indices.
       */
227:   PetscLogDouble flops=0.0;
228:   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)A->data;
229:   Mat_SeqAIJ     *b   = (Mat_SeqAIJ*)B->data;
230:   Mat_SeqAIJ     *c   = (Mat_SeqAIJ*)C->data;
231:   PetscInt       *ai  = a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
232:   PetscInt       am   = A->rmap->N,cm=C->rmap->N;
233:   PetscInt       i,j,k,anzi,bnzi,cnzi,brow;
234:   PetscScalar    *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp;
235:   PetscInt       nextb;

238:   /* clean old values in C */
239:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
240:   /* Traverse A row-wise. */
241:   /* Build the ith row in C by summing over nonzero columns in A, */
242:   /* the rows of B corresponding to nonzeros of A. */
243:   for (i=0; i<am; i++) {
244:     anzi = ai[i+1] - ai[i];
245:     cnzi = ci[i+1] - ci[i];
246:     for (j=0; j<anzi; j++) {
247:       brow = aj[j];
248:       bnzi = bi[brow+1] - bi[brow];
249:       bjj  = bj + bi[brow];
250:       baj  = ba + bi[brow];
251:       /* perform sparse axpy */
252:       valtmp = aa[j];
253:       nextb  = 0;
           /* The scan stops only once every entry of the B row has been matched; k is not bounded by cnzi. */
           /* NOTE(review): presumably relies on both index lists being sorted and on C's row containing every column of the B row - confirm */
254:       for (k=0; nextb<bnzi; k++) {
255:         if (cj[k] == bjj[nextb]) { /* ccol == bcol */
256:           ca[k] += valtmp*baj[nextb++];
257:         }
258:       }
259:       flops += 2*bnzi;
260:     }
         /* advance all four cursors to the next row of A and of C */
261:     aj += anzi; aa += anzi;
262:     cj += cnzi; ca += cnzi;
263:   }

265:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
266:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
267:   PetscLogFlops(flops);
268:   return(0);
269: }

273: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A,Mat B,PetscReal fill,Mat *C)
274: {
       /*
          Symbolic phase of C = A*B built on the PetscLLCondensed*_fast list routines.

          Computes the row offsets ci and column indices cj of the product, allocates a zeroed
          value array ca, and wraps all three in a new SeqAIJ matrix *C. The numeric routine
          installed on *C is MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable.

          fill - caller's estimate of nnz(C)/(nnz(A)+nnz(B)); sizes the first free-space chunk.
       */
275:   PetscErrorCode     ierr;
276:   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
277:   PetscInt           *ai = a->i,*bi=b->i,*ci,*cj;
278:   PetscInt           am  = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
279:   MatScalar          *ca;
280:   PetscReal          afill;
281:   PetscInt           i,j,anzi,brow,bnzj,cnzi,*bj,*aj,nlnk_max,*lnk,ndouble=0;
282:   PetscFreeSpaceList free_space=NULL,current_space=NULL;

285:   /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */
286:   /*-----------------------------------------------------------------------------------------*/
287:   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
288:   PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);
289:   ci[0] = 0;

291:   /* create and initialize a linked list */
292:   nlnk_max = a->rmax*b->rmax;
293:   if (!nlnk_max || nlnk_max > bn) nlnk_max = bn; /* in case rmax is not defined for A or B */
294:   PetscLLCondensedCreate_fast(nlnk_max,&lnk);

296:   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
297:   PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);
298:   current_space = free_space;

300:   /* Determine ci and cj */
301:   for (i=0; i<am; i++) {
302:     anzi = ai[i+1] - ai[i];
303:     aj   = a->j + ai[i];
304:     for (j=0; j<anzi; j++) {
305:       brow = aj[j];
306:       bnzj = bi[brow+1] - bi[brow];
307:       bj   = b->j + bi[brow];
308:       /* add non-zero cols of B into the sorted linked list lnk */
309:       PetscLLCondensedAddSorted_fast(bnzj,bj,lnk);
310:     }
         /* the _fast list keeps its entry count in lnk[1], unlike the other variants which read lnk[0] */
311:     cnzi = lnk[1];

313:     /* If free space is not available, make more free space */
314:     /* Double the amount of total space in the list */
315:     if (current_space->local_remaining<cnzi) {
316:       PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);
317:       ndouble++;
318:     }

320:     /* Copy data into free space, then initialize lnk */
321:     PetscLLCondensedClean_fast(cnzi,current_space->array,lnk);

323:     current_space->array           += cnzi;
324:     current_space->local_used      += cnzi;
325:     current_space->local_remaining -= cnzi;

327:     ci[i+1] = ci[i] + cnzi;
328:   }

330:   /* Column indices are in the list of free space */
331:   /* Allocate space for cj, initialize cj, and */
332:   /* destroy list of free space and other temporary array(s) */
333:   PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);
334:   PetscFreeSpaceContiguous(&free_space,cj);
335:   PetscLLCondensedDestroy_fast(lnk);

337:   /* Allocate space for ca */
338:   PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);
339:   PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));

341:   /* put together the new symbolic matrix */
342:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,C);

344:   (*C)->rmap->bs = A->rmap->bs;
345:   (*C)->cmap->bs = B->cmap->bs;

347:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
348:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
349:   c          = (Mat_SeqAIJ*)((*C)->data);
350:   c->free_a  = PETSC_TRUE;
351:   c->free_ij = PETSC_TRUE;
352:   c->nonew   = 0;

354:   (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */

356:   /* set MatInfo */
       /* guard the ratio: with nnz(A)+nnz(B) == 0 the division is 0/0 and the NaN would slip past the < 1.0 clamp */
357:   afill = (ai[am]+bi[bm]) ? (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5 : 1.0;
358:   if (afill < 1.0) afill = 1.0;
359:   c->maxnz                     = ci[am];
360:   c->nz                        = ci[am];
361:   (*C)->info.mallocs           = ndouble;
362:   (*C)->info.fill_ratio_given  = fill;
363:   (*C)->info.fill_ratio_needed = afill;

365: #if defined(PETSC_USE_INFO)
366:   if (ci[am]) {
367:     PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);
368:     PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);
369:   } else {
370:     PetscInfo((*C),"Empty matrix product\n");
371:   }
372: #endif
373:   return(0);
374: }


379: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat *C)
380: {
       /*
          Symbolic phase of C = A*B built on the PetscLLCondensed*_Scalable list routines.

          Computes the row offsets ci and column indices cj of the product, allocates a zeroed
          value array ca, and wraps all three in a new SeqAIJ matrix *C. The numeric routine
          installed on *C is MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable.

          fill - caller's estimate of nnz(C)/(nnz(A)+nnz(B)); sizes the first free-space chunk.
       */
381:   PetscErrorCode     ierr;
382:   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
383:   PetscInt           *ai = a->i,*bi=b->i,*ci,*cj;
384:   PetscInt           am  = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
385:   MatScalar          *ca;
386:   PetscReal          afill;
387:   PetscInt           i,j,anzi,brow,bnzj,cnzi,*bj,*aj,nlnk_max,*lnk,ndouble=0;
388:   PetscFreeSpaceList free_space=NULL,current_space=NULL;

391:   /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalable() */
392:   /*---------------------------------------------------------------------------------------------*/
393:   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
394:   PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);
395:   ci[0] = 0;

397:   /* create and initialize a linked list */
398:   nlnk_max = a->rmax*b->rmax;
399:   if (!nlnk_max || nlnk_max > bn) nlnk_max = bn; /* in case rmax is not defined for A or B */
400:   PetscLLCondensedCreate_Scalable(nlnk_max,&lnk);

402:   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
403:   PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);
404:   current_space = free_space;

406:   /* Determine ci and cj */
407:   for (i=0; i<am; i++) {
408:     anzi = ai[i+1] - ai[i];
409:     aj   = a->j + ai[i];
410:     for (j=0; j<anzi; j++) {
411:       brow = aj[j];
412:       bnzj = bi[brow+1] - bi[brow];
413:       bj   = b->j + bi[brow];
414:       /* add non-zero cols of B into the sorted linked list lnk */
415:       PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk);
416:     }
         /* lnk[0] holds the number of entries collected for this row */
417:     cnzi = lnk[0];

419:     /* If free space is not available, make more free space */
420:     /* Double the amount of total space in the list */
421:     if (current_space->local_remaining<cnzi) {
422:       PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);
423:       ndouble++;
424:     }

426:     /* Copy data into free space, then initialize lnk */
427:     PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk);

429:     current_space->array           += cnzi;
430:     current_space->local_used      += cnzi;
431:     current_space->local_remaining -= cnzi;

433:     ci[i+1] = ci[i] + cnzi;
434:   }

436:   /* Column indices are in the list of free space */
437:   /* Allocate space for cj, initialize cj, and */
438:   /* destroy list of free space and other temporary array(s) */
439:   PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);
440:   PetscFreeSpaceContiguous(&free_space,cj);
441:   PetscLLCondensedDestroy_Scalable(lnk);

443:   /* Allocate space for ca */
444:   /*-----------------------*/
445:   PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);
446:   PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));

448:   /* put together the new symbolic matrix */
449:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,C);

451:   (*C)->rmap->bs = A->rmap->bs;
452:   (*C)->cmap->bs = B->cmap->bs;

454:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
455:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
456:   c          = (Mat_SeqAIJ*)((*C)->data);
457:   c->free_a  = PETSC_TRUE;
458:   c->free_ij = PETSC_TRUE;
459:   c->nonew   = 0;

461:   (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */

463:   /* set MatInfo */
       /* guard the ratio: with nnz(A)+nnz(B) == 0 the division is 0/0 and the NaN would slip past the < 1.0 clamp */
464:   afill = (ai[am]+bi[bm]) ? (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5 : 1.0;
465:   if (afill < 1.0) afill = 1.0;
466:   c->maxnz                     = ci[am];
467:   c->nz                        = ci[am];
468:   (*C)->info.mallocs           = ndouble;
469:   (*C)->info.fill_ratio_given  = fill;
470:   (*C)->info.fill_ratio_needed = afill;

472: #if defined(PETSC_USE_INFO)
473:   if (ci[am]) {
474:     PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);
475:     PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);
476:   } else {
477:     PetscInfo((*C),"Empty matrix product\n");
478:   }
479: #endif
480:   return(0);
481: }

485: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A,Mat B,PetscReal fill,Mat *C)
486: {
       /*
          Symbolic phase of C = A*B that merges the rows of B with a min heap.

          For each row of A, the rows of B selected by its column indices are merged through the
          heap h; bb[j] is the read cursor into the j-th of those rows. Every distinct column
          popped from the heap is appended to the free-space list, which becomes cj. No value
          array is created here (NULL is passed for it); the numeric routine installed on *C is
          MatMatMultNumeric_SeqAIJ_SeqAIJ.

          fill - caller's estimate of nnz(C)/(nnz(A)+nnz(B)); sizes the first free-space chunk.
       */
487:   PetscErrorCode     ierr;
488:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
489:   const PetscInt     *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j;
490:   PetscInt           *ci,*cj,*bb;
491:   PetscInt           am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
492:   PetscReal          afill;
493:   PetscInt           i,j,col,ndouble = 0;
494:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
495:   PetscHeap          h;

498:   /* Get ci and cj - by merging sorted rows using a heap */
499:   /*---------------------------------------------------------------------------------------------*/
500:   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
501:   PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);
502:   ci[0] = 0;

504:   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
505:   PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);
506:   current_space = free_space;

       /* heap and cursor array are sized by a->rmax, so no row of A may hold more than a->rmax entries */
       /* NOTE(review): unlike the linked-list variants there is no fallback here for an undefined (zero) rmax - confirm callers guarantee it */
508:   PetscHeapCreate(a->rmax,&h);
509:   PetscMalloc(a->rmax*sizeof(PetscInt),&bb);

511:   /* Determine ci and cj */
512:   for (i=0; i<am; i++) {
513:     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
514:     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
515:     ci[i+1] = ci[i];
516:     /* Populate the min heap */
517:     for (j=0; j<anzi; j++) {
518:       bb[j] = bi[acol[j]];         /* bb points at the start of the row */
519:       if (bb[j] < bi[acol[j]+1]) { /* Add if row is nonempty */
520:         PetscHeapAdd(h,j,bj[bb[j]++]);
521:       }
522:     }
523:     /* Pick off the min element, adding it to free space */
524:     PetscHeapPop(h,&j,&col);
         /* the loop ends once the pop yields a negative id, presumably PetscHeapPop's signal for an empty heap */
525:     while (j >= 0) {
           /* NOTE(review): the cap is in the same units as the initial request (a count of indices), so "16 MiB" presumably means 16Mi entries - confirm */
526:       if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */
527:         PetscFreeSpaceGet(PetscMin(2*current_space->total_array_size,16 << 20),&current_space);
528:         ndouble++;
529:       }
530:       *(current_space->array++) = col;
531:       current_space->local_used++;
532:       current_space->local_remaining--;
533:       ci[i+1]++;

535:       /* stash if anything else remains in this row of B */
536:       if (bb[j] < bi[acol[j]+1]) {PetscHeapStash(h,j,bj[bb[j]++]);}
537:       while (1) {               /* pop and stash any other rows of B that also had an entry in this column */
538:         PetscInt j2,col2;
539:         PetscHeapPeek(h,&j2,&col2);
540:         if (col2 != col) break;
541:         PetscHeapPop(h,&j2,&col2);
542:         if (bb[j2] < bi[acol[j2]+1]) {PetscHeapStash(h,j2,bj[bb[j2]++]);}
543:       }
544:       /* Put any stashed elements back into the min heap */
545:       PetscHeapUnstash(h);
546:       PetscHeapPop(h,&j,&col);
547:     }
548:   }
549:   PetscFree(bb);
550:   PetscHeapDestroy(&h);

552:   /* Column indices are in the list of free space */
553:   /* Allocate space for cj, initialize cj, and */
554:   /* destroy list of free space and other temporary array(s) */
555:   PetscMalloc(ci[am]*sizeof(PetscInt),&cj);
556:   PetscFreeSpaceContiguous(&free_space,cj);

558:   /* put together the new symbolic matrix */
559:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,C);

561:   (*C)->rmap->bs = A->rmap->bs;
562:   (*C)->cmap->bs = B->cmap->bs;

564:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
565:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
566:   c          = (Mat_SeqAIJ*)((*C)->data);
567:   c->free_a  = PETSC_TRUE;
568:   c->free_ij = PETSC_TRUE;
569:   c->nonew   = 0;

571:   (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ;

573:   /* set MatInfo */
       /* guard the ratio: with nnz(A)+nnz(B) == 0 the division is 0/0 and the NaN would slip past the < 1.0 clamp */
574:   afill = (ai[am]+bi[bm]) ? (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5 : 1.0;
575:   if (afill < 1.0) afill = 1.0;
576:   c->maxnz                     = ci[am];
577:   c->nz                        = ci[am];
578:   (*C)->info.mallocs           = ndouble;
579:   (*C)->info.fill_ratio_given  = fill;
580:   (*C)->info.fill_ratio_needed = afill;

582: #if defined(PETSC_USE_INFO)
583:   if (ci[am]) {
584:     PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);
585:     PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);
586:   } else {
587:     PetscInfo((*C),"Empty matrix product\n");
588:   }
589: #endif
590:   return(0);
591: }

595: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A,Mat B,PetscReal fill,Mat *C)
596: {
       /*
          Symbolic phase of C = A*B using a min heap plus a bit table.

          Works like the heap variant, but the bit table bt (one bit per column of B) records
          which columns were already queued for the current row, so each column enters the heap
          at most once. After every row the bits are cleared again, either one by one from the
          columns just written or, if the free space was regrown during the row, wholesale.
          No value array is created here (NULL is passed for it); the numeric routine installed
          on *C is MatMatMultNumeric_SeqAIJ_SeqAIJ.

          fill - caller's estimate of nnz(C)/(nnz(A)+nnz(B)); sizes the first free-space chunk.
       */
597:   PetscErrorCode     ierr;
598:   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
599:   const PetscInt     *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j;
600:   PetscInt           *ci,*cj,*bb;
601:   PetscInt           am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
602:   PetscReal          afill;
603:   PetscInt           i,j,col,ndouble = 0;
604:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
605:   PetscHeap          h;
606:   PetscBT            bt;

609:   /* Get ci and cj - using a heap for the sorted rows, but use BT so that each index is only added once */
610:   /*---------------------------------------------------------------------------------------------*/
611:   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
612:   PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);
613:   ci[0] = 0;

615:   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
616:   PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);

618:   current_space = free_space;

       /* heap and cursor array are sized by a->rmax, so no row of A may hold more than a->rmax entries */
       /* NOTE(review): unlike the linked-list variants there is no fallback here for an undefined (zero) rmax - confirm callers guarantee it */
620:   PetscHeapCreate(a->rmax,&h);
621:   PetscMalloc(a->rmax*sizeof(PetscInt),&bb);
622:   PetscBTCreate(bn,&bt);

624:   /* Determine ci and cj */
625:   for (i=0; i<am; i++) {
626:     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
627:     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
628:     const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */
629:     ci[i+1] = ci[i];
630:     /* Populate the min heap */
631:     for (j=0; j<anzi; j++) {
632:       PetscInt brow = acol[j];
           /* skip columns already marked in bt; queue the first unmarked one and leave bb[j] just past it */
633:       for (bb[j] = bi[brow]; bb[j] < bi[brow+1]; bb[j]++) {
634:         PetscInt bcol = bj[bb[j]];
635:         if (!PetscBTLookupSet(bt,bcol)) { /* new entry */
636:           PetscHeapAdd(h,j,bcol);
637:           bb[j]++;
638:           break;
639:         }
640:       }
641:     }
642:     /* Pick off the min element, adding it to free space */
643:     PetscHeapPop(h,&j,&col);
         /* the loop ends once the pop yields a negative id, presumably PetscHeapPop's signal for an empty heap */
644:     while (j >= 0) {
           /* NOTE(review): the cap is in the same units as the initial request (a count of indices), so "16 MiB" presumably means 16Mi entries - confirm */
645:       if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */
646:         fptr = NULL;                      /* need PetscBTMemzero */
647:         PetscFreeSpaceGet(PetscMin(2*current_space->total_array_size,16 << 20),&current_space);
648:         ndouble++;
649:       }
650:       *(current_space->array++) = col;
651:       current_space->local_used++;
652:       current_space->local_remaining--;
653:       ci[i+1]++;

655:       /* stash if anything else remains in this row of B */
656:       for (; bb[j] < bi[acol[j]+1]; bb[j]++) {
657:         PetscInt bcol = bj[bb[j]];
658:         if (!PetscBTLookupSet(bt,bcol)) { /* new entry */
659:           PetscHeapAdd(h,j,bcol);
660:           bb[j]++;
661:           break;
662:         }
663:       }
664:       PetscHeapPop(h,&j,&col);
665:     }
666:     if (fptr) {                 /* Clear the bits for this row */
667:       for (; fptr<current_space->array; fptr++) {PetscBTClear(bt,*fptr);}
668:     } else {                    /* We reallocated so we don't remember (easily) how to clear only the bits we changed */
669:       PetscBTMemzero(bn,bt);
670:     }
671:   }
672:   PetscFree(bb);
673:   PetscHeapDestroy(&h);
674:   PetscBTDestroy(&bt);

676:   /* Column indices are in the list of free space */
677:   /* Allocate space for cj, initialize cj, and */
678:   /* destroy list of free space and other temporary array(s) */
679:   PetscMalloc(ci[am]*sizeof(PetscInt),&cj);
680:   PetscFreeSpaceContiguous(&free_space,cj);

682:   /* put together the new symbolic matrix */
683:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,C);

685:   (*C)->rmap->bs = A->rmap->bs;
686:   (*C)->cmap->bs = B->cmap->bs;

688:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
689:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
690:   c          = (Mat_SeqAIJ*)((*C)->data);
691:   c->free_a  = PETSC_TRUE;
692:   c->free_ij = PETSC_TRUE;
693:   c->nonew   = 0;

695:   (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ;

697:   /* set MatInfo */
       /* guard the ratio: with nnz(A)+nnz(B) == 0 the division is 0/0 and the NaN would slip past the < 1.0 clamp */
698:   afill = (ai[am]+bi[bm]) ? (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5 : 1.0;
699:   if (afill < 1.0) afill = 1.0;
700:   c->maxnz                     = ci[am];
701:   c->nz                        = ci[am];
702:   (*C)->info.mallocs           = ndouble;
703:   (*C)->info.fill_ratio_given  = fill;
704:   (*C)->info.fill_ratio_needed = afill;

706: #if defined(PETSC_USE_INFO)
707:   if (ci[am]) {
708:     PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);
709:     PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);
710:   } else {
711:     PetscInfo((*C),"Empty matrix product\n");
712:   }
713: #endif
714:   return(0);
715: }

719: /* concatenate unique entries and then sort */
720: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
721: {
       /*
          Default symbolic phase of C = A*B.

          For each row of A the column indices of the selected rows of B are collected once each
          (seen[] marks columns already taken), copied into the segmented buffer seg, and sorted
          in place. The concatenated rows become cj. No value array is created here (NULL is
          passed for it); the numeric routine installed on *C is MatMatMultNumeric_SeqAIJ_SeqAIJ.

          fill - caller's estimate of nnz(C)/(nnz(A)+nnz(B)); sizes the initial segmented buffer.
       */
722:   PetscErrorCode     ierr;
723:   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
724:   const PetscInt     *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j;
725:   PetscInt           *ci,*cj;
726:   PetscInt           am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
727:   PetscReal          afill;
728:   PetscInt           i,j,ndouble = 0;
729:   PetscSegBuffer     seg,segrow;
730:   char               *seen;

733:   PetscMalloc((am+1)*sizeof(PetscInt),&ci);
734:   ci[0] = 0;

736:   /* Initial segmented-buffer size is fill*(nnz(A)+nnz(B)) */
737:   PetscSegBufferCreate(sizeof(PetscInt),(PetscInt)(fill*(ai[am]+bi[bm])),&seg);
738:   PetscSegBufferCreate(sizeof(PetscInt),100,&segrow);
       /* one flag per column of B, all clear between rows */
739:   PetscMalloc(bn*sizeof(char),&seen);
740:   PetscMemzero(seen,bn*sizeof(char));

742:   /* Determine ci and cj */
743:   for (i=0; i<am; i++) {
744:     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
745:     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
746:     PetscInt packlen = 0,*PETSC_RESTRICT crow;
747:     /* Pack segrow */
748:     for (j=0; j<anzi; j++) {
749:       PetscInt brow = acol[j],bjstart = bi[brow],bjend = bi[brow+1],k;
750:       for (k=bjstart; k<bjend; k++) {
751:         PetscInt bcol = bj[k];
752:         if (!seen[bcol]) { /* new entry */
753:           PetscInt *PETSC_RESTRICT slot;
754:           PetscSegBufferGetInts(segrow,1,&slot);
755:           *slot = bcol;
756:           seen[bcol] = 1;
757:           packlen++;
758:         }
759:       }
760:     }
         /* move the packed row into seg, sort it there, then clear exactly the flags this row set */
761:     PetscSegBufferGetInts(seg,packlen,&crow);
762:     PetscSegBufferExtractTo(segrow,crow);
763:     PetscSortInt(packlen,crow);
764:     ci[i+1] = ci[i] + packlen;
765:     for (j=0; j<packlen; j++) seen[crow[j]] = 0;
766:   }
767:   PetscSegBufferDestroy(&segrow);
768:   PetscFree(seen);

770:   /* Column indices are in the segmented buffer */
771:   PetscSegBufferExtractAlloc(seg,&cj);
772:   PetscSegBufferDestroy(&seg);

774:   /* put together the new symbolic matrix */
775:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,C);

777:   (*C)->rmap->bs = A->rmap->bs;
778:   (*C)->cmap->bs = B->cmap->bs;

780:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
781:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
782:   c          = (Mat_SeqAIJ*)((*C)->data);
783:   c->free_a  = PETSC_TRUE;
784:   c->free_ij = PETSC_TRUE;
785:   c->nonew   = 0;

787:   (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ;

789:   /* set MatInfo */
       /* guard the ratio: with nnz(A)+nnz(B) == 0 the division is 0/0 and the NaN would slip past the < 1.0 clamp */
790:   afill = (ai[am]+bi[bm]) ? (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5 : 1.0;
791:   if (afill < 1.0) afill = 1.0;
792:   c->maxnz                     = ci[am];
793:   c->nz                        = ci[am];
       /* ndouble is never incremented in this routine, so mallocs is always reported as 0 */
794:   (*C)->info.mallocs           = ndouble;
795:   (*C)->info.fill_ratio_given  = fill;
796:   (*C)->info.fill_ratio_needed = afill;

798: #if defined(PETSC_USE_INFO)
799:   if (ci[am]) {
800:     PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);
801:     PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);
802:   } else {
803:     PetscInfo((*C),"Empty matrix product\n");
804:   }
805: #endif
806:   return(0);
807: }

809: /* This routine is not used. Should be removed! */
812: PetscErrorCode MatMatTransposeMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
813: {
       /*
          Driver for the transpose product of two SeqAIJ matrices: with MAT_INITIAL_MATRIX the
          symbolic routine creates *C first, then the numeric routine is always run on *C.
          Each phase is bracketed by its own log event.
       */

817:   if (scall == MAT_INITIAL_MATRIX) {
818:     PetscLogEventBegin(MAT_MatTransposeMultSymbolic,A,B,0,0);
819:     MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);
820:     PetscLogEventEnd(MAT_MatTransposeMultSymbolic,A,B,0,0);
821:   }
822:   PetscLogEventBegin(MAT_MatTransposeMultNumeric,A,B,0,0);
823:   MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);
824:   PetscLogEventEnd(MAT_MatTransposeMultNumeric,A,B,0,0);
825:   return(0);
826: }

/*
   PetscContainerDestroy_Mat_MatMatTransMult - Destroy callback for the container that
   holds a Mat_MatMatTransMult support struct.

   Input Parameter:
.  ptr - pointer to the Mat_MatMatTransMult to release

   Destroys the transpose coloring and the two dense work matrices stored in the
   struct, then frees the struct itself.
*/
830: PetscErrorCode PetscContainerDestroy_Mat_MatMatTransMult(void *ptr)
831: {
832:   PetscErrorCode      ierr;
833:   Mat_MatMatTransMult *multtrans=(Mat_MatMatTransMult*)ptr;

836:   MatTransposeColoringDestroy(&multtrans->matcoloring);
837:   MatDestroy(&multtrans->Bt_den);
838:   MatDestroy(&multtrans->ABt_den);
839:   PetscFree(multtrans);
840:   return(0);
841: }

/*
   MatDestroy_SeqAIJ_MatMatMultTrans - Destroy routine installed on a product matrix
   that carries a "Mat_MatMatTransMult" container.

   Input Parameter:
.  A - the matrix being destroyed

   Looks up the composed container, restores the destroy routine saved in it,
   calls that routine (if any), and then removes the container from A by
   composing a null object under the same key.  Raises PETSC_ERR_PLIB if the
   container is missing.
*/
845: PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(Mat A)
846: {
847:   PetscErrorCode      ierr;
848:   PetscContainer      container;
849:   Mat_MatMatTransMult *multtrans=NULL;

852:   PetscObjectQuery((PetscObject)A,"Mat_MatMatTransMult",(PetscObject*)&container);
853:   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
854:   PetscContainerGetPointer(container,(void**)&multtrans);

       /* put back the destroy routine that was saved when the container was attached */
856:   A->ops->destroy = multtrans->destroy;
857:   if (A->ops->destroy) {
858:     (*A->ops->destroy)(A);
859:   }
       /* composing 0 under the key detaches the container from A */
860:   PetscObjectCompose((PetscObject)A,"Mat_MatMatTransMult",0);
861:   return(0);
862: }

/*
   MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ - Symbolic phase of C = A*B^T for SeqAIJ
   matrices.

   Input Parameters:
+  A, B - the operands
-  fill - fill estimate, forwarded to MatMatMultSymbolic_SeqAIJ_SeqAIJ()

   Output Parameter:
.  C - the newly created product matrix (nonzero structure only)

   Notes:
   A symbolic transpose Bt of B is built, the symbolic product A*Bt is formed, and a
   Mat_MatMatTransMult support struct is attached to C in a container under the key
   "Mat_MatMatTransMult".  C's destroy routine is replaced by
   MatDestroy_SeqAIJ_MatMatMultTrans(); the previous one is saved in the struct.

   With the option -matmattransmult_color a MatTransposeColoring of C and two dense
   work matrices (Bt_dense and C_dense) are also created and stored in the struct for
   use by the numeric phase.

   The code under INEFFICIENT_ALGORITHM is compiled only when that macro is defined.
*/
866: PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
867: {
868:   PetscErrorCode      ierr;
869:   Mat                 Bt;
870:   PetscInt            *bti,*btj;
871:   Mat_MatMatTransMult *multtrans;
872:   PetscContainer      container;

875:   /* create symbolic Bt */
876:   MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);
877:   MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,NULL,&Bt);

879:   Bt->rmap->bs = A->cmap->bs;
880:   Bt->cmap->bs = B->cmap->bs;

882:   /* get symbolic C=A*Bt */
883:   MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);

885:   /* create a supporting struct for reusing intermediate dense matrices with matcoloring */
886:   PetscNew(Mat_MatMatTransMult,&multtrans);

888:   /* attach the supporting struct to C */
889:   PetscContainerCreate(PETSC_COMM_SELF,&container);
890:   PetscContainerSetPointer(container,multtrans);
891:   PetscContainerSetUserDestroy(container,PetscContainerDestroy_Mat_MatMatTransMult);
892:   PetscObjectCompose((PetscObject)(*C),"Mat_MatMatTransMult",(PetscObject)container);
893:   PetscContainerDestroy(&container);

       /* save C's current destroy routine and install the wrapper that restores it */
895:   multtrans->usecoloring = PETSC_FALSE;
896:   multtrans->destroy     = (*C)->ops->destroy;
897:   (*C)->ops->destroy     = MatDestroy_SeqAIJ_MatMatMultTrans;

899:   PetscOptionsGetBool(NULL,"-matmattransmult_color",&multtrans->usecoloring,NULL);
900:   if (multtrans->usecoloring) {
901:     /* Create MatTransposeColoring from symbolic C=A*B^T */
902:     MatTransposeColoring matcoloring;
903:     ISColoring           iscoloring;
904:     Mat                  Bt_dense,C_dense;

906:     MatGetColoring(*C,MATCOLORINGLF,&iscoloring);
907:     MatTransposeColoringCreate(*C,iscoloring,&matcoloring);

909:     multtrans->matcoloring = matcoloring;

911:     ISColoringDestroy(&iscoloring);

913:     /* Create Bt_dense and C_dense = A*Bt_dense */
914:     MatCreate(PETSC_COMM_SELF,&Bt_dense);
915:     MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);
916:     MatSetType(Bt_dense,MATSEQDENSE);
917:     MatSeqDenseSetPreallocation(Bt_dense,NULL);

919:     Bt_dense->assembled = PETSC_TRUE;
920:     multtrans->Bt_den   = Bt_dense;

922:     MatCreate(PETSC_COMM_SELF,&C_dense);
923:     MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);
924:     MatSetType(C_dense,MATSEQDENSE);
925:     MatSeqDenseSetPreallocation(C_dense,NULL);

         /* flag the matrix just created (previously Bt_dense was flagged a second time) */
927:     C_dense->assembled = PETSC_TRUE;
928:     multtrans->ABt_den  = C_dense;

930: #if defined(PETSC_USE_INFO)
931:     {
932:       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*C)->data;
933:       PetscInfo5(*C,"Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",A->cmap->n,matcoloring->ncolors,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors));
934:     }
935: #endif
936:   }
937:   /* clean up */
938:   MatDestroy(&Bt);
939:   MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);



943: #if defined(INEFFICIENT_ALGORITHM)
944:   /* The algorithm below computes am*bm sparse inner-product - inefficient! It will be deleted later. */
945:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
946:   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
947:   PetscInt           *ai       =a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ci,*cj,*acol,*bcol;
948:   PetscInt           am        =A->rmap->N,bm=B->rmap->N;
949:   PetscInt           i,j,anzi,bnzj,cnzi,nlnk,*lnk,nspacedouble=0,ka,kb,index[1];
950:   MatScalar          *ca;
951:   PetscBT            lnkbt;
952:   PetscReal          afill;

954:   /* Allocate row pointer array ci  */
955:   PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);
956:   ci[0] = 0;

958:   /* Create and initialize a linked list for C columns */
959:   nlnk = bm+1;
960:   PetscLLCreate(bm,bm,nlnk,lnk,lnkbt);

962:   /* Initial FreeSpace with size fill*(nnz(A)+nnz(B)) */
963:   PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);
964:   current_space = free_space;

966:   /* Determine symbolic info for each row of the product A*B^T: */
967:   for (i=0; i<am; i++) {
968:     anzi = ai[i+1] - ai[i];
969:     cnzi = 0;
970:     acol = aj + ai[i];
971:     for (j=0; j<bm; j++) {
972:       bnzj = bi[j+1] - bi[j];
973:       bcol = bj + bi[j];
974:       /* sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */
975:       ka = 0; kb = 0;
976:       while (ka < anzi && kb < bnzj) {
             /* test the bound before indexing so acol[]/bcol[] are never read past their row */
977:         while (ka < anzi && acol[ka] < bcol[kb]) ka++;
978:         if (ka == anzi) break;
979:         while (kb < bnzj && acol[ka] > bcol[kb]) kb++;
980:         if (kb == bnzj) break;
981:         if (acol[ka] == bcol[kb]) { /* add nonzero c(i,j) to lnk */
982:           index[0] = j;
983:           PetscLLAdd(1,index,bm,nlnk,lnk,lnkbt);
984:           cnzi++;
985:           break;
986:         }
987:       }
988:     }

990:     /* If free space is not available, make more free space */
991:     /* Double the amount of total space in the list */
992:     if (current_space->local_remaining<cnzi) {
993:       PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);
994:       nspacedouble++;
995:     }

997:     /* Copy data into free space, then initialize lnk */
998:     PetscLLClean(bm,bm,cnzi,lnk,current_space->array,lnkbt);

1000:     current_space->array           += cnzi;
1001:     current_space->local_used      += cnzi;
1002:     current_space->local_remaining -= cnzi;

1004:     ci[i+1] = ci[i] + cnzi;
1005:   }


1008:   /* Column indices are in the list of free space.
1009:      Allocate array cj, copy column indices to cj, and destroy list of free space */
1010:   PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);
1011:   PetscFreeSpaceContiguous(&free_space,cj);
1012:   PetscLLDestroy(lnk,lnkbt);

1014:   /* Allocate space for ca */
1015:   PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);
1016:   PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));

1018:   /* put together the new symbolic matrix */
1019:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bm,ci,cj,ca,C);

1021:   (*C)->rmap->bs = A->cmap->bs;
1022:   (*C)->cmap->bs = B->cmap->bs;

1024:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1025:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
1026:   c          = (Mat_SeqAIJ*)((*C)->data);
1027:   c->free_a  = PETSC_TRUE;
1028:   c->free_ij = PETSC_TRUE;
1029:   c->nonew   = 0;

1031:   /* set MatInfo */
1032:   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
1033:   if (afill < 1.0) afill = 1.0;
1034:   c->maxnz                     = ci[am];
1035:   c->nz                        = ci[am];
1036:   (*C)->info.mallocs           = nspacedouble;
1037:   (*C)->info.fill_ratio_given  = fill;
1038:   (*C)->info.fill_ratio_needed = afill;

1040: #if defined(PETSC_USE_INFO)
1041:   if (ci[am]) {
1042:     PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);
1043:     PetscInfo1((*C),"Use MatMatTransposeMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);
1044:   } else {
1045:     PetscInfo((*C),"Empty matrix product\n");
1046:   }
1047: #endif
1048: #endif
1049:   return(0);
1050: }

1052: /* #define USE_ARRAY - for sparse dot product. Slower than !USE_ARRAY */
/*
   MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ - Numeric phase of C = A*B^T for SeqAIJ
   matrices.

   Input Parameters:
+  A, B - the operands
-  C    - product matrix whose nonzero structure (c->i, c->j) already exists and which
          carries the "Mat_MatMatTransMult" container

   Notes:
   C's value array is allocated on first use and zeroed on every call.

   If the support struct has usecoloring set, B is compressed into the dense work
   matrix Bt_den through the stored coloring, multiplied by A into ABt_den, and the
   result is scattered back into C; the routine then returns.

   Otherwise each stored entry c(i,j) is computed as the sparse inner product of row i
   of A with row j of B.  The default variant merges the two column-index lists; the
   USE_ARRAY variant scatters the row of B into a work array instead.

   Raises PETSC_ERR_PLIB if the container is missing.
*/
1055: PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
1056: {
1057:   PetscErrorCode      ierr;
1058:   Mat_SeqAIJ          *a   =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
1059:   PetscInt            *ai  =a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow;
1060:   PetscInt            cm   =C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol;
1061:   PetscLogDouble      flops=0.0;
1062:   MatScalar           *aa  =a->a,*aval,*ba=b->a,*bval,*ca,*cval;
1063:   Mat_MatMatTransMult *multtrans;
1064:   PetscContainer      container;
1065: #if defined(USE_ARRAY)
1066:   MatScalar *spdot;
1067: #endif

1070:   /* clear old values in C */
1071:   if (!c->a) {
1072:     PetscMalloc((ci[cm]+1)*sizeof(MatScalar),&ca);
1073:     c->a      = ca;
1074:     c->free_a = PETSC_TRUE;
1075:   } else {
1076:     ca =  c->a;
1077:   }
1078:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

1080:   PetscObjectQuery((PetscObject)C,"Mat_MatMatTransMult",(PetscObject*)&container);
1081:   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
1082:   PetscContainerGetPointer(container,(void**)&multtrans);
1083:   if (multtrans->usecoloring) {
1084:     MatTransposeColoring matcoloring = multtrans->matcoloring;
1085:     Mat                  Bt_dense;
1086:     PetscInt             m,n;
1087:     Mat                  C_dense = multtrans->ABt_den;

1089:     Bt_dense = multtrans->Bt_den;
1090:     MatGetLocalSize(Bt_dense,&m,&n);

1092:     /* Get Bt_dense by Apply MatTransposeColoring to B */
1093:     MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);

1095:     /* C_dense = A*Bt_dense */
1096:     MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);

1098:     /* Recover C from C_dense */
1099:     MatTransColoringApplyDenToSp(matcoloring,C_dense,C);
1100:     return(0);
1101:   }

1103: #if defined(USE_ARRAY)
1104:   /* allocate an array for implementing sparse inner-product */
1105:   PetscMalloc((A->cmap->n+1)*sizeof(MatScalar),&spdot);
1106:   PetscMemzero(spdot,(A->cmap->n+1)*sizeof(MatScalar));
1107: #endif

1109:   for (i=0; i<cm; i++) {
1110:     anzi = ai[i+1] - ai[i];
1111:     acol = aj + ai[i];
1112:     aval = aa + ai[i];
1113:     cnzi = ci[i+1] - ci[i];
1114:     ccol = cj + ci[i];
1115:     cval = ca + ci[i];
1116:     for (j=0; j<cnzi; j++) {
           /* column index of C selects the row of B that takes part in this inner product */
1117:       brow = ccol[j];
1118:       bnzj = bi[brow+1] - bi[brow];
1119:       bcol = bj + bi[brow];
1120:       bval = ba + bi[brow];

1122:       /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */
1123: #if defined(USE_ARRAY)
1124:       /* put ba to spdot array */
1125:       for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = bval[nextb];
1126:       /* c(i,j)=A[i,:]*B[j,:]^T */
1127:       for (nexta=0; nexta<anzi; nexta++) {
1128:         cval[j] += spdot[acol[nexta]]*aval[nexta];
1129:       }
1130:       /* zero spdot array */
1131:       for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = 0.0;
1132: #else
           /* merge the two column-index lists, multiplying where the indices coincide */
1133:       nexta = 0; nextb = 0;
1134:       while (nexta<anzi && nextb<bnzj) {
1135:         while (nexta < anzi && acol[nexta] < bcol[nextb]) nexta++;
1136:         if (nexta == anzi) break;
1137:         while (nextb < bnzj && acol[nexta] > bcol[nextb]) nextb++;
1138:         if (nextb == bnzj) break;
1139:         if (acol[nexta] == bcol[nextb]) {
1140:           cval[j] += aval[nexta]*bval[nextb];
1141:           nexta++; nextb++;
1142:           flops += 2;
1143:         }
1144:       }
1145: #endif
1146:     }
1147:   }
1148:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1149:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1150:   PetscLogFlops(flops);
1151: #if defined(USE_ARRAY)
1152:   PetscFree(spdot);
1153: #endif
1154:   return(0);
1155: }

/*
   MatTransposeMatMult_SeqAIJ_SeqAIJ - Driver that chains the symbolic and numeric
   MatTransposeMatMult phases for two SeqAIJ matrices.

   Input Parameters:
+  A, B  - the operands
.  scall - when MAT_INITIAL_MATRIX the symbolic phase runs first and creates *C;
           otherwise only the numeric phase runs on the existing *C
-  fill  - fill estimate, forwarded to the symbolic phase

   Output Parameter:
.  C - the product matrix
*/
1159: PetscErrorCode MatTransposeMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1160: {

1164:   if (scall == MAT_INITIAL_MATRIX) {
1165:     MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);
1166:   }
1167:   MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);
1168:   return(0);
1169: }

/*
   MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ - Symbolic phase of C = A^T*B for SeqAIJ
   matrices.

   Input Parameters:
+  A, B - the operands
-  fill - fill estimate, forwarded to MatMatMultSymbolic_SeqAIJ_SeqAIJ()

   Output Parameter:
.  C - the newly created product matrix

   A temporary matrix At is wrapped around the symbolic transpose of A (index arrays
   only, no values), the symbolic product At*B is formed, and the temporary matrix and
   transpose arrays are released again.
*/
1173: PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
1174: {
1176:   Mat            At;
1177:   PetscInt       *ati,*atj;

1180:   /* create symbolic At */
1181:   MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);
1182:   MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,NULL,&At);

        /* NOTE(review): At's column block size is taken from B's columns here - confirm this is intended */
1184:   At->rmap->bs = A->cmap->bs;
1185:   At->cmap->bs = B->cmap->bs;

1187:   /* get symbolic C=At*B */
1188:   MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);

1190:   /* clean up */
1191:   MatDestroy(&At);
1192:   MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);
1193:   return(0);
1194: }

/*
   MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ - Numeric phase of C = A^T*B for SeqAIJ
   matrices.

   Input Parameters:
+  A, B - the operands
-  C    - product matrix whose nonzero structure (c->i, c->j) already exists

   Notes:
   C's value array is allocated on first use and zeroed on every call.  The product is
   accumulated as a sum of outer products: for each row i, every entry A(i,crow) scales
   row i of B and the result is added into row crow of C.

   The inner scan walks row crow of C and advances through row i of B only on a column
   match, so it terminates only if every column index of that B row appears, in the same
   order, in the C row.
*/
1198: PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
1199: {
1201:   Mat_SeqAIJ     *a   =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
1202:   PetscInt       am   =A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb;
1203:   PetscInt       cm   =C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k;
1204:   PetscLogDouble flops=0.0;
1205:   MatScalar      *aa  =a->a,*ba,*ca,*caj;

1208:   if (!c->a) {
1209:     PetscMalloc((ci[cm]+1)*sizeof(MatScalar),&ca);

1211:     c->a      = ca;
1212:     c->free_a = PETSC_TRUE;
1213:   } else {
1214:     ca = c->a;
1215:   }
1216:   /* clear old values in C */
1217:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

1219:   /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
1220:   for (i=0; i<am; i++) {
1221:     bj   = b->j + bi[i];
1222:     ba   = b->a + bi[i];
1223:     bnzi = bi[i+1] - bi[i];
1224:     anzi = ai[i+1] - ai[i];
1225:     for (j=0; j<anzi; j++) {
1226:       nextb = 0;
           /* aj and aa are advanced in step through all of A's entries; the column index names the target row of C */
1227:       crow  = *aj++;
1228:       cjj   = cj + ci[crow];
1229:       caj   = ca + ci[crow];
1230:       /* perform sparse axpy operation.  Note cjj includes bj. */
1231:       for (k=0; nextb<bnzi; k++) {
1232:         if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */
1233:           caj[k] += (*aa)*(*(ba+nextb));
1234:           nextb++;
1235:         }
1236:       }
1237:       flops += 2*bnzi;
1238:       aa++;
1239:     }
1240:   }

1242:   /* Assemble the final matrix and clean up */
1243:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1244:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1245:   PetscLogFlops(flops);
1246:   return(0);
1247: }

/*
   MatMatMult_SeqAIJ_SeqDense - Driver for C = A*B with A SeqAIJ and B dense; chains the
   symbolic and numeric phases.

   Input Parameters:
+  A, B  - the operands
.  scall - when MAT_INITIAL_MATRIX the symbolic phase runs first and creates *C;
           otherwise only the numeric phase runs on the existing *C
-  fill  - forwarded to the symbolic phase

   Output Parameter:
.  C - the product matrix

   Each phase is bracketed by its own log event.
*/
1251: PetscErrorCode MatMatMult_SeqAIJ_SeqDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1252: {

1256:   if (scall == MAT_INITIAL_MATRIX) {
1257:     PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);
1258:     MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,fill,C);
1259:     PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);
1260:   }
1261:   PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);
1262:   MatMatMultNumeric_SeqAIJ_SeqDense(A,B,*C);
1263:   PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);
1264:   return(0);
1265: }

/*
   MatMatMultSymbolic_SeqAIJ_SeqDense - Symbolic phase of C = A*B with A SeqAIJ and B dense.

   Input Parameters:
+  A, B - the operands
-  fill - not used here; 0.0 is passed to the dense symbolic routine instead

   Output Parameter:
.  C - the newly created product matrix

   Creation of C is delegated to MatMatMultSymbolic_SeqDense_SeqDense(); afterwards C's
   matmultnumeric operation is pointed at the SeqAIJ-times-SeqDense numeric kernel.
*/
1269: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat *C)
1270: {

1274:   MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);

1276:   (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqDense;
1277:   return(0);
1278: }

/*
   MatMatMultNumeric_SeqAIJ_SeqDense - Numeric phase of C = A*B with A SeqAIJ and B, C dense.

   Input Parameters:
+  A - SeqAIJ matrix
.  B - dense matrix; its array is indexed with a column stride of bm = B's row count
-  C - dense result, overwritten; its array is indexed with a column stride of am = A's
       row count

   Notes:
   Returns immediately when C has no rows or B has no columns.  Raises
   PETSC_ERR_ARG_SIZ when the dimensions of A, B and C are inconsistent.

   Columns are processed four at a time so that each stored entry of A is loaded once
   per group of four columns; leftover columns are handled one at a time.  The blocked
   loop runs while a full group of four remains (col+4 <= cn), so a column count that
   is an exact multiple of four is handled entirely by the blocked loop.
*/
1282: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C)
1283: {
1284:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1286:   PetscScalar    *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4;
1287:   MatScalar      *aa;
1288:   PetscInt       cm  = C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n;
1289:   PetscInt       am2 = 2*am, am3 = 3*am,  bm4 = 4*bm,colam;

1292:   if (!cm || !cn) return(0);
1293:   if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm);
1294:   if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n);
1295:   if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n);
1296:   MatDenseGetArray(B,&b);
1297:   MatDenseGetArray(C,&c);
        /* b1..b4 point at four consecutive columns of B */
1298:   b1   = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm;
1299:   for (col=0; col+4<=cn; col += 4) {  /* over columns of C */
1300:     colam = col*am;
1301:     for (i=0; i<am; i++) {        /* over rows of C in those columns */
1302:       r1 = r2 = r3 = r4 = 0.0;
1303:       n  = a->i[i+1] - a->i[i];
1304:       aj = a->j + a->i[i];
1305:       aa = a->a + a->i[i];
1306:       for (j=0; j<n; j++) {
1307:         r1 += (*aa)*b1[*aj];
1308:         r2 += (*aa)*b2[*aj];
1309:         r3 += (*aa)*b3[*aj];
1310:         r4 += (*aa++)*b4[*aj++];
1311:       }
1312:       c[colam + i]       = r1;
1313:       c[colam + am + i]  = r2;
1314:       c[colam + am2 + i] = r3;
1315:       c[colam + am3 + i] = r4;
1316:     }
1317:     b1 += bm4;
1318:     b2 += bm4;
1319:     b3 += bm4;
1320:     b4 += bm4;
1321:   }
        /* b1 now points at the first column not covered by the blocked loop */
1322:   for (; col<cn; col++) {     /* over extra columns of C */
1323:     for (i=0; i<am; i++) {  /* over rows of C in those columns */
1324:       r1 = 0.0;
1325:       n  = a->i[i+1] - a->i[i];
1326:       aj = a->j + a->i[i];
1327:       aa = a->a + a->i[i];

1329:       for (j=0; j<n; j++) {
1330:         r1 += (*aa++)*b1[*aj++];
1331:       }
1332:       c[col*am + i] = r1;
1333:     }
1334:     b1 += bm;
1335:   }
1336:   PetscLogFlops(cn*(2.0*a->nz));
1337:   MatDenseRestoreArray(B,&b);
1338:   MatDenseRestoreArray(C,&c);
1339:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1340:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1341:   return(0);
1342: }

1344: /*
1345:    Note very similar to MatMult_SeqAIJ(), should generate both codes from same base
1346: */
/*
   MatMatMultNumericAdd_SeqAIJ_SeqDense - Computes C += A*B with A SeqAIJ and B, C dense.

   Input Parameters:
+  A - SeqAIJ matrix
.  B - dense matrix; its array is indexed with a column stride of bm = B's row count
-  C - dense matrix that is accumulated into; its array is indexed with a column stride
       of am = A's row count

   Notes:
   Returns immediately when C has no rows or B has no columns.  No dimension checks are
   made here.

   When A uses the compressed-row format only the rows listed in compressedrow.rindex
   are visited and ridx[i] gives the row of C to update; otherwise every row of A is
   visited.  In both cases columns are processed four at a time while a full group of
   four remains (col+4 <= cn), then one at a time.
*/
1349: PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C)
1350: {
1351:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1353:   PetscScalar    *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4;
1354:   MatScalar      *aa;
1355:   PetscInt       cm  = C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n,*ii,arm;
1356:   PetscInt       am2 = 2*am, am3 = 3*am,  bm4 = 4*bm,colam,*ridx;

1359:   if (!cm || !cn) return(0);
1360:   MatDenseGetArray(B,&b);
1361:   MatDenseGetArray(C,&c);
        /* b1..b4 point at four consecutive columns of B */
1362:   b1   = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm;

1364:   if (a->compressedrow.use) { /* use compressed row format */
1365:     for (col=0; col+4<=cn; col += 4) {  /* over columns of C */
1366:       colam = col*am;
1367:       arm   = a->compressedrow.nrows;
1368:       ii    = a->compressedrow.i;
1369:       ridx  = a->compressedrow.rindex;
1370:       for (i=0; i<arm; i++) {        /* over rows of C in those columns */
1371:         r1 = r2 = r3 = r4 = 0.0;
1372:         n  = ii[i+1] - ii[i];
1373:         aj = a->j + ii[i];
1374:         aa = a->a + ii[i];
1375:         for (j=0; j<n; j++) {
1376:           r1 += (*aa)*b1[*aj];
1377:           r2 += (*aa)*b2[*aj];
1378:           r3 += (*aa)*b3[*aj];
1379:           r4 += (*aa++)*b4[*aj++];
1380:         }
1381:         c[colam       + ridx[i]] += r1;
1382:         c[colam + am  + ridx[i]] += r2;
1383:         c[colam + am2 + ridx[i]] += r3;
1384:         c[colam + am3 + ridx[i]] += r4;
1385:       }
1386:       b1 += bm4;
1387:       b2 += bm4;
1388:       b3 += bm4;
1389:       b4 += bm4;
1390:     }
1391:     for (; col<cn; col++) {     /* over extra columns of C */
1392:       colam = col*am;
1393:       arm   = a->compressedrow.nrows;
1394:       ii    = a->compressedrow.i;
1395:       ridx  = a->compressedrow.rindex;
1396:       for (i=0; i<arm; i++) {  /* over rows of C in those columns */
1397:         r1 = 0.0;
1398:         n  = ii[i+1] - ii[i];
1399:         aj = a->j + ii[i];
1400:         aa = a->a + ii[i];

1402:         for (j=0; j<n; j++) {
1403:           r1 += (*aa++)*b1[*aj++];
1404:         }
1405:         c[colam + ridx[i]] += r1;
1406:       }
1407:       b1 += bm;
1408:     }
1409:   } else {
1410:     for (col=0; col+4<=cn; col += 4) {  /* over columns of C */
1411:       colam = col*am;
1412:       for (i=0; i<am; i++) {        /* over rows of C in those columns */
1413:         r1 = r2 = r3 = r4 = 0.0;
1414:         n  = a->i[i+1] - a->i[i];
1415:         aj = a->j + a->i[i];
1416:         aa = a->a + a->i[i];
1417:         for (j=0; j<n; j++) {
1418:           r1 += (*aa)*b1[*aj];
1419:           r2 += (*aa)*b2[*aj];
1420:           r3 += (*aa)*b3[*aj];
1421:           r4 += (*aa++)*b4[*aj++];
1422:         }
1423:         c[colam + i]       += r1;
1424:         c[colam + am + i]  += r2;
1425:         c[colam + am2 + i] += r3;
1426:         c[colam + am3 + i] += r4;
1427:       }
1428:       b1 += bm4;
1429:       b2 += bm4;
1430:       b3 += bm4;
1431:       b4 += bm4;
1432:     }
1433:     for (; col<cn; col++) {     /* over extra columns of C */
1434:       colam = col*am;
1435:       for (i=0; i<am; i++) {  /* over rows of C in those columns */
1436:         r1 = 0.0;
1437:         n  = a->i[i+1] - a->i[i];
1438:         aj = a->j + a->i[i];
1439:         aa = a->a + a->i[i];

1441:         for (j=0; j<n; j++) {
1442:           r1 += (*aa++)*b1[*aj++];
1443:         }
1444:         c[colam + i] += r1;
1445:       }
1446:       b1 += bm;
1447:     }
1448:   }
1449:   PetscLogFlops(cn*2.0*a->nz);
1450:   MatDenseRestoreArray(B,&b);
1451:   MatDenseRestoreArray(C,&c);
1452:   return(0);
1453: }

/*
   MatTransColoringApplySpToDen_SeqAIJ - Packs rows of the SeqAIJ matrix B into the
   columns of a dense matrix, one dense column per color.

   Input Parameters:
+  coloring - supplies ncolors, ncolumns[], colorforcol[] and columns[]
-  B        - SeqAIJ matrix whose rows are packed

   Output Parameter:
.  Btdense - dense matrix; its value array is zeroed and then filled

   For color k, every index listed in columns[colorforcol[k] ...] selects a row of B;
   the stored values of that row are written into dense column k at the positions
   given by the row's column indices.  The dense column pointer advances by m
   (Btdense's row count) per color.
*/
1457: PetscErrorCode  MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense)
1458: {
1460:   Mat_SeqAIJ     *b       = (Mat_SeqAIJ*)B->data;
1461:   Mat_SeqDense   *btdense = (Mat_SeqDense*)Btdense->data;
1462:   PetscInt       *bi      = b->i,*bj=b->j;
1463:   PetscInt       m        = Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns;
1464:   MatScalar      *btval,*btval_den,*ba=b->a;
1465:   PetscInt       *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors;

1468:   btval_den=btdense->v;
1469:   PetscMemzero(btval_den,(m*n)*sizeof(MatScalar));
1470:   for (k=0; k<ncolors; k++) {
1471:     ncolumns = coloring->ncolumns[k];
1472:     for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */
1473:       col   = *(columns + colorforcol[k] + l);
1474:       btcol = bj + bi[col];
1475:       btval = ba + bi[col];
1476:       anz   = bi[col+1] - bi[col];
1477:       for (j=0; j<anz; j++) {
1478:         brow            = btcol[j];
1479:         btval_den[brow] = btval[j];
1480:       }
1481:     }
         /* move on to the next dense column */
1482:     btval_den += m;
1483:   }
1484:   return(0);
1485: }

/*
   MatTransColoringApplyDenToSp_SeqAIJ - Scatters values from the dense matrix Cden
   back into the value array of the SeqAIJ matrix Csp.

   Input Parameters:
+  matcoloring - supplies ncolors, nrows[], rows[], columnsforspidx[] and colorforrow[]
-  Cden        - dense matrix holding one column per color

   Output Parameter:
.  Csp - SeqAIJ matrix whose value array csp->a is written

   For color k, entry l of the color's row list names a row of dense column k, and the
   matching entry of columnsforspidx gives the position in csp->a that receives it.
   The dense column pointer advances by m (Csp's local row count) per color.
*/
1489: PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp)
1490: {
1492:   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)Csp->data;
1493:   PetscInt       k,l,*row,*idx,m,ncolors=matcoloring->ncolors,nrows;
1494:   PetscScalar    *ca_den,*cp_den,*ca=csp->a;
1495:   PetscInt       *rows=matcoloring->rows,*spidx=matcoloring->columnsforspidx,*colorforrow=matcoloring->colorforrow;

1498:   MatGetLocalSize(Csp,&m,NULL);
1499:   MatDenseGetArray(Cden,&ca_den);
1500:   cp_den = ca_den;
1501:   for (k=0; k<ncolors; k++) {
1502:     nrows = matcoloring->nrows[k];
1503:     row   = rows  + colorforrow[k];
1504:     idx   = spidx + colorforrow[k];
1505:     for (l=0; l<nrows; l++) {
1506:       ca[idx[l]] = cp_den[row[l]];
1507:     }
1508:     cp_den += m;
1509:   }
1510:   MatDenseRestoreArray(Cden,&ca_den);
1511:   return(0);
1512: }

1514: /*
1515:  MatGetColumnIJ_SeqAIJ_Color() and MatRestoreColumnIJ_SeqAIJ_Color() are customized from
1516:  MatGetColumnIJ_SeqAIJ() and MatRestoreColumnIJ_SeqAIJ() by adding an output
1517:  spidx[], index of a->j, to be used for setting 'columnsforspidx' in MatTransposeColoringCreate_SeqAIJ().
1518:  */
/*
   MatGetColumnIJ_SeqAIJ_Color - Builds a column-oriented index structure for a SeqAIJ
   matrix, together with the position of every entry in the row-oriented a->j array.

   Input Parameters:
+  A               - the matrix
.  oshift          - offset added to the column pointers and to the row indices
.  symmetric       - must be false; the symmetric case raises PETSC_ERR_SUP
-  inodecompressed - not used by this routine

   Output Parameters:
+  nn    - number of columns of A
.  ia    - column pointers, n+1 entries starting at oshift
.  ja    - row index (plus oshift) of each entry, grouped by column
.  spidx - for each entry of ja, its index into a->j
-  done  - not set by this routine

   When ia is NULL only nn is set.  The three arrays are allocated here and are released
   by MatRestoreColumnIJ_SeqAIJ_Color().
*/
1521: PetscErrorCode MatGetColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
1522: {
1523:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1525:   PetscInt       i,*collengths,*cia,*cja,n = A->cmap->n,m = A->rmap->n;
1526:   PetscInt       nz = a->i[m],row,*jj,mr,col;
1527:   PetscInt       *cspidx;

1530:   *nn = n;
1531:   if (!ia) return(0);
1532:   if (symmetric) {
         /* SETERRQ returns, so nothing may follow it in this branch */
1533:     SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatGetColumnIJ_SeqAIJ_Color() not supported for the case symmetric");
1535:   } else {
1536:     PetscMalloc((n+1)*sizeof(PetscInt),&collengths);
1537:     PetscMemzero(collengths,n*sizeof(PetscInt));
1538:     PetscMalloc((n+1)*sizeof(PetscInt),&cia);
1539:     PetscMalloc((nz+1)*sizeof(PetscInt),&cja);
1540:     PetscMalloc((nz+1)*sizeof(PetscInt),&cspidx);
         /* first pass: count the entries in each column */
1541:     jj   = a->j;
1542:     for (i=0; i<nz; i++) {
1543:       collengths[jj[i]]++;
1544:     }
         /* prefix sum of the counts gives the column pointers */
1545:     cia[0] = oshift;
1546:     for (i=0; i<n; i++) {
1547:       cia[i+1] = cia[i] + collengths[i];
1548:     }
         /* second pass: collengths is reused as the per-column fill position */
1549:     PetscMemzero(collengths,n*sizeof(PetscInt));
1550:     jj   = a->j;
1551:     for (row=0; row<m; row++) {
1552:       mr = a->i[row+1] - a->i[row];
1553:       for (i=0; i<mr; i++) {
1554:         col = *jj++;

1556:         cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
1557:         cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
1558:       }
1559:     }
1560:     PetscFree(collengths);
1561:     *ia    = cia; *ja = cja;
1562:     *spidx = cspidx;
1563:   }
1564:   return(0);
1565: }

/*
   MatRestoreColumnIJ_SeqAIJ_Color - Frees the arrays handed out through ia, ja and spidx.

   Does nothing when ia is NULL.  The parameters A, oshift, symmetric, inodecompressed,
   n and done are not used by this routine.
*/
1569: PetscErrorCode MatRestoreColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
1570: {

1574:   if (!ia) return(0);

1576:   PetscFree(*ia);
1577:   PetscFree(*ja);
1578:   PetscFree(*spidx);
1579:   return(0);
1580: }

/*
   MatTransposeColoringCreate_SeqAIJ - Fills a MatTransposeColoring from a matrix and a
   coloring of its columns.

   Input Parameters:
+  mat        - the matrix (its data is read as Mat_SeqAIJ)
-  iscoloring - the coloring; one index set of columns per color

   Output Parameter:
.  c - the MatTransposeColoring whose fields are set

   Fields set in c:
+  M, N, m, rstart - sizes taken from mat (divided by the block size), rstart = 0
.  ncolors         - number of colors
.  ncolumns[k]     - number of columns with color k
.  columns         - the columns of all colors, concatenated; colorforcol[k] is the start
                     of color k
.  nrows[k]        - number of rows that have an entry in some column of color k
.  rows            - those rows, concatenated; colorforrow[k] is the start of color k
-  columnsforspidx - for each listed row, the index into mat's a->j of the entry recorded
                     for that row

   Notes:
   The block size is queried only for SeqBAIJ/MPIBAIJ types and is 1 otherwise.

   In debug builds the total number of listed rows is checked against the nonzero count
   of mat and PETSC_ERR_PLIB is raised on mismatch.
*/
1584: PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c)
1585: {
1587:   PetscInt       i,n,nrows,N,j,k,m,ncols,col,cm;
1588:   const PetscInt *is,*ci,*cj,*row_idx;
1589:   PetscInt       nis = iscoloring->n,*rowhit,bs = 1;
1590:   IS             *isa;
1591:   PetscBool      flg1,flg2;
1592:   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
1593:   PetscInt       *colorforrow,*rows,*rows_i,*columnsforspidx,*columnsforspidx_i,*idxhit,*spidx;
1594:   PetscInt       *colorforcol,*columns,*columns_i;

1597:   ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);

1599:   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */
1600:   PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);
1601:   PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);
1602:   if (flg1 || flg2) {
1603:     MatGetBlockSize(mat,&bs);
1604:   }

1606:   N         = mat->cmap->N/bs;
1607:   c->M      = mat->rmap->N/bs;  /* set total rows, columns and local rows */
1608:   c->N      = mat->cmap->N/bs;
1609:   c->m      = mat->rmap->N/bs;
1610:   c->rstart = 0;

1612:   c->ncolors = nis;
1613:   PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);
1614:   PetscMalloc(nis*sizeof(PetscInt),&c->nrows);
1615:   PetscMalloc2(csp->nz+1,PetscInt,&rows,csp->nz+1,PetscInt,&columnsforspidx);
1616:   PetscMalloc((nis+1)*sizeof(PetscInt),&colorforrow);

1618:   colorforrow[0]    = 0;
1619:   rows_i            = rows;
1620:   columnsforspidx_i = columnsforspidx;

1622:   PetscMalloc((nis+1)*sizeof(PetscInt),&colorforcol);
1623:   PetscMalloc((N+1)*sizeof(PetscInt),&columns);

1625:   colorforcol[0] = 0;
1626:   columns_i      = columns;

1628:   MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL); /* column-wise storage of mat */

1630:   cm   = c->m;
1631:   PetscMalloc((cm+1)*sizeof(PetscInt),&rowhit);
1632:   PetscMalloc((cm+1)*sizeof(PetscInt),&idxhit);
1633:   for (i=0; i<nis; i++) {
1634:     ISGetLocalSize(isa[i],&n);
1635:     ISGetIndices(isa[i],&is);

1637:     c->ncolumns[i] = n;
1638:     if (n) {
1639:       PetscMemcpy(columns_i,is,n*sizeof(PetscInt));
1640:     }
1641:     colorforcol[i+1] = colorforcol[i] + n;
1642:     columns_i       += n;

1644:     /* fast, crude version requires O(N*N) work */
1645:     PetscMemzero(rowhit,cm*sizeof(PetscInt));

1647:     /* loop over columns*/
1648:     for (j=0; j<n; j++) {
1649:       col     = is[j];
1650:       row_idx = cj + ci[col];
1651:       m       = ci[col+1] - ci[col];
1652:       /* loop over the rows of this column, marking them in rowhit */
1653:       for (k=0; k<m; k++) {
             /* idxhit remembers where this entry sits in mat's a->j; rowhit stores col+1 so 0 means "not hit" */
1654:         idxhit[*row_idx]   = spidx[ci[col] + k];
1655:         rowhit[*row_idx++] = col + 1;
1656:       }
1657:     }
1658:     /* count the number of hits */
1659:     nrows = 0;
1660:     for (j=0; j<cm; j++) {
1661:       if (rowhit[j]) nrows++;
1662:     }
1663:     c->nrows[i]      = nrows;
1664:     colorforrow[i+1] = colorforrow[i] + nrows;

         /* record the hit rows, in increasing row order, with their a->j positions */
1666:     nrows = 0;
1667:     for (j=0; j<cm; j++) {
1668:       if (rowhit[j]) {
1669:         rows_i[nrows]            = j;
1670:         columnsforspidx_i[nrows] = idxhit[j];
1671:         nrows++;
1672:       }
1673:     }
1674:     ISRestoreIndices(isa[i],&is);
1675:     rows_i += nrows; columnsforspidx_i += nrows;
1676:   }
1677:   MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);
1678:   PetscFree(rowhit);
1679:   ISColoringRestoreIS(iscoloring,&isa);
1680: #if defined(PETSC_USE_DEBUG)
        /* %D is the PetscInt conversion used by the other messages in this file; %d is wrong when PetscInt is not int */
1681:   if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %D != colorforrow[nis] %D",csp->nz,colorforrow[nis]);
1682: #endif

1684:   c->colorforrow     = colorforrow;
1685:   c->rows            = rows;
1686:   c->columnsforspidx = columnsforspidx;
1687:   c->colorforcol     = colorforcol;
1688:   c->columns         = columns;

1690:   PetscFree(idxhit);
1691:   return(0);
1692: }