Actual source code: aijfact.c
petsc-3.4.2 2013-07-02
2: #include <../src/mat/impls/aij/seq/aij.h>
3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
4: #include <petscbt.h>
5: #include <../src/mat/utils/freespace.h>
9: /*
10: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
12: This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
13: */
14: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
15: {
16: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data;
17: PetscErrorCode ierr;
18: PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
19: const PetscInt *ai = a->i, *aj = a->j;
20: const PetscScalar *aa = a->a;
21: PetscBool *done;
22: PetscReal best,past = 0,future;
25: /* pick initial row */
26: best = -1;
27: for (i=0; i<n; i++) {
28: future = 0.0;
29: for (j=ai[i]; j<ai[i+1]; j++) {
30: if (aj[j] != i) future += PetscAbsScalar(aa[j]);
31: else past = PetscAbsScalar(aa[j]);
32: }
33: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
34: if (past/future > best) {
35: best = past/future;
36: current = i;
37: }
38: }
40: PetscMalloc(n*sizeof(PetscBool),&done);
41: PetscMemzero(done,n*sizeof(PetscBool));
42: PetscMalloc(n*sizeof(PetscInt),&order);
43: order[0] = current;
44: for (i=0; i<n-1; i++) {
45: done[current] = PETSC_TRUE;
46: best = -1;
47: /* loop over all neighbors of current pivot */
48: for (j=ai[current]; j<ai[current+1]; j++) {
49: jj = aj[j];
50: if (done[jj]) continue;
51: /* loop over columns of potential next row computing weights for below and above diagonal */
52: past = future = 0.0;
53: for (k=ai[jj]; k<ai[jj+1]; k++) {
54: kk = aj[k];
55: if (done[kk]) past += PetscAbsScalar(aa[k]);
56: else if (kk != jj) future += PetscAbsScalar(aa[k]);
57: }
58: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
59: if (past/future > best) {
60: best = past/future;
61: newcurrent = jj;
62: }
63: }
64: if (best == -1) { /* no neighbors to select from so select best of all that remain */
65: best = -1;
66: for (k=0; k<n; k++) {
67: if (done[k]) continue;
68: future = 0.0;
69: past = 0.0;
70: for (j=ai[k]; j<ai[k+1]; j++) {
71: kk = aj[j];
72: if (done[kk]) past += PetscAbsScalar(aa[j]);
73: else if (kk != k) future += PetscAbsScalar(aa[j]);
74: }
75: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
76: if (past/future > best) {
77: best = past/future;
78: newcurrent = k;
79: }
80: }
81: }
82: if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
83: current = newcurrent;
84: order[i+1] = current;
85: }
86: ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);
87: *icol = *irow;
88: PetscObjectReference((PetscObject)*irow);
89: PetscFree(done);
90: PetscFree(order);
91: return(0);
92: }
96: PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool *flg)
97: {
99: *flg = PETSC_TRUE;
100: return(0);
101: }
105: PETSC_EXTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
106: {
107: PetscInt n = A->rmap->n;
111: #if defined(PETSC_USE_COMPLEX)
112: if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported");
113: #endif
114: MatCreate(PetscObjectComm((PetscObject)A),B);
115: MatSetSizes(*B,n,n,n,n);
116: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
117: MatSetType(*B,MATSEQAIJ);
119: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
120: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
122: MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);
123: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
124: MatSetType(*B,MATSEQSBAIJ);
125: MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);
127: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
128: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
129: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
130: (*B)->factortype = ftype;
131: return(0);
132: }
136: PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
137: {
138: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
139: IS isicol;
140: PetscErrorCode ierr;
141: const PetscInt *r,*ic;
142: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
143: PetscInt *bi,*bj,*ajtmp;
144: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
145: PetscReal f;
146: PetscInt nlnk,*lnk,k,**bi_ptr;
147: PetscFreeSpaceList free_space=NULL,current_space=NULL;
148: PetscBT lnkbt;
151: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
152: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
153: ISGetIndices(isrow,&r);
154: ISGetIndices(isicol,&ic);
156: /* get new row pointers */
157: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
158: bi[0] = 0;
160: /* bdiag is location of diagonal in factor */
161: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
162: bdiag[0] = 0;
164: /* linked list for storing column indices of the active row */
165: nlnk = n + 1;
166: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
168: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
170: /* initial FreeSpace size is f*(ai[n]+1) */
171: f = info->fill;
172: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
173: current_space = free_space;
175: for (i=0; i<n; i++) {
176: /* copy previous fill into linked list */
177: nzi = 0;
178: nnz = ai[r[i]+1] - ai[r[i]];
179: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
180: ajtmp = aj + ai[r[i]];
181: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
182: nzi += nlnk;
184: /* add pivot rows into linked list */
185: row = lnk[n];
186: while (row < i) {
187: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
188: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
189: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
190: nzi += nlnk;
191: row = lnk[row];
192: }
193: bi[i+1] = bi[i] + nzi;
194: im[i] = nzi;
196: /* mark bdiag */
197: nzbd = 0;
198: nnz = nzi;
199: k = lnk[n];
200: while (nnz-- && k < i) {
201: nzbd++;
202: k = lnk[k];
203: }
204: bdiag[i] = bi[i] + nzbd;
206: /* if free space is not available, make more free space */
207: if (current_space->local_remaining<nzi) {
208: nnz = (n - i)*nzi; /* estimated and max additional space needed */
209: PetscFreeSpaceGet(nnz,¤t_space);
210: reallocs++;
211: }
213: /* copy data into free space, then initialize lnk */
214: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
216: bi_ptr[i] = current_space->array;
217: current_space->array += nzi;
218: current_space->local_used += nzi;
219: current_space->local_remaining -= nzi;
220: }
221: #if defined(PETSC_USE_INFO)
222: if (ai[n] != 0) {
223: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
224: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
225: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
226: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
227: PetscInfo(A,"for best performance.\n");
228: } else {
229: PetscInfo(A,"Empty matrix\n");
230: }
231: #endif
233: ISRestoreIndices(isrow,&r);
234: ISRestoreIndices(isicol,&ic);
236: /* destroy list of free space and other temporary array(s) */
237: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
238: PetscFreeSpaceContiguous(&free_space,bj);
239: PetscLLDestroy(lnk,lnkbt);
240: PetscFree2(bi_ptr,im);
242: /* put together the new matrix */
243: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
244: PetscLogObjectParent(B,isicol);
245: b = (Mat_SeqAIJ*)(B)->data;
247: b->free_a = PETSC_TRUE;
248: b->free_ij = PETSC_TRUE;
249: b->singlemalloc = PETSC_FALSE;
251: PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);
252: b->j = bj;
253: b->i = bi;
254: b->diag = bdiag;
255: b->ilen = 0;
256: b->imax = 0;
257: b->row = isrow;
258: b->col = iscol;
259: PetscObjectReference((PetscObject)isrow);
260: PetscObjectReference((PetscObject)iscol);
261: b->icol = isicol;
262: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
264: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
265: PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
266: b->maxnz = b->nz = bi[n];
268: (B)->factortype = MAT_FACTOR_LU;
269: (B)->info.factor_mallocs = reallocs;
270: (B)->info.fill_ratio_given = f;
272: if (ai[n]) {
273: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
274: } else {
275: (B)->info.fill_ratio_needed = 0.0;
276: }
277: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
278: if (a->inode.size) {
279: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
280: }
281: return(0);
282: }
286: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
287: {
288: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
289: IS isicol;
290: PetscErrorCode ierr;
291: const PetscInt *r,*ic;
292: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
293: PetscInt *bi,*bj,*ajtmp;
294: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
295: PetscReal f;
296: PetscInt nlnk,*lnk,k,**bi_ptr;
297: PetscFreeSpaceList free_space=NULL,current_space=NULL;
298: PetscBT lnkbt;
301: /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */
302: /*
303: PetscBool olddatastruct=PETSC_FALSE;
304: PetscOptionsGetBool(NULL,"-lu_old",&olddatastruct,NULL);
305: if (olddatastruct) {
306: MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);
307: return(0);
308: }
309: */
310: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
311: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
312: ISGetIndices(isrow,&r);
313: ISGetIndices(isicol,&ic);
315: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
316: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
317: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
318: bi[0] = bdiag[0] = 0;
320: /* linked list for storing column indices of the active row */
321: nlnk = n + 1;
322: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
324: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
326: /* initial FreeSpace size is f*(ai[n]+1) */
327: f = info->fill;
328: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
329: current_space = free_space;
331: for (i=0; i<n; i++) {
332: /* copy previous fill into linked list */
333: nzi = 0;
334: nnz = ai[r[i]+1] - ai[r[i]];
335: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
336: ajtmp = aj + ai[r[i]];
337: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
338: nzi += nlnk;
340: /* add pivot rows into linked list */
341: row = lnk[n];
342: while (row < i) {
343: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
344: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
345: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
346: nzi += nlnk;
347: row = lnk[row];
348: }
349: bi[i+1] = bi[i] + nzi;
350: im[i] = nzi;
352: /* mark bdiag */
353: nzbd = 0;
354: nnz = nzi;
355: k = lnk[n];
356: while (nnz-- && k < i) {
357: nzbd++;
358: k = lnk[k];
359: }
360: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
362: /* if free space is not available, make more free space */
363: if (current_space->local_remaining<nzi) {
364: nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
365: PetscFreeSpaceGet(nnz,¤t_space);
366: reallocs++;
367: }
369: /* copy data into free space, then initialize lnk */
370: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
372: bi_ptr[i] = current_space->array;
373: current_space->array += nzi;
374: current_space->local_used += nzi;
375: current_space->local_remaining -= nzi;
376: }
378: ISRestoreIndices(isrow,&r);
379: ISRestoreIndices(isicol,&ic);
381: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
382: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
383: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
384: PetscLLDestroy(lnk,lnkbt);
385: PetscFree2(bi_ptr,im);
387: /* put together the new matrix */
388: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
389: PetscLogObjectParent(B,isicol);
390: b = (Mat_SeqAIJ*)(B)->data;
392: b->free_a = PETSC_TRUE;
393: b->free_ij = PETSC_TRUE;
394: b->singlemalloc = PETSC_FALSE;
396: PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);
398: b->j = bj;
399: b->i = bi;
400: b->diag = bdiag;
401: b->ilen = 0;
402: b->imax = 0;
403: b->row = isrow;
404: b->col = iscol;
405: PetscObjectReference((PetscObject)isrow);
406: PetscObjectReference((PetscObject)iscol);
407: b->icol = isicol;
408: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
410: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
411: PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
412: b->maxnz = b->nz = bdiag[0]+1;
414: B->factortype = MAT_FACTOR_LU;
415: B->info.factor_mallocs = reallocs;
416: B->info.fill_ratio_given = f;
418: if (ai[n]) {
419: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
420: } else {
421: B->info.fill_ratio_needed = 0.0;
422: }
423: #if defined(PETSC_USE_INFO)
424: if (ai[n] != 0) {
425: PetscReal af = B->info.fill_ratio_needed;
426: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
427: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
428: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
429: PetscInfo(A,"for best performance.\n");
430: } else {
431: PetscInfo(A,"Empty matrix\n");
432: }
433: #endif
434: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
435: if (a->inode.size) {
436: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
437: }
438: return(0);
439: }
441: /*
442: Trouble in factorization, should we dump the original matrix?
443: */
446: PetscErrorCode MatFactorDumpMatrix(Mat A)
447: {
449: PetscBool flg = PETSC_FALSE;
452: PetscOptionsGetBool(NULL,"-mat_factor_dump_on_error",&flg,NULL);
453: if (flg) {
454: PetscViewer viewer;
455: char filename[PETSC_MAX_PATH_LEN];
457: PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
458: PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);
459: MatView(A,viewer);
460: PetscViewerDestroy(&viewer);
461: }
462: return(0);
463: }
467: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
468: {
469: Mat C =B;
470: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
471: IS isrow = b->row,isicol = b->icol;
472: PetscErrorCode ierr;
473: const PetscInt *r,*ic,*ics;
474: const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
475: PetscInt i,j,k,nz,nzL,row,*pj;
476: const PetscInt *ajtmp,*bjtmp;
477: MatScalar *rtmp,*pc,multiplier,*pv;
478: const MatScalar *aa=a->a,*v;
479: PetscBool row_identity,col_identity;
480: FactorShiftCtx sctx;
481: const PetscInt *ddiag;
482: PetscReal rs;
483: MatScalar d;
486: /* MatPivotSetUp(): initialize shift context sctx */
487: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
489: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
490: ddiag = a->diag;
491: sctx.shift_top = info->zeropivot;
492: for (i=0; i<n; i++) {
493: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
494: d = (aa)[ddiag[i]];
495: rs = -PetscAbsScalar(d) - PetscRealPart(d);
496: v = aa+ai[i];
497: nz = ai[i+1] - ai[i];
498: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
499: if (rs>sctx.shift_top) sctx.shift_top = rs;
500: }
501: sctx.shift_top *= 1.1;
502: sctx.nshift_max = 5;
503: sctx.shift_lo = 0.;
504: sctx.shift_hi = 1.;
505: }
507: ISGetIndices(isrow,&r);
508: ISGetIndices(isicol,&ic);
509: PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
510: ics = ic;
512: do {
513: sctx.newshift = PETSC_FALSE;
514: for (i=0; i<n; i++) {
515: /* zero rtmp */
516: /* L part */
517: nz = bi[i+1] - bi[i];
518: bjtmp = bj + bi[i];
519: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
521: /* U part */
522: nz = bdiag[i]-bdiag[i+1];
523: bjtmp = bj + bdiag[i+1]+1;
524: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
526: /* load in initial (unfactored row) */
527: nz = ai[r[i]+1] - ai[r[i]];
528: ajtmp = aj + ai[r[i]];
529: v = aa + ai[r[i]];
530: for (j=0; j<nz; j++) {
531: rtmp[ics[ajtmp[j]]] = v[j];
532: }
533: /* ZeropivotApply() */
534: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
536: /* elimination */
537: bjtmp = bj + bi[i];
538: row = *bjtmp++;
539: nzL = bi[i+1] - bi[i];
540: for (k=0; k < nzL; k++) {
541: pc = rtmp + row;
542: if (*pc != 0.0) {
543: pv = b->a + bdiag[row];
544: multiplier = *pc * (*pv);
545: *pc = multiplier;
547: pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
548: pv = b->a + bdiag[row+1]+1;
549: nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
551: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
552: PetscLogFlops(1+2*nz);
553: }
554: row = *bjtmp++;
555: }
557: /* finished row so stick it into b->a */
558: rs = 0.0;
559: /* L part */
560: pv = b->a + bi[i];
561: pj = b->j + bi[i];
562: nz = bi[i+1] - bi[i];
563: for (j=0; j<nz; j++) {
564: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
565: }
567: /* U part */
568: pv = b->a + bdiag[i+1]+1;
569: pj = b->j + bdiag[i+1]+1;
570: nz = bdiag[i] - bdiag[i+1]-1;
571: for (j=0; j<nz; j++) {
572: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
573: }
575: sctx.rs = rs;
576: sctx.pv = rtmp[i];
577: MatPivotCheck(A,info,&sctx,i);
578: if (sctx.newshift) break; /* break for-loop */
579: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
581: /* Mark diagonal and invert diagonal for simplier triangular solves */
582: pv = b->a + bdiag[i];
583: *pv = 1.0/rtmp[i];
585: } /* endof for (i=0; i<n; i++) { */
587: /* MatPivotRefine() */
588: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
589: /*
590: * if no shift in this attempt & shifting & started shifting & can refine,
591: * then try lower shift
592: */
593: sctx.shift_hi = sctx.shift_fraction;
594: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
595: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
596: sctx.newshift = PETSC_TRUE;
597: sctx.nshift++;
598: }
599: } while (sctx.newshift);
601: PetscFree(rtmp);
602: ISRestoreIndices(isicol,&ic);
603: ISRestoreIndices(isrow,&r);
605: ISIdentity(isrow,&row_identity);
606: ISIdentity(isicol,&col_identity);
607: if (row_identity && col_identity) {
608: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
609: } else {
610: C->ops->solve = MatSolve_SeqAIJ;
611: }
612: C->ops->solveadd = MatSolveAdd_SeqAIJ;
613: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
614: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
615: C->ops->matsolve = MatMatSolve_SeqAIJ;
616: C->assembled = PETSC_TRUE;
617: C->preallocated = PETSC_TRUE;
619: PetscLogFlops(C->cmap->n);
621: /* MatShiftView(A,info,&sctx) */
622: if (sctx.nshift) {
623: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
624: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
625: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
626: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
627: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
628: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);
629: }
630: }
631: Mat_CheckInode_FactorLU(C,PETSC_FALSE);
632: return(0);
633: }
637: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
638: {
639: Mat C =B;
640: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
641: IS isrow = b->row,isicol = b->icol;
642: PetscErrorCode ierr;
643: const PetscInt *r,*ic,*ics;
644: PetscInt nz,row,i,j,n=A->rmap->n,diag;
645: const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
646: const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
647: MatScalar *pv,*rtmp,*pc,multiplier,d;
648: const MatScalar *v,*aa=a->a;
649: PetscReal rs=0.0;
650: FactorShiftCtx sctx;
651: const PetscInt *ddiag;
652: PetscBool row_identity, col_identity;
655: /* MatPivotSetUp(): initialize shift context sctx */
656: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
658: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
659: ddiag = a->diag;
660: sctx.shift_top = info->zeropivot;
661: for (i=0; i<n; i++) {
662: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
663: d = (aa)[ddiag[i]];
664: rs = -PetscAbsScalar(d) - PetscRealPart(d);
665: v = aa+ai[i];
666: nz = ai[i+1] - ai[i];
667: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
668: if (rs>sctx.shift_top) sctx.shift_top = rs;
669: }
670: sctx.shift_top *= 1.1;
671: sctx.nshift_max = 5;
672: sctx.shift_lo = 0.;
673: sctx.shift_hi = 1.;
674: }
676: ISGetIndices(isrow,&r);
677: ISGetIndices(isicol,&ic);
678: PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
679: ics = ic;
681: do {
682: sctx.newshift = PETSC_FALSE;
683: for (i=0; i<n; i++) {
684: nz = bi[i+1] - bi[i];
685: bjtmp = bj + bi[i];
686: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
688: /* load in initial (unfactored row) */
689: nz = ai[r[i]+1] - ai[r[i]];
690: ajtmp = aj + ai[r[i]];
691: v = aa + ai[r[i]];
692: for (j=0; j<nz; j++) {
693: rtmp[ics[ajtmp[j]]] = v[j];
694: }
695: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
697: row = *bjtmp++;
698: while (row < i) {
699: pc = rtmp + row;
700: if (*pc != 0.0) {
701: pv = b->a + diag_offset[row];
702: pj = b->j + diag_offset[row] + 1;
703: multiplier = *pc / *pv++;
704: *pc = multiplier;
705: nz = bi[row+1] - diag_offset[row] - 1;
706: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
707: PetscLogFlops(1+2*nz);
708: }
709: row = *bjtmp++;
710: }
711: /* finished row so stick it into b->a */
712: pv = b->a + bi[i];
713: pj = b->j + bi[i];
714: nz = bi[i+1] - bi[i];
715: diag = diag_offset[i] - bi[i];
716: rs = 0.0;
717: for (j=0; j<nz; j++) {
718: pv[j] = rtmp[pj[j]];
719: rs += PetscAbsScalar(pv[j]);
720: }
721: rs -= PetscAbsScalar(pv[diag]);
723: sctx.rs = rs;
724: sctx.pv = pv[diag];
725: MatPivotCheck(A,info,&sctx,i);
726: if (sctx.newshift) break;
727: pv[diag] = sctx.pv;
728: }
730: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
731: /*
732: * if no shift in this attempt & shifting & started shifting & can refine,
733: * then try lower shift
734: */
735: sctx.shift_hi = sctx.shift_fraction;
736: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
737: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
738: sctx.newshift = PETSC_TRUE;
739: sctx.nshift++;
740: }
741: } while (sctx.newshift);
743: /* invert diagonal entries for simplier triangular solves */
744: for (i=0; i<n; i++) {
745: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
746: }
747: PetscFree(rtmp);
748: ISRestoreIndices(isicol,&ic);
749: ISRestoreIndices(isrow,&r);
751: ISIdentity(isrow,&row_identity);
752: ISIdentity(isicol,&col_identity);
753: if (row_identity && col_identity) {
754: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
755: } else {
756: C->ops->solve = MatSolve_SeqAIJ_inplace;
757: }
758: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
759: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
760: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
761: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
763: C->assembled = PETSC_TRUE;
764: C->preallocated = PETSC_TRUE;
766: PetscLogFlops(C->cmap->n);
767: if (sctx.nshift) {
768: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
769: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
770: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
771: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
772: }
773: }
774: (C)->ops->solve = MatSolve_SeqAIJ_inplace;
775: (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
777: Mat_CheckInode(C,PETSC_FALSE);
778: return(0);
779: }
781: /*
782: This routine implements inplace ILU(0) with row or/and column permutations.
783: Input:
784: A - original matrix
785: Output;
786: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
787: a->j (col index) is permuted by the inverse of colperm, then sorted
788: a->a reordered accordingly with a->j
789: a->diag (ptr to diagonal elements) is updated.
790: */
793: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
794: {
795: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data;
796: IS isrow = a->row,isicol = a->icol;
797: PetscErrorCode ierr;
798: const PetscInt *r,*ic,*ics;
799: PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
800: PetscInt *ajtmp,nz,row;
801: PetscInt *diag = a->diag,nbdiag,*pj;
802: PetscScalar *rtmp,*pc,multiplier,d;
803: MatScalar *pv,*v;
804: PetscReal rs;
805: FactorShiftCtx sctx;
806: const MatScalar *aa=a->a,*vtmp;
809: if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
811: /* MatPivotSetUp(): initialize shift context sctx */
812: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
814: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
815: const PetscInt *ddiag = a->diag;
816: sctx.shift_top = info->zeropivot;
817: for (i=0; i<n; i++) {
818: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
819: d = (aa)[ddiag[i]];
820: rs = -PetscAbsScalar(d) - PetscRealPart(d);
821: vtmp = aa+ai[i];
822: nz = ai[i+1] - ai[i];
823: for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
824: if (rs>sctx.shift_top) sctx.shift_top = rs;
825: }
826: sctx.shift_top *= 1.1;
827: sctx.nshift_max = 5;
828: sctx.shift_lo = 0.;
829: sctx.shift_hi = 1.;
830: }
832: ISGetIndices(isrow,&r);
833: ISGetIndices(isicol,&ic);
834: PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
835: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
836: ics = ic;
838: #if defined(MV)
839: sctx.shift_top = 0.;
840: sctx.nshift_max = 0;
841: sctx.shift_lo = 0.;
842: sctx.shift_hi = 0.;
843: sctx.shift_fraction = 0.;
845: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
846: sctx.shift_top = 0.;
847: for (i=0; i<n; i++) {
848: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
849: d = (a->a)[diag[i]];
850: rs = -PetscAbsScalar(d) - PetscRealPart(d);
851: v = a->a+ai[i];
852: nz = ai[i+1] - ai[i];
853: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
854: if (rs>sctx.shift_top) sctx.shift_top = rs;
855: }
856: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
857: sctx.shift_top *= 1.1;
858: sctx.nshift_max = 5;
859: sctx.shift_lo = 0.;
860: sctx.shift_hi = 1.;
861: }
863: sctx.shift_amount = 0.;
864: sctx.nshift = 0;
865: #endif
867: do {
868: sctx.newshift = PETSC_FALSE;
869: for (i=0; i<n; i++) {
870: /* load in initial unfactored row */
871: nz = ai[r[i]+1] - ai[r[i]];
872: ajtmp = aj + ai[r[i]];
873: v = a->a + ai[r[i]];
874: /* sort permuted ajtmp and values v accordingly */
875: for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
876: PetscSortIntWithScalarArray(nz,ajtmp,v);
878: diag[r[i]] = ai[r[i]];
879: for (j=0; j<nz; j++) {
880: rtmp[ajtmp[j]] = v[j];
881: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
882: }
883: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
885: row = *ajtmp++;
886: while (row < i) {
887: pc = rtmp + row;
888: if (*pc != 0.0) {
889: pv = a->a + diag[r[row]];
890: pj = aj + diag[r[row]] + 1;
892: multiplier = *pc / *pv++;
893: *pc = multiplier;
894: nz = ai[r[row]+1] - diag[r[row]] - 1;
895: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
896: PetscLogFlops(1+2*nz);
897: }
898: row = *ajtmp++;
899: }
900: /* finished row so overwrite it onto a->a */
901: pv = a->a + ai[r[i]];
902: pj = aj + ai[r[i]];
903: nz = ai[r[i]+1] - ai[r[i]];
904: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
906: rs = 0.0;
907: for (j=0; j<nz; j++) {
908: pv[j] = rtmp[pj[j]];
909: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
910: }
912: sctx.rs = rs;
913: sctx.pv = pv[nbdiag];
914: MatPivotCheck(A,info,&sctx,i);
915: if (sctx.newshift) break;
916: pv[nbdiag] = sctx.pv;
917: }
919: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
920: /*
921: * if no shift in this attempt & shifting & started shifting & can refine,
922: * then try lower shift
923: */
924: sctx.shift_hi = sctx.shift_fraction;
925: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
926: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
927: sctx.newshift = PETSC_TRUE;
928: sctx.nshift++;
929: }
930: } while (sctx.newshift);
932: /* invert diagonal entries for simplier triangular solves */
933: for (i=0; i<n; i++) {
934: a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
935: }
937: PetscFree(rtmp);
938: ISRestoreIndices(isicol,&ic);
939: ISRestoreIndices(isrow,&r);
941: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
942: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
943: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
944: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
946: A->assembled = PETSC_TRUE;
947: A->preallocated = PETSC_TRUE;
949: PetscLogFlops(A->cmap->n);
950: if (sctx.nshift) {
951: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
952: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
953: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
954: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
955: }
956: }
957: return(0);
958: }
960: /* ----------------------------------------------------------- */
963: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
964: {
966: Mat C;
969: MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);
970: MatLUFactorSymbolic(C,A,row,col,info);
971: MatLUFactorNumeric(C,A,info);
973: A->ops->solve = C->ops->solve;
974: A->ops->solvetranspose = C->ops->solvetranspose;
976: MatHeaderMerge(A,C);
977: PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
978: return(0);
979: }
980: /* ----------------------------------------------------------- */
985: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
986: {
987: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
988: IS iscol = a->col,isrow = a->row;
989: PetscErrorCode ierr;
990: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
991: PetscInt nz;
992: const PetscInt *rout,*cout,*r,*c;
993: PetscScalar *x,*tmp,*tmps,sum;
994: const PetscScalar *b;
995: const MatScalar *aa = a->a,*v;
998: if (!n) return(0);
1000: VecGetArrayRead(bb,&b);
1001: VecGetArray(xx,&x);
1002: tmp = a->solve_work;
1004: ISGetIndices(isrow,&rout); r = rout;
1005: ISGetIndices(iscol,&cout); c = cout + (n-1);
1007: /* forward solve the lower triangular */
1008: tmp[0] = b[*r++];
1009: tmps = tmp;
1010: for (i=1; i<n; i++) {
1011: v = aa + ai[i];
1012: vi = aj + ai[i];
1013: nz = a->diag[i] - ai[i];
1014: sum = b[*r++];
1015: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1016: tmp[i] = sum;
1017: }
1019: /* backward solve the upper triangular */
1020: for (i=n-1; i>=0; i--) {
1021: v = aa + a->diag[i] + 1;
1022: vi = aj + a->diag[i] + 1;
1023: nz = ai[i+1] - a->diag[i] - 1;
1024: sum = tmp[i];
1025: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1026: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1027: }
1029: ISRestoreIndices(isrow,&rout);
1030: ISRestoreIndices(iscol,&cout);
1031: VecRestoreArrayRead(bb,&b);
1032: VecRestoreArray(xx,&x);
1033: PetscLogFlops(2.0*a->nz - A->cmap->n);
1034: return(0);
1035: }
1039: PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1040: {
1041: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1042: IS iscol = a->col,isrow = a->row;
1043: PetscErrorCode ierr;
1044: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1045: PetscInt nz,neq;
1046: const PetscInt *rout,*cout,*r,*c;
1047: PetscScalar *x,*b,*tmp,*tmps,sum;
1048: const MatScalar *aa = a->a,*v;
1049: PetscBool bisdense,xisdense;
1052: if (!n) return(0);
1054: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1055: if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1056: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1057: if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1059: MatDenseGetArray(B,&b);
1060: MatDenseGetArray(X,&x);
1062: tmp = a->solve_work;
1063: ISGetIndices(isrow,&rout); r = rout;
1064: ISGetIndices(iscol,&cout); c = cout;
1066: for (neq=0; neq<B->cmap->n; neq++) {
1067: /* forward solve the lower triangular */
1068: tmp[0] = b[r[0]];
1069: tmps = tmp;
1070: for (i=1; i<n; i++) {
1071: v = aa + ai[i];
1072: vi = aj + ai[i];
1073: nz = a->diag[i] - ai[i];
1074: sum = b[r[i]];
1075: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1076: tmp[i] = sum;
1077: }
1078: /* backward solve the upper triangular */
1079: for (i=n-1; i>=0; i--) {
1080: v = aa + a->diag[i] + 1;
1081: vi = aj + a->diag[i] + 1;
1082: nz = ai[i+1] - a->diag[i] - 1;
1083: sum = tmp[i];
1084: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1085: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1086: }
1088: b += n;
1089: x += n;
1090: }
1091: ISRestoreIndices(isrow,&rout);
1092: ISRestoreIndices(iscol,&cout);
1093: MatDenseRestoreArray(B,&b);
1094: MatDenseRestoreArray(X,&x);
1095: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1096: return(0);
1097: }
1101: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1102: {
1103: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1104: IS iscol = a->col,isrow = a->row;
1105: PetscErrorCode ierr;
1106: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1107: PetscInt nz,neq;
1108: const PetscInt *rout,*cout,*r,*c;
1109: PetscScalar *x,*b,*tmp,sum;
1110: const MatScalar *aa = a->a,*v;
1111: PetscBool bisdense,xisdense;
1114: if (!n) return(0);
1116: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1117: if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1118: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1119: if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1121: MatDenseGetArray(B,&b);
1122: MatDenseGetArray(X,&x);
1124: tmp = a->solve_work;
1125: ISGetIndices(isrow,&rout); r = rout;
1126: ISGetIndices(iscol,&cout); c = cout;
1128: for (neq=0; neq<B->cmap->n; neq++) {
1129: /* forward solve the lower triangular */
1130: tmp[0] = b[r[0]];
1131: v = aa;
1132: vi = aj;
1133: for (i=1; i<n; i++) {
1134: nz = ai[i+1] - ai[i];
1135: sum = b[r[i]];
1136: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1137: tmp[i] = sum;
1138: v += nz; vi += nz;
1139: }
1141: /* backward solve the upper triangular */
1142: for (i=n-1; i>=0; i--) {
1143: v = aa + adiag[i+1]+1;
1144: vi = aj + adiag[i+1]+1;
1145: nz = adiag[i]-adiag[i+1]-1;
1146: sum = tmp[i];
1147: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1148: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1149: }
1151: b += n;
1152: x += n;
1153: }
1154: ISRestoreIndices(isrow,&rout);
1155: ISRestoreIndices(iscol,&cout);
1156: MatDenseRestoreArray(B,&b);
1157: MatDenseRestoreArray(X,&x);
1158: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1159: return(0);
1160: }
1164: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1165: {
1166: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1167: IS iscol = a->col,isrow = a->row;
1168: PetscErrorCode ierr;
1169: const PetscInt *r,*c,*rout,*cout;
1170: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1171: PetscInt nz,row;
1172: PetscScalar *x,*b,*tmp,*tmps,sum;
1173: const MatScalar *aa = a->a,*v;
1176: if (!n) return(0);
1178: VecGetArray(bb,&b);
1179: VecGetArray(xx,&x);
1180: tmp = a->solve_work;
1182: ISGetIndices(isrow,&rout); r = rout;
1183: ISGetIndices(iscol,&cout); c = cout + (n-1);
1185: /* forward solve the lower triangular */
1186: tmp[0] = b[*r++];
1187: tmps = tmp;
1188: for (row=1; row<n; row++) {
1189: i = rout[row]; /* permuted row */
1190: v = aa + ai[i];
1191: vi = aj + ai[i];
1192: nz = a->diag[i] - ai[i];
1193: sum = b[*r++];
1194: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1195: tmp[row] = sum;
1196: }
1198: /* backward solve the upper triangular */
1199: for (row=n-1; row>=0; row--) {
1200: i = rout[row]; /* permuted row */
1201: v = aa + a->diag[i] + 1;
1202: vi = aj + a->diag[i] + 1;
1203: nz = ai[i+1] - a->diag[i] - 1;
1204: sum = tmp[row];
1205: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1206: x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1207: }
1209: ISRestoreIndices(isrow,&rout);
1210: ISRestoreIndices(iscol,&cout);
1211: VecRestoreArray(bb,&b);
1212: VecRestoreArray(xx,&x);
1213: PetscLogFlops(2.0*a->nz - A->cmap->n);
1214: return(0);
1215: }
1217: /* ----------------------------------------------------------- */
1218: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1221: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1222: {
1223: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1224: PetscErrorCode ierr;
1225: PetscInt n = A->rmap->n;
1226: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag;
1227: PetscScalar *x;
1228: const PetscScalar *b;
1229: const MatScalar *aa = a->a;
1230: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1231: PetscInt adiag_i,i,nz,ai_i;
1232: const PetscInt *vi;
1233: const MatScalar *v;
1234: PetscScalar sum;
1235: #endif
1238: if (!n) return(0);
1240: VecGetArrayRead(bb,&b);
1241: VecGetArray(xx,&x);
1243: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1244: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1245: #else
1246: /* forward solve the lower triangular */
1247: x[0] = b[0];
1248: for (i=1; i<n; i++) {
1249: ai_i = ai[i];
1250: v = aa + ai_i;
1251: vi = aj + ai_i;
1252: nz = adiag[i] - ai_i;
1253: sum = b[i];
1254: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1255: x[i] = sum;
1256: }
1258: /* backward solve the upper triangular */
1259: for (i=n-1; i>=0; i--) {
1260: adiag_i = adiag[i];
1261: v = aa + adiag_i + 1;
1262: vi = aj + adiag_i + 1;
1263: nz = ai[i+1] - adiag_i - 1;
1264: sum = x[i];
1265: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1266: x[i] = sum*aa[adiag_i];
1267: }
1268: #endif
1269: PetscLogFlops(2.0*a->nz - A->cmap->n);
1270: VecRestoreArrayRead(bb,&b);
1271: VecRestoreArray(xx,&x);
1272: return(0);
1273: }
1277: PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1278: {
1279: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1280: IS iscol = a->col,isrow = a->row;
1281: PetscErrorCode ierr;
1282: PetscInt i, n = A->rmap->n,j;
1283: PetscInt nz;
1284: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1285: PetscScalar *x,*tmp,sum;
1286: const PetscScalar *b;
1287: const MatScalar *aa = a->a,*v;
1290: if (yy != xx) {VecCopy(yy,xx);}
1292: VecGetArrayRead(bb,&b);
1293: VecGetArray(xx,&x);
1294: tmp = a->solve_work;
1296: ISGetIndices(isrow,&rout); r = rout;
1297: ISGetIndices(iscol,&cout); c = cout + (n-1);
1299: /* forward solve the lower triangular */
1300: tmp[0] = b[*r++];
1301: for (i=1; i<n; i++) {
1302: v = aa + ai[i];
1303: vi = aj + ai[i];
1304: nz = a->diag[i] - ai[i];
1305: sum = b[*r++];
1306: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1307: tmp[i] = sum;
1308: }
1310: /* backward solve the upper triangular */
1311: for (i=n-1; i>=0; i--) {
1312: v = aa + a->diag[i] + 1;
1313: vi = aj + a->diag[i] + 1;
1314: nz = ai[i+1] - a->diag[i] - 1;
1315: sum = tmp[i];
1316: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1317: tmp[i] = sum*aa[a->diag[i]];
1318: x[*c--] += tmp[i];
1319: }
1321: ISRestoreIndices(isrow,&rout);
1322: ISRestoreIndices(iscol,&cout);
1323: VecRestoreArrayRead(bb,&b);
1324: VecRestoreArray(xx,&x);
1325: PetscLogFlops(2.0*a->nz);
1326: return(0);
1327: }
1331: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1332: {
1333: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1334: IS iscol = a->col,isrow = a->row;
1335: PetscErrorCode ierr;
1336: PetscInt i, n = A->rmap->n,j;
1337: PetscInt nz;
1338: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1339: PetscScalar *x,*tmp,sum;
1340: const PetscScalar *b;
1341: const MatScalar *aa = a->a,*v;
1344: if (yy != xx) {VecCopy(yy,xx);}
1346: VecGetArrayRead(bb,&b);
1347: VecGetArray(xx,&x);
1348: tmp = a->solve_work;
1350: ISGetIndices(isrow,&rout); r = rout;
1351: ISGetIndices(iscol,&cout); c = cout;
1353: /* forward solve the lower triangular */
1354: tmp[0] = b[r[0]];
1355: v = aa;
1356: vi = aj;
1357: for (i=1; i<n; i++) {
1358: nz = ai[i+1] - ai[i];
1359: sum = b[r[i]];
1360: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1361: tmp[i] = sum;
1362: v += nz;
1363: vi += nz;
1364: }
1366: /* backward solve the upper triangular */
1367: v = aa + adiag[n-1];
1368: vi = aj + adiag[n-1];
1369: for (i=n-1; i>=0; i--) {
1370: nz = adiag[i] - adiag[i+1] - 1;
1371: sum = tmp[i];
1372: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1373: tmp[i] = sum*v[nz];
1374: x[c[i]] += tmp[i];
1375: v += nz+1; vi += nz+1;
1376: }
1378: ISRestoreIndices(isrow,&rout);
1379: ISRestoreIndices(iscol,&cout);
1380: VecRestoreArrayRead(bb,&b);
1381: VecRestoreArray(xx,&x);
1382: PetscLogFlops(2.0*a->nz);
1383: return(0);
1384: }
1388: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1389: {
1390: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1391: IS iscol = a->col,isrow = a->row;
1392: PetscErrorCode ierr;
1393: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1394: PetscInt i,n = A->rmap->n,j;
1395: PetscInt nz;
1396: PetscScalar *x,*tmp,s1;
1397: const MatScalar *aa = a->a,*v;
1398: const PetscScalar *b;
1401: VecGetArrayRead(bb,&b);
1402: VecGetArray(xx,&x);
1403: tmp = a->solve_work;
1405: ISGetIndices(isrow,&rout); r = rout;
1406: ISGetIndices(iscol,&cout); c = cout;
1408: /* copy the b into temp work space according to permutation */
1409: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1411: /* forward solve the U^T */
1412: for (i=0; i<n; i++) {
1413: v = aa + diag[i];
1414: vi = aj + diag[i] + 1;
1415: nz = ai[i+1] - diag[i] - 1;
1416: s1 = tmp[i];
1417: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1418: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1419: tmp[i] = s1;
1420: }
1422: /* backward solve the L^T */
1423: for (i=n-1; i>=0; i--) {
1424: v = aa + diag[i] - 1;
1425: vi = aj + diag[i] - 1;
1426: nz = diag[i] - ai[i];
1427: s1 = tmp[i];
1428: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1429: }
1431: /* copy tmp into x according to permutation */
1432: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1434: ISRestoreIndices(isrow,&rout);
1435: ISRestoreIndices(iscol,&cout);
1436: VecRestoreArrayRead(bb,&b);
1437: VecRestoreArray(xx,&x);
1439: PetscLogFlops(2.0*a->nz-A->cmap->n);
1440: return(0);
1441: }
1445: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1446: {
1447: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1448: IS iscol = a->col,isrow = a->row;
1449: PetscErrorCode ierr;
1450: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1451: PetscInt i,n = A->rmap->n,j;
1452: PetscInt nz;
1453: PetscScalar *x,*tmp,s1;
1454: const MatScalar *aa = a->a,*v;
1455: const PetscScalar *b;
1458: VecGetArrayRead(bb,&b);
1459: VecGetArray(xx,&x);
1460: tmp = a->solve_work;
1462: ISGetIndices(isrow,&rout); r = rout;
1463: ISGetIndices(iscol,&cout); c = cout;
1465: /* copy the b into temp work space according to permutation */
1466: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1468: /* forward solve the U^T */
1469: for (i=0; i<n; i++) {
1470: v = aa + adiag[i+1] + 1;
1471: vi = aj + adiag[i+1] + 1;
1472: nz = adiag[i] - adiag[i+1] - 1;
1473: s1 = tmp[i];
1474: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1475: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1476: tmp[i] = s1;
1477: }
1479: /* backward solve the L^T */
1480: for (i=n-1; i>=0; i--) {
1481: v = aa + ai[i];
1482: vi = aj + ai[i];
1483: nz = ai[i+1] - ai[i];
1484: s1 = tmp[i];
1485: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1486: }
1488: /* copy tmp into x according to permutation */
1489: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1491: ISRestoreIndices(isrow,&rout);
1492: ISRestoreIndices(iscol,&cout);
1493: VecRestoreArrayRead(bb,&b);
1494: VecRestoreArray(xx,&x);
1496: PetscLogFlops(2.0*a->nz-A->cmap->n);
1497: return(0);
1498: }
1502: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1503: {
1504: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1505: IS iscol = a->col,isrow = a->row;
1506: PetscErrorCode ierr;
1507: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1508: PetscInt i,n = A->rmap->n,j;
1509: PetscInt nz;
1510: PetscScalar *x,*tmp,s1;
1511: const MatScalar *aa = a->a,*v;
1512: const PetscScalar *b;
1515: if (zz != xx) {VecCopy(zz,xx);}
1516: VecGetArrayRead(bb,&b);
1517: VecGetArray(xx,&x);
1518: tmp = a->solve_work;
1520: ISGetIndices(isrow,&rout); r = rout;
1521: ISGetIndices(iscol,&cout); c = cout;
1523: /* copy the b into temp work space according to permutation */
1524: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1526: /* forward solve the U^T */
1527: for (i=0; i<n; i++) {
1528: v = aa + diag[i];
1529: vi = aj + diag[i] + 1;
1530: nz = ai[i+1] - diag[i] - 1;
1531: s1 = tmp[i];
1532: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1533: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1534: tmp[i] = s1;
1535: }
1537: /* backward solve the L^T */
1538: for (i=n-1; i>=0; i--) {
1539: v = aa + diag[i] - 1;
1540: vi = aj + diag[i] - 1;
1541: nz = diag[i] - ai[i];
1542: s1 = tmp[i];
1543: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1544: }
1546: /* copy tmp into x according to permutation */
1547: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1549: ISRestoreIndices(isrow,&rout);
1550: ISRestoreIndices(iscol,&cout);
1551: VecRestoreArrayRead(bb,&b);
1552: VecRestoreArray(xx,&x);
1554: PetscLogFlops(2.0*a->nz-A->cmap->n);
1555: return(0);
1556: }
1560: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1561: {
1562: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1563: IS iscol = a->col,isrow = a->row;
1564: PetscErrorCode ierr;
1565: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1566: PetscInt i,n = A->rmap->n,j;
1567: PetscInt nz;
1568: PetscScalar *x,*tmp,s1;
1569: const MatScalar *aa = a->a,*v;
1570: const PetscScalar *b;
1573: if (zz != xx) {VecCopy(zz,xx);}
1574: VecGetArrayRead(bb,&b);
1575: VecGetArray(xx,&x);
1576: tmp = a->solve_work;
1578: ISGetIndices(isrow,&rout); r = rout;
1579: ISGetIndices(iscol,&cout); c = cout;
1581: /* copy the b into temp work space according to permutation */
1582: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1584: /* forward solve the U^T */
1585: for (i=0; i<n; i++) {
1586: v = aa + adiag[i+1] + 1;
1587: vi = aj + adiag[i+1] + 1;
1588: nz = adiag[i] - adiag[i+1] - 1;
1589: s1 = tmp[i];
1590: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1591: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1592: tmp[i] = s1;
1593: }
1596: /* backward solve the L^T */
1597: for (i=n-1; i>=0; i--) {
1598: v = aa + ai[i];
1599: vi = aj + ai[i];
1600: nz = ai[i+1] - ai[i];
1601: s1 = tmp[i];
1602: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1603: }
1605: /* copy tmp into x according to permutation */
1606: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1608: ISRestoreIndices(isrow,&rout);
1609: ISRestoreIndices(iscol,&cout);
1610: VecRestoreArrayRead(bb,&b);
1611: VecRestoreArray(xx,&x);
1613: PetscLogFlops(2.0*a->nz-A->cmap->n);
1614: return(0);
1615: }
1617: /* ----------------------------------------------------------------*/
1619: extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool);
1621: /*
1622: ilu() under revised new data structure.
1623: Factored arrays bj and ba are stored as
1624: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1626: bi=fact->i is an array of size n+1, in which
1627: bi+
1628: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1629: bi[n]: points to L(n-1,n-1)+1
1631: bdiag=fact->diag is an array of size n+1,in which
1632: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1633: bdiag[n]: points to entry of U(n-1,0)-1
1635: U(i,:) contains bdiag[i] as its last entry, i.e.,
1636: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1637: */
1640: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1641: {
1643: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1645: const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1646: PetscInt i,j,k=0,nz,*bi,*bj,*bdiag;
1647: PetscBool missing;
1648: IS isicol;
1651: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1652: MatMissingDiagonal(A,&missing,&i);
1653: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1654: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1655: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);
1656: b = (Mat_SeqAIJ*)(fact)->data;
1658: /* allocate matrix arrays for new data structure */
1659: PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);
1660: PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));
1662: b->singlemalloc = PETSC_TRUE;
1663: if (!b->diag) {
1664: PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);
1665: PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));
1666: }
1667: bdiag = b->diag;
1669: if (n > 0) {
1670: PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));
1671: }
1673: /* set bi and bj with new data structure */
1674: bi = b->i;
1675: bj = b->j;
1677: /* L part */
1678: bi[0] = 0;
1679: for (i=0; i<n; i++) {
1680: nz = adiag[i] - ai[i];
1681: bi[i+1] = bi[i] + nz;
1682: aj = a->j + ai[i];
1683: for (j=0; j<nz; j++) {
1684: /* *bj = aj[j]; bj++; */
1685: bj[k++] = aj[j];
1686: }
1687: }
1689: /* U part */
1690: bdiag[n] = bi[n]-1;
1691: for (i=n-1; i>=0; i--) {
1692: nz = ai[i+1] - adiag[i] - 1;
1693: aj = a->j + adiag[i] + 1;
1694: for (j=0; j<nz; j++) {
1695: /* *bj = aj[j]; bj++; */
1696: bj[k++] = aj[j];
1697: }
1698: /* diag[i] */
1699: /* *bj = i; bj++; */
1700: bj[k++] = i;
1701: bdiag[i] = bdiag[i+1] + nz + 1;
1702: }
1704: fact->factortype = MAT_FACTOR_ILU;
1705: fact->info.factor_mallocs = 0;
1706: fact->info.fill_ratio_given = info->fill;
1707: fact->info.fill_ratio_needed = 1.0;
1708: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1710: b = (Mat_SeqAIJ*)(fact)->data;
1711: b->row = isrow;
1712: b->col = iscol;
1713: b->icol = isicol;
1714: PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);
1715: PetscObjectReference((PetscObject)isrow);
1716: PetscObjectReference((PetscObject)iscol);
1717: return(0);
1718: }
1722: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1723: {
1724: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1725: IS isicol;
1726: PetscErrorCode ierr;
1727: const PetscInt *r,*ic;
1728: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1729: PetscInt *bi,*cols,nnz,*cols_lvl;
1730: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1731: PetscInt i,levels,diagonal_fill;
1732: PetscBool col_identity,row_identity;
1733: PetscReal f;
1734: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1735: PetscBT lnkbt;
1736: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1737: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1738: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1741: /* Uncomment the old data struct part only while testing new data structure for MatSolve() */
1742: /*
1743: PetscBool olddatastruct=PETSC_FALSE;
1744: PetscOptionsGetBool(NULL,"-ilu_old",&olddatastruct,NULL);
1745: if (olddatastruct) {
1746: MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);
1747: return(0);
1748: }
1749: */
1751: levels = (PetscInt)info->levels;
1752: ISIdentity(isrow,&row_identity);
1753: ISIdentity(iscol,&col_identity);
1754: if (!levels && row_identity && col_identity) {
1755: /* special case: ilu(0) with natural ordering */
1756: MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);
1757: if (a->inode.size) {
1758: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1759: }
1760: return(0);
1761: }
1763: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1764: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1765: ISGetIndices(isrow,&r);
1766: ISGetIndices(isicol,&ic);
1768: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1769: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
1770: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
1771: bi[0] = bdiag[0] = 0;
1772: PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);
1774: /* create a linked list for storing column indices of the active row */
1775: nlnk = n + 1;
1776: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1778: /* initial FreeSpace size is f*(ai[n]+1) */
1779: f = info->fill;
1780: diagonal_fill = (PetscInt)info->diagonal_fill;
1781: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1782: current_space = free_space;
1783: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1784: current_space_lvl = free_space_lvl;
1785: for (i=0; i<n; i++) {
1786: nzi = 0;
1787: /* copy current row into linked list */
1788: nnz = ai[r[i]+1] - ai[r[i]];
1789: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1790: cols = aj + ai[r[i]];
1791: lnk[i] = -1; /* marker to indicate if diagonal exists */
1792: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1793: nzi += nlnk;
1795: /* make sure diagonal entry is included */
1796: if (diagonal_fill && lnk[i] == -1) {
1797: fm = n;
1798: while (lnk[fm] < i) fm = lnk[fm];
1799: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1800: lnk[fm] = i;
1801: lnk_lvl[i] = 0;
1802: nzi++; dcount++;
1803: }
1805: /* add pivot rows into the active row */
1806: nzbd = 0;
1807: prow = lnk[n];
1808: while (prow < i) {
1809: nnz = bdiag[prow];
1810: cols = bj_ptr[prow] + nnz + 1;
1811: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1812: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1813: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1814: nzi += nlnk;
1815: prow = lnk[prow];
1816: nzbd++;
1817: }
1818: bdiag[i] = nzbd;
1819: bi[i+1] = bi[i] + nzi;
1820: /* if free space is not available, make more free space */
1821: if (current_space->local_remaining<nzi) {
1822: nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
1823: PetscFreeSpaceGet(nnz,¤t_space);
1824: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1825: reallocs++;
1826: }
1828: /* copy data into free_space and free_space_lvl, then initialize lnk */
1829: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1830: bj_ptr[i] = current_space->array;
1831: bjlvl_ptr[i] = current_space_lvl->array;
1833: /* make sure the active row i has diagonal entry */
1834: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1836: current_space->array += nzi;
1837: current_space->local_used += nzi;
1838: current_space->local_remaining -= nzi;
1839: current_space_lvl->array += nzi;
1840: current_space_lvl->local_used += nzi;
1841: current_space_lvl->local_remaining -= nzi;
1842: }
1844: ISRestoreIndices(isrow,&r);
1845: ISRestoreIndices(isicol,&ic);
1846: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1847: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
1848: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
1850: PetscIncompleteLLDestroy(lnk,lnkbt);
1851: PetscFreeSpaceDestroy(free_space_lvl);
1852: PetscFree2(bj_ptr,bjlvl_ptr);
1854: #if defined(PETSC_USE_INFO)
1855: {
1856: PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1857: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
1858: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
1859: PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
1860: PetscInfo(A,"for best performance.\n");
1861: if (diagonal_fill) {
1862: PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
1863: }
1864: }
1865: #endif
1866: /* put together the new matrix */
1867: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
1868: PetscLogObjectParent(fact,isicol);
1869: b = (Mat_SeqAIJ*)(fact)->data;
1871: b->free_a = PETSC_TRUE;
1872: b->free_ij = PETSC_TRUE;
1873: b->singlemalloc = PETSC_FALSE;
1875: PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);
1877: b->j = bj;
1878: b->i = bi;
1879: b->diag = bdiag;
1880: b->ilen = 0;
1881: b->imax = 0;
1882: b->row = isrow;
1883: b->col = iscol;
1884: PetscObjectReference((PetscObject)isrow);
1885: PetscObjectReference((PetscObject)iscol);
1886: b->icol = isicol;
1888: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1889: /* In b structure: Free imax, ilen, old a, old j.
1890: Allocate bdiag, solve_work, new a, new j */
1891: PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
1892: b->maxnz = b->nz = bdiag[0]+1;
1894: (fact)->info.factor_mallocs = reallocs;
1895: (fact)->info.fill_ratio_given = f;
1896: (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1897: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1898: if (a->inode.size) {
1899: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1900: }
1901: return(0);
1902: }
1906: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1907: {
1908: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1909: IS isicol;
1910: PetscErrorCode ierr;
1911: const PetscInt *r,*ic;
1912: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1913: PetscInt *bi,*cols,nnz,*cols_lvl;
1914: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1915: PetscInt i,levels,diagonal_fill;
1916: PetscBool col_identity,row_identity;
1917: PetscReal f;
1918: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1919: PetscBT lnkbt;
1920: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1921: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1922: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1923: PetscBool missing;
1926: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1927: f = info->fill;
1928: levels = (PetscInt)info->levels;
1929: diagonal_fill = (PetscInt)info->diagonal_fill;
1931: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1933: ISIdentity(isrow,&row_identity);
1934: ISIdentity(iscol,&col_identity);
1935: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1936: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);
1938: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1939: if (a->inode.size) {
1940: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1941: }
1942: fact->factortype = MAT_FACTOR_ILU;
1943: (fact)->info.factor_mallocs = 0;
1944: (fact)->info.fill_ratio_given = info->fill;
1945: (fact)->info.fill_ratio_needed = 1.0;
1947: b = (Mat_SeqAIJ*)(fact)->data;
1948: MatMissingDiagonal(A,&missing,&d);
1949: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1950: b->row = isrow;
1951: b->col = iscol;
1952: b->icol = isicol;
1953: PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);
1954: PetscObjectReference((PetscObject)isrow);
1955: PetscObjectReference((PetscObject)iscol);
1956: return(0);
1957: }
1959: ISGetIndices(isrow,&r);
1960: ISGetIndices(isicol,&ic);
1962: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1963: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
1964: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
1965: bi[0] = bdiag[0] = 0;
1967: PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);
1969: /* create a linked list for storing column indices of the active row */
1970: nlnk = n + 1;
1971: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1973: /* initial FreeSpace size is f*(ai[n]+1) */
1974: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1975: current_space = free_space;
1976: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1977: current_space_lvl = free_space_lvl;
1979: for (i=0; i<n; i++) {
1980: nzi = 0;
1981: /* copy current row into linked list */
1982: nnz = ai[r[i]+1] - ai[r[i]];
1983: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1984: cols = aj + ai[r[i]];
1985: lnk[i] = -1; /* marker to indicate if diagonal exists */
1986: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1987: nzi += nlnk;
1989: /* make sure diagonal entry is included */
1990: if (diagonal_fill && lnk[i] == -1) {
1991: fm = n;
1992: while (lnk[fm] < i) fm = lnk[fm];
1993: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1994: lnk[fm] = i;
1995: lnk_lvl[i] = 0;
1996: nzi++; dcount++;
1997: }
1999: /* add pivot rows into the active row */
2000: nzbd = 0;
2001: prow = lnk[n];
2002: while (prow < i) {
2003: nnz = bdiag[prow];
2004: cols = bj_ptr[prow] + nnz + 1;
2005: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
2006: nnz = bi[prow+1] - bi[prow] - nnz - 1;
2007: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
2008: nzi += nlnk;
2009: prow = lnk[prow];
2010: nzbd++;
2011: }
2012: bdiag[i] = nzbd;
2013: bi[i+1] = bi[i] + nzi;
2015: /* if free space is not available, make more free space */
2016: if (current_space->local_remaining<nzi) {
2017: nnz = nzi*(n - i); /* estimated and max additional space needed */
2018: PetscFreeSpaceGet(nnz,¤t_space);
2019: PetscFreeSpaceGet(nnz,¤t_space_lvl);
2020: reallocs++;
2021: }
2023: /* copy data into free_space and free_space_lvl, then initialize lnk */
2024: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2025: bj_ptr[i] = current_space->array;
2026: bjlvl_ptr[i] = current_space_lvl->array;
2028: /* make sure the active row i has diagonal entry */
2029: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2031: current_space->array += nzi;
2032: current_space->local_used += nzi;
2033: current_space->local_remaining -= nzi;
2034: current_space_lvl->array += nzi;
2035: current_space_lvl->local_used += nzi;
2036: current_space_lvl->local_remaining -= nzi;
2037: }
2039: ISRestoreIndices(isrow,&r);
2040: ISRestoreIndices(isicol,&ic);
2042: /* destroy list of free space and other temporary arrays */
2043: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
2044: PetscFreeSpaceContiguous(&free_space,bj); /* copy free_space -> bj */
2045: PetscIncompleteLLDestroy(lnk,lnkbt);
2046: PetscFreeSpaceDestroy(free_space_lvl);
2047: PetscFree2(bj_ptr,bjlvl_ptr);
2049: #if defined(PETSC_USE_INFO)
2050: {
2051: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2052: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
2053: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
2054: PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
2055: PetscInfo(A,"for best performance.\n");
2056: if (diagonal_fill) {
2057: PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
2058: }
2059: }
2060: #endif
2062: /* put together the new matrix */
2063: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
2064: PetscLogObjectParent(fact,isicol);
2065: b = (Mat_SeqAIJ*)(fact)->data;
2067: b->free_a = PETSC_TRUE;
2068: b->free_ij = PETSC_TRUE;
2069: b->singlemalloc = PETSC_FALSE;
2071: PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);
2072: b->j = bj;
2073: b->i = bi;
2074: for (i=0; i<n; i++) bdiag[i] += bi[i];
2075: b->diag = bdiag;
2076: b->ilen = 0;
2077: b->imax = 0;
2078: b->row = isrow;
2079: b->col = iscol;
2080: PetscObjectReference((PetscObject)isrow);
2081: PetscObjectReference((PetscObject)iscol);
2082: b->icol = isicol;
2083: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
2084: /* In b structure: Free imax, ilen, old a, old j.
2085: Allocate bdiag, solve_work, new a, new j */
2086: PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
2087: b->maxnz = b->nz = bi[n];
2089: (fact)->info.factor_mallocs = reallocs;
2090: (fact)->info.fill_ratio_given = f;
2091: (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2092: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2093: if (a->inode.size) {
2094: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2095: }
2096: return(0);
2097: }
2101: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2102: {
2103: Mat C = B;
2104: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2105: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2106: IS ip=b->row,iip = b->icol;
2108: const PetscInt *rip,*riip;
2109: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2110: PetscInt *ai=a->i,*aj=a->j;
2111: PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2112: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2113: PetscBool perm_identity;
2114: FactorShiftCtx sctx;
2115: PetscReal rs;
2116: MatScalar d,*v;
2119: /* MatPivotSetUp(): initialize shift context sctx */
2120: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2122: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2123: sctx.shift_top = info->zeropivot;
2124: for (i=0; i<mbs; i++) {
2125: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2126: d = (aa)[a->diag[i]];
2127: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2128: v = aa+ai[i];
2129: nz = ai[i+1] - ai[i];
2130: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2131: if (rs>sctx.shift_top) sctx.shift_top = rs;
2132: }
2133: sctx.shift_top *= 1.1;
2134: sctx.nshift_max = 5;
2135: sctx.shift_lo = 0.;
2136: sctx.shift_hi = 1.;
2137: }
2139: ISGetIndices(ip,&rip);
2140: ISGetIndices(iip,&riip);
2142: /* allocate working arrays
2143: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2144: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2145: */
2146: PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);
2148: do {
2149: sctx.newshift = PETSC_FALSE;
2151: for (i=0; i<mbs; i++) c2r[i] = mbs;
2152: if (mbs) il[0] = 0;
2154: for (k = 0; k<mbs; k++) {
2155: /* zero rtmp */
2156: nz = bi[k+1] - bi[k];
2157: bjtmp = bj + bi[k];
2158: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2160: /* load in initial unfactored row */
2161: bval = ba + bi[k];
2162: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2163: for (j = jmin; j < jmax; j++) {
2164: col = riip[aj[j]];
2165: if (col >= k) { /* only take upper triangular entry */
2166: rtmp[col] = aa[j];
2167: *bval++ = 0.0; /* for in-place factorization */
2168: }
2169: }
2170: /* shift the diagonal of the matrix: ZeropivotApply() */
2171: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2173: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2174: dk = rtmp[k];
2175: i = c2r[k]; /* first row to be added to k_th row */
2177: while (i < k) {
2178: nexti = c2r[i]; /* next row to be added to k_th row */
2180: /* compute multiplier, update diag(k) and U(i,k) */
2181: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2182: uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2183: dk += uikdi*ba[ili]; /* update diag[k] */
2184: ba[ili] = uikdi; /* -U(i,k) */
2186: /* add multiple of row i to k-th row */
2187: jmin = ili + 1; jmax = bi[i+1];
2188: if (jmin < jmax) {
2189: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2190: /* update il and c2r for row i */
2191: il[i] = jmin;
2192: j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2193: }
2194: i = nexti;
2195: }
2197: /* copy data into U(k,:) */
2198: rs = 0.0;
2199: jmin = bi[k]; jmax = bi[k+1]-1;
2200: if (jmin < jmax) {
2201: for (j=jmin; j<jmax; j++) {
2202: col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2203: }
2204: /* add the k-th row into il and c2r */
2205: il[k] = jmin;
2206: i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2207: }
2209: /* MatPivotCheck() */
2210: sctx.rs = rs;
2211: sctx.pv = dk;
2212: MatPivotCheck(A,info,&sctx,i);
2213: if (sctx.newshift) break;
2214: dk = sctx.pv;
2216: ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2217: }
2218: } while (sctx.newshift);
2220: PetscFree3(rtmp,il,c2r);
2221: ISRestoreIndices(ip,&rip);
2222: ISRestoreIndices(iip,&riip);
2224: ISIdentity(ip,&perm_identity);
2225: if (perm_identity) {
2226: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2227: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2228: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2229: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2230: } else {
2231: B->ops->solve = MatSolve_SeqSBAIJ_1;
2232: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2233: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2234: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2235: }
2237: C->assembled = PETSC_TRUE;
2238: C->preallocated = PETSC_TRUE;
2240: PetscLogFlops(C->rmap->n);
2242: /* MatPivotView() */
2243: if (sctx.nshift) {
2244: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2245: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
2246: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2247: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
2248: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2249: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);
2250: }
2251: }
2252: return(0);
2253: }
2257: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2258: {
2259: Mat C = B;
2260: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2261: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2262: IS ip=b->row,iip = b->icol;
2264: const PetscInt *rip,*riip;
2265: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2266: PetscInt *ai=a->i,*aj=a->j;
2267: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2268: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2269: PetscBool perm_identity;
2270: FactorShiftCtx sctx;
2271: PetscReal rs;
2272: MatScalar d,*v;
2275: /* MatPivotSetUp(): initialize shift context sctx */
2276: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2278: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2279: sctx.shift_top = info->zeropivot;
2280: for (i=0; i<mbs; i++) {
2281: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2282: d = (aa)[a->diag[i]];
2283: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2284: v = aa+ai[i];
2285: nz = ai[i+1] - ai[i];
2286: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2287: if (rs>sctx.shift_top) sctx.shift_top = rs;
2288: }
2289: sctx.shift_top *= 1.1;
2290: sctx.nshift_max = 5;
2291: sctx.shift_lo = 0.;
2292: sctx.shift_hi = 1.;
2293: }
2295: ISGetIndices(ip,&rip);
2296: ISGetIndices(iip,&riip);
2298: /* initialization */
2299: PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);
2301: do {
2302: sctx.newshift = PETSC_FALSE;
2304: for (i=0; i<mbs; i++) jl[i] = mbs;
2305: il[0] = 0;
2307: for (k = 0; k<mbs; k++) {
2308: /* zero rtmp */
2309: nz = bi[k+1] - bi[k];
2310: bjtmp = bj + bi[k];
2311: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2313: bval = ba + bi[k];
2314: /* initialize k-th row by the perm[k]-th row of A */
2315: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2316: for (j = jmin; j < jmax; j++) {
2317: col = riip[aj[j]];
2318: if (col >= k) { /* only take upper triangular entry */
2319: rtmp[col] = aa[j];
2320: *bval++ = 0.0; /* for in-place factorization */
2321: }
2322: }
2323: /* shift the diagonal of the matrix */
2324: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2326: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2327: dk = rtmp[k];
2328: i = jl[k]; /* first row to be added to k_th row */
2330: while (i < k) {
2331: nexti = jl[i]; /* next row to be added to k_th row */
2333: /* compute multiplier, update diag(k) and U(i,k) */
2334: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2335: uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2336: dk += uikdi*ba[ili];
2337: ba[ili] = uikdi; /* -U(i,k) */
2339: /* add multiple of row i to k-th row */
2340: jmin = ili + 1; jmax = bi[i+1];
2341: if (jmin < jmax) {
2342: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2343: /* update il and jl for row i */
2344: il[i] = jmin;
2345: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2346: }
2347: i = nexti;
2348: }
2350: /* shift the diagonals when zero pivot is detected */
2351: /* compute rs=sum of abs(off-diagonal) */
2352: rs = 0.0;
2353: jmin = bi[k]+1;
2354: nz = bi[k+1] - jmin;
2355: bcol = bj + jmin;
2356: for (j=0; j<nz; j++) {
2357: rs += PetscAbsScalar(rtmp[bcol[j]]);
2358: }
2360: sctx.rs = rs;
2361: sctx.pv = dk;
2362: MatPivotCheck(A,info,&sctx,k);
2363: if (sctx.newshift) break;
2364: dk = sctx.pv;
2366: /* copy data into U(k,:) */
2367: ba[bi[k]] = 1.0/dk; /* U(k,k) */
2368: jmin = bi[k]+1; jmax = bi[k+1];
2369: if (jmin < jmax) {
2370: for (j=jmin; j<jmax; j++) {
2371: col = bj[j]; ba[j] = rtmp[col];
2372: }
2373: /* add the k-th row into il and jl */
2374: il[k] = jmin;
2375: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2376: }
2377: }
2378: } while (sctx.newshift);
2380: PetscFree3(rtmp,il,jl);
2381: ISRestoreIndices(ip,&rip);
2382: ISRestoreIndices(iip,&riip);
2384: ISIdentity(ip,&perm_identity);
2385: if (perm_identity) {
2386: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2387: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2388: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2389: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2390: } else {
2391: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2392: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2393: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2394: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2395: }
2397: C->assembled = PETSC_TRUE;
2398: C->preallocated = PETSC_TRUE;
2400: PetscLogFlops(C->rmap->n);
2401: if (sctx.nshift) {
2402: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2403: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
2404: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2405: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
2406: }
2407: }
2408: return(0);
2409: }
2411: /*
2412: icc() under revised new data structure.
2413: Factored arrays bj and ba are stored as
2414: U(0,:),...,U(i,:),U(n-1,:)
2416: ui=fact->i is an array of size n+1, in which
2417: ui+
2418: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2419: ui[n]: points to U(n-1,n-1)+1
2421: udiag=fact->diag is an array of size n,in which
2422: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2424: U(i,:) contains udiag[i] as its last entry, i.e.,
2425: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2426: */
2430: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2431: {
2432: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2433: Mat_SeqSBAIJ *b;
2434: PetscErrorCode ierr;
2435: PetscBool perm_identity,missing;
2436: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2437: const PetscInt *rip,*riip;
2438: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2439: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2440: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2441: PetscReal fill =info->fill,levels=info->levels;
2442: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2443: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2444: PetscBT lnkbt;
2445: IS iperm;
2448: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2449: MatMissingDiagonal(A,&missing,&d);
2450: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2451: ISIdentity(perm,&perm_identity);
2452: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2454: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2455: PetscMalloc((am+1)*sizeof(PetscInt),&udiag);
2456: ui[0] = 0;
2458: /* ICC(0) without matrix ordering: simply rearrange column indices */
2459: if (!levels && perm_identity) {
2460: for (i=0; i<am; i++) {
2461: ncols = ai[i+1] - a->diag[i];
2462: ui[i+1] = ui[i] + ncols;
2463: udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2464: }
2465: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2466: cols = uj;
2467: for (i=0; i<am; i++) {
2468: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2469: ncols = ai[i+1] - a->diag[i] -1;
2470: for (j=0; j<ncols; j++) *cols++ = aj[j];
2471: *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2472: }
2473: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2474: ISGetIndices(iperm,&riip);
2475: ISGetIndices(perm,&rip);
2477: /* initialization */
2478: PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);
2480: /* jl: linked list for storing indices of the pivot rows
2481: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2482: PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);
2483: for (i=0; i<am; i++) {
2484: jl[i] = am; il[i] = 0;
2485: }
2487: /* create and initialize a linked list for storing column indices of the active row k */
2488: nlnk = am + 1;
2489: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2491: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2492: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);
2493: current_space = free_space;
2494: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);
2495: current_space_lvl = free_space_lvl;
2497: for (k=0; k<am; k++) { /* for each active row k */
2498: /* initialize lnk by the column indices of row rip[k] of A */
2499: nzk = 0;
2500: ncols = ai[rip[k]+1] - ai[rip[k]];
2501: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2502: ncols_upper = 0;
2503: for (j=0; j<ncols; j++) {
2504: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2505: if (riip[i] >= k) { /* only take upper triangular entry */
2506: ajtmp[ncols_upper] = i;
2507: ncols_upper++;
2508: }
2509: }
2510: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2511: nzk += nlnk;
2513: /* update lnk by computing fill-in for each pivot row to be merged in */
2514: prow = jl[k]; /* 1st pivot row */
2516: while (prow < k) {
2517: nextprow = jl[prow];
2519: /* merge prow into k-th row */
2520: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2521: jmax = ui[prow+1];
2522: ncols = jmax-jmin;
2523: i = jmin - ui[prow];
2524: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2525: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2526: j = *(uj - 1);
2527: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2528: nzk += nlnk;
2530: /* update il and jl for prow */
2531: if (jmin < jmax) {
2532: il[prow] = jmin;
2533: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2534: }
2535: prow = nextprow;
2536: }
2538: /* if free space is not available, make more free space */
2539: if (current_space->local_remaining<nzk) {
2540: i = am - k + 1; /* num of unfactored rows */
2541: i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2542: PetscFreeSpaceGet(i,¤t_space);
2543: PetscFreeSpaceGet(i,¤t_space_lvl);
2544: reallocs++;
2545: }
2547: /* copy data into free_space and free_space_lvl, then initialize lnk */
2548: if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2549: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2551: /* add the k-th row into il and jl */
2552: if (nzk > 1) {
2553: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2554: jl[k] = jl[i]; jl[i] = k;
2555: il[k] = ui[k] + 1;
2556: }
2557: uj_ptr[k] = current_space->array;
2558: uj_lvl_ptr[k] = current_space_lvl->array;
2560: current_space->array += nzk;
2561: current_space->local_used += nzk;
2562: current_space->local_remaining -= nzk;
2564: current_space_lvl->array += nzk;
2565: current_space_lvl->local_used += nzk;
2566: current_space_lvl->local_remaining -= nzk;
2568: ui[k+1] = ui[k] + nzk;
2569: }
2571: ISRestoreIndices(perm,&rip);
2572: ISRestoreIndices(iperm,&riip);
2573: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2574: PetscFree(ajtmp);
2576: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2577: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2578: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2579: PetscIncompleteLLDestroy(lnk,lnkbt);
2580: PetscFreeSpaceDestroy(free_space_lvl);
2582: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2584: /* put together the new matrix in MATSEQSBAIJ format */
2585: b = (Mat_SeqSBAIJ*)(fact)->data;
2586: b->singlemalloc = PETSC_FALSE;
2588: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
2590: b->j = uj;
2591: b->i = ui;
2592: b->diag = udiag;
2593: b->free_diag = PETSC_TRUE;
2594: b->ilen = 0;
2595: b->imax = 0;
2596: b->row = perm;
2597: b->col = perm;
2598: PetscObjectReference((PetscObject)perm);
2599: PetscObjectReference((PetscObject)perm);
2600: b->icol = iperm;
2601: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2603: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
2604: PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2606: b->maxnz = b->nz = ui[am];
2607: b->free_a = PETSC_TRUE;
2608: b->free_ij = PETSC_TRUE;
2610: fact->info.factor_mallocs = reallocs;
2611: fact->info.fill_ratio_given = fill;
2612: if (ai[am] != 0) {
2613: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2614: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2615: } else {
2616: fact->info.fill_ratio_needed = 0.0;
2617: }
2618: #if defined(PETSC_USE_INFO)
2619: if (ai[am] != 0) {
2620: PetscReal af = fact->info.fill_ratio_needed;
2621: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
2622: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2623: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
2624: } else {
2625: PetscInfo(A,"Empty matrix.\n");
2626: }
2627: #endif
2628: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2629: return(0);
2630: }
2634: PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2635: {
2636: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2637: Mat_SeqSBAIJ *b;
2638: PetscErrorCode ierr;
2639: PetscBool perm_identity,missing;
2640: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2641: const PetscInt *rip,*riip;
2642: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2643: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2644: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2645: PetscReal fill =info->fill,levels=info->levels;
2646: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2647: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2648: PetscBT lnkbt;
2649: IS iperm;
2652: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2653: MatMissingDiagonal(A,&missing,&d);
2654: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2655: ISIdentity(perm,&perm_identity);
2656: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2658: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2659: PetscMalloc((am+1)*sizeof(PetscInt),&udiag);
2660: ui[0] = 0;
2662: /* ICC(0) without matrix ordering: simply copies fill pattern */
2663: if (!levels && perm_identity) {
2665: for (i=0; i<am; i++) {
2666: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
2667: udiag[i] = ui[i];
2668: }
2669: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2670: cols = uj;
2671: for (i=0; i<am; i++) {
2672: aj = a->j + a->diag[i];
2673: ncols = ui[i+1] - ui[i];
2674: for (j=0; j<ncols; j++) *cols++ = *aj++;
2675: }
2676: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2677: ISGetIndices(iperm,&riip);
2678: ISGetIndices(perm,&rip);
2680: /* initialization */
2681: PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);
2683: /* jl: linked list for storing indices of the pivot rows
2684: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2685: PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);
2686: for (i=0; i<am; i++) {
2687: jl[i] = am; il[i] = 0;
2688: }
2690: /* create and initialize a linked list for storing column indices of the active row k */
2691: nlnk = am + 1;
2692: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2694: /* initial FreeSpace size is fill*(ai[am]+1) */
2695: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
2696: current_space = free_space;
2697: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
2698: current_space_lvl = free_space_lvl;
2700: for (k=0; k<am; k++) { /* for each active row k */
2701: /* initialize lnk by the column indices of row rip[k] of A */
2702: nzk = 0;
2703: ncols = ai[rip[k]+1] - ai[rip[k]];
2704: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2705: ncols_upper = 0;
2706: for (j=0; j<ncols; j++) {
2707: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2708: if (riip[i] >= k) { /* only take upper triangular entry */
2709: ajtmp[ncols_upper] = i;
2710: ncols_upper++;
2711: }
2712: }
2713: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2714: nzk += nlnk;
2716: /* update lnk by computing fill-in for each pivot row to be merged in */
2717: prow = jl[k]; /* 1st pivot row */
2719: while (prow < k) {
2720: nextprow = jl[prow];
2722: /* merge prow into k-th row */
2723: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2724: jmax = ui[prow+1];
2725: ncols = jmax-jmin;
2726: i = jmin - ui[prow];
2727: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2728: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2729: j = *(uj - 1);
2730: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2731: nzk += nlnk;
2733: /* update il and jl for prow */
2734: if (jmin < jmax) {
2735: il[prow] = jmin;
2736: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2737: }
2738: prow = nextprow;
2739: }
2741: /* if free space is not available, make more free space */
2742: if (current_space->local_remaining<nzk) {
2743: i = am - k + 1; /* num of unfactored rows */
2744: i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2745: PetscFreeSpaceGet(i,¤t_space);
2746: PetscFreeSpaceGet(i,¤t_space_lvl);
2747: reallocs++;
2748: }
2750: /* copy data into free_space and free_space_lvl, then initialize lnk */
2751: if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2752: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2754: /* add the k-th row into il and jl */
2755: if (nzk > 1) {
2756: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2757: jl[k] = jl[i]; jl[i] = k;
2758: il[k] = ui[k] + 1;
2759: }
2760: uj_ptr[k] = current_space->array;
2761: uj_lvl_ptr[k] = current_space_lvl->array;
2763: current_space->array += nzk;
2764: current_space->local_used += nzk;
2765: current_space->local_remaining -= nzk;
2767: current_space_lvl->array += nzk;
2768: current_space_lvl->local_used += nzk;
2769: current_space_lvl->local_remaining -= nzk;
2771: ui[k+1] = ui[k] + nzk;
2772: }
2774: #if defined(PETSC_USE_INFO)
2775: if (ai[am] != 0) {
2776: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2777: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
2778: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2779: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
2780: } else {
2781: PetscInfo(A,"Empty matrix.\n");
2782: }
2783: #endif
2785: ISRestoreIndices(perm,&rip);
2786: ISRestoreIndices(iperm,&riip);
2787: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2788: PetscFree(ajtmp);
2790: /* destroy list of free space and other temporary array(s) */
2791: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2792: PetscFreeSpaceContiguous(&free_space,uj);
2793: PetscIncompleteLLDestroy(lnk,lnkbt);
2794: PetscFreeSpaceDestroy(free_space_lvl);
2796: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2798: /* put together the new matrix in MATSEQSBAIJ format */
2800: b = (Mat_SeqSBAIJ*)fact->data;
2801: b->singlemalloc = PETSC_FALSE;
2803: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
2805: b->j = uj;
2806: b->i = ui;
2807: b->diag = udiag;
2808: b->free_diag = PETSC_TRUE;
2809: b->ilen = 0;
2810: b->imax = 0;
2811: b->row = perm;
2812: b->col = perm;
2814: PetscObjectReference((PetscObject)perm);
2815: PetscObjectReference((PetscObject)perm);
2817: b->icol = iperm;
2818: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2819: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
2820: PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
2821: b->maxnz = b->nz = ui[am];
2822: b->free_a = PETSC_TRUE;
2823: b->free_ij = PETSC_TRUE;
2825: fact->info.factor_mallocs = reallocs;
2826: fact->info.fill_ratio_given = fill;
2827: if (ai[am] != 0) {
2828: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2829: } else {
2830: fact->info.fill_ratio_needed = 0.0;
2831: }
2832: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2833: return(0);
2834: }
2838: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2839: {
2840: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2841: Mat_SeqSBAIJ *b;
2842: PetscErrorCode ierr;
2843: PetscBool perm_identity;
2844: PetscReal fill = info->fill;
2845: const PetscInt *rip,*riip;
2846: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2847: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2848: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2849: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2850: PetscBT lnkbt;
2851: IS iperm;
2854: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2855: /* check whether perm is the identity mapping */
2856: ISIdentity(perm,&perm_identity);
2857: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2858: ISGetIndices(iperm,&riip);
2859: ISGetIndices(perm,&rip);
2861: /* initialization */
2862: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2863: PetscMalloc((am+1)*sizeof(PetscInt),&udiag);
2864: ui[0] = 0;
2866: /* jl: linked list for storing indices of the pivot rows
2867: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2868: PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);
2869: for (i=0; i<am; i++) {
2870: jl[i] = am; il[i] = 0;
2871: }
2873: /* create and initialize a linked list for storing column indices of the active row k */
2874: nlnk = am + 1;
2875: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2877: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2878: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);
2879: current_space = free_space;
2881: for (k=0; k<am; k++) { /* for each active row k */
2882: /* initialize lnk by the column indices of row rip[k] of A */
2883: nzk = 0;
2884: ncols = ai[rip[k]+1] - ai[rip[k]];
2885: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2886: ncols_upper = 0;
2887: for (j=0; j<ncols; j++) {
2888: i = riip[*(aj + ai[rip[k]] + j)];
2889: if (i >= k) { /* only take upper triangular entry */
2890: cols[ncols_upper] = i;
2891: ncols_upper++;
2892: }
2893: }
2894: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
2895: nzk += nlnk;
2897: /* update lnk by computing fill-in for each pivot row to be merged in */
2898: prow = jl[k]; /* 1st pivot row */
2900: while (prow < k) {
2901: nextprow = jl[prow];
2902: /* merge prow into k-th row */
2903: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2904: jmax = ui[prow+1];
2905: ncols = jmax-jmin;
2906: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2907: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
2908: nzk += nlnk;
2910: /* update il and jl for prow */
2911: if (jmin < jmax) {
2912: il[prow] = jmin;
2913: j = *uj_ptr;
2914: jl[prow] = jl[j];
2915: jl[j] = prow;
2916: }
2917: prow = nextprow;
2918: }
2920: /* if free space is not available, make more free space */
2921: if (current_space->local_remaining<nzk) {
2922: i = am - k + 1; /* num of unfactored rows */
2923: i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2924: PetscFreeSpaceGet(i,¤t_space);
2925: reallocs++;
2926: }
2928: /* copy data into free space, then initialize lnk */
2929: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
2931: /* add the k-th row into il and jl */
2932: if (nzk > 1) {
2933: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2934: jl[k] = jl[i]; jl[i] = k;
2935: il[k] = ui[k] + 1;
2936: }
2937: ui_ptr[k] = current_space->array;
2939: current_space->array += nzk;
2940: current_space->local_used += nzk;
2941: current_space->local_remaining -= nzk;
2943: ui[k+1] = ui[k] + nzk;
2944: }
2946: ISRestoreIndices(perm,&rip);
2947: ISRestoreIndices(iperm,&riip);
2948: PetscFree4(ui_ptr,jl,il,cols);
2950: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2951: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2952: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2953: PetscLLDestroy(lnk,lnkbt);
2955: /* put together the new matrix in MATSEQSBAIJ format */
2957: b = (Mat_SeqSBAIJ*)fact->data;
2958: b->singlemalloc = PETSC_FALSE;
2959: b->free_a = PETSC_TRUE;
2960: b->free_ij = PETSC_TRUE;
2962: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
2964: b->j = uj;
2965: b->i = ui;
2966: b->diag = udiag;
2967: b->free_diag = PETSC_TRUE;
2968: b->ilen = 0;
2969: b->imax = 0;
2970: b->row = perm;
2971: b->col = perm;
2973: PetscObjectReference((PetscObject)perm);
2974: PetscObjectReference((PetscObject)perm);
2976: b->icol = iperm;
2977: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2979: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
2980: PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2982: b->maxnz = b->nz = ui[am];
2984: fact->info.factor_mallocs = reallocs;
2985: fact->info.fill_ratio_given = fill;
2986: if (ai[am] != 0) {
2987: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2988: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2989: } else {
2990: fact->info.fill_ratio_needed = 0.0;
2991: }
2992: #if defined(PETSC_USE_INFO)
2993: if (ai[am] != 0) {
2994: PetscReal af = fact->info.fill_ratio_needed;
2995: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
2996: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2997: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
2998: } else {
2999: PetscInfo(A,"Empty matrix.\n");
3000: }
3001: #endif
3002: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
3003: return(0);
3004: }
3008: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
3009: {
3010: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3011: Mat_SeqSBAIJ *b;
3012: PetscErrorCode ierr;
3013: PetscBool perm_identity;
3014: PetscReal fill = info->fill;
3015: const PetscInt *rip,*riip;
3016: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
3017: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
3018: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
3019: PetscFreeSpaceList free_space=NULL,current_space=NULL;
3020: PetscBT lnkbt;
3021: IS iperm;
3024: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
3025: /* check whether perm is the identity mapping */
3026: ISIdentity(perm,&perm_identity);
3027: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
3028: ISGetIndices(iperm,&riip);
3029: ISGetIndices(perm,&rip);
3031: /* initialization */
3032: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
3033: ui[0] = 0;
3035: /* jl: linked list for storing indices of the pivot rows
3036: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3037: PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);
3038: for (i=0; i<am; i++) {
3039: jl[i] = am; il[i] = 0;
3040: }
3042: /* create and initialize a linked list for storing column indices of the active row k */
3043: nlnk = am + 1;
3044: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
3046: /* initial FreeSpace size is fill*(ai[am]+1) */
3047: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
3048: current_space = free_space;
3050: for (k=0; k<am; k++) { /* for each active row k */
3051: /* initialize lnk by the column indices of row rip[k] of A */
3052: nzk = 0;
3053: ncols = ai[rip[k]+1] - ai[rip[k]];
3054: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3055: ncols_upper = 0;
3056: for (j=0; j<ncols; j++) {
3057: i = riip[*(aj + ai[rip[k]] + j)];
3058: if (i >= k) { /* only take upper triangular entry */
3059: cols[ncols_upper] = i;
3060: ncols_upper++;
3061: }
3062: }
3063: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
3064: nzk += nlnk;
3066: /* update lnk by computing fill-in for each pivot row to be merged in */
3067: prow = jl[k]; /* 1st pivot row */
3069: while (prow < k) {
3070: nextprow = jl[prow];
3071: /* merge prow into k-th row */
3072: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3073: jmax = ui[prow+1];
3074: ncols = jmax-jmin;
3075: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3076: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
3077: nzk += nlnk;
3079: /* update il and jl for prow */
3080: if (jmin < jmax) {
3081: il[prow] = jmin;
3082: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3083: }
3084: prow = nextprow;
3085: }
3087: /* if free space is not available, make more free space */
3088: if (current_space->local_remaining<nzk) {
3089: i = am - k + 1; /* num of unfactored rows */
3090: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3091: PetscFreeSpaceGet(i,¤t_space);
3092: reallocs++;
3093: }
3095: /* copy data into free space, then initialize lnk */
3096: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
3098: /* add the k-th row into il and jl */
3099: if (nzk-1 > 0) {
3100: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3101: jl[k] = jl[i]; jl[i] = k;
3102: il[k] = ui[k] + 1;
3103: }
3104: ui_ptr[k] = current_space->array;
3106: current_space->array += nzk;
3107: current_space->local_used += nzk;
3108: current_space->local_remaining -= nzk;
3110: ui[k+1] = ui[k] + nzk;
3111: }
3113: #if defined(PETSC_USE_INFO)
3114: if (ai[am] != 0) {
3115: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3116: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
3117: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
3118: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
3119: } else {
3120: PetscInfo(A,"Empty matrix.\n");
3121: }
3122: #endif
3124: ISRestoreIndices(perm,&rip);
3125: ISRestoreIndices(iperm,&riip);
3126: PetscFree4(ui_ptr,jl,il,cols);
3128: /* destroy list of free space and other temporary array(s) */
3129: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
3130: PetscFreeSpaceContiguous(&free_space,uj);
3131: PetscLLDestroy(lnk,lnkbt);
3133: /* put together the new matrix in MATSEQSBAIJ format */
3135: b = (Mat_SeqSBAIJ*)fact->data;
3136: b->singlemalloc = PETSC_FALSE;
3137: b->free_a = PETSC_TRUE;
3138: b->free_ij = PETSC_TRUE;
3140: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
3142: b->j = uj;
3143: b->i = ui;
3144: b->diag = 0;
3145: b->ilen = 0;
3146: b->imax = 0;
3147: b->row = perm;
3148: b->col = perm;
3150: PetscObjectReference((PetscObject)perm);
3151: PetscObjectReference((PetscObject)perm);
3153: b->icol = iperm;
3154: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3156: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
3157: PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
3158: b->maxnz = b->nz = ui[am];
3160: fact->info.factor_mallocs = reallocs;
3161: fact->info.fill_ratio_given = fill;
3162: if (ai[am] != 0) {
3163: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3164: } else {
3165: fact->info.fill_ratio_needed = 0.0;
3166: }
3167: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3168: return(0);
3169: }
3173: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3174: {
3175: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3176: PetscErrorCode ierr;
3177: PetscInt n = A->rmap->n;
3178: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3179: PetscScalar *x,sum;
3180: const PetscScalar *b;
3181: const MatScalar *aa = a->a,*v;
3182: PetscInt i,nz;
3185: if (!n) return(0);
3187: VecGetArrayRead(bb,&b);
3188: VecGetArray(xx,&x);
3190: /* forward solve the lower triangular */
3191: x[0] = b[0];
3192: v = aa;
3193: vi = aj;
3194: for (i=1; i<n; i++) {
3195: nz = ai[i+1] - ai[i];
3196: sum = b[i];
3197: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3198: v += nz;
3199: vi += nz;
3200: x[i] = sum;
3201: }
3203: /* backward solve the upper triangular */
3204: for (i=n-1; i>=0; i--) {
3205: v = aa + adiag[i+1] + 1;
3206: vi = aj + adiag[i+1] + 1;
3207: nz = adiag[i] - adiag[i+1]-1;
3208: sum = x[i];
3209: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3210: x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3211: }
3213: PetscLogFlops(2.0*a->nz - A->cmap->n);
3214: VecRestoreArrayRead(bb,&b);
3215: VecRestoreArray(xx,&x);
3216: return(0);
3217: }
3221: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3222: {
3223: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3224: IS iscol = a->col,isrow = a->row;
3225: PetscErrorCode ierr;
3226: PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3227: const PetscInt *rout,*cout,*r,*c;
3228: PetscScalar *x,*tmp,sum;
3229: const PetscScalar *b;
3230: const MatScalar *aa = a->a,*v;
3233: if (!n) return(0);
3235: VecGetArrayRead(bb,&b);
3236: VecGetArray(xx,&x);
3237: tmp = a->solve_work;
3239: ISGetIndices(isrow,&rout); r = rout;
3240: ISGetIndices(iscol,&cout); c = cout;
3242: /* forward solve the lower triangular */
3243: tmp[0] = b[r[0]];
3244: v = aa;
3245: vi = aj;
3246: for (i=1; i<n; i++) {
3247: nz = ai[i+1] - ai[i];
3248: sum = b[r[i]];
3249: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3250: tmp[i] = sum;
3251: v += nz; vi += nz;
3252: }
3254: /* backward solve the upper triangular */
3255: for (i=n-1; i>=0; i--) {
3256: v = aa + adiag[i+1]+1;
3257: vi = aj + adiag[i+1]+1;
3258: nz = adiag[i]-adiag[i+1]-1;
3259: sum = tmp[i];
3260: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3261: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3262: }
3264: ISRestoreIndices(isrow,&rout);
3265: ISRestoreIndices(iscol,&cout);
3266: VecRestoreArrayRead(bb,&b);
3267: VecRestoreArray(xx,&x);
3268: PetscLogFlops(2*a->nz - A->cmap->n);
3269: return(0);
3270: }
3274: /*
3275: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3276: */
3277: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3278: {
3279: Mat B = *fact;
3280: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b;
3281: IS isicol;
3283: const PetscInt *r,*ic;
3284: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3285: PetscInt *bi,*bj,*bdiag,*bdiag_rev;
3286: PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3287: PetscInt nlnk,*lnk;
3288: PetscBT lnkbt;
3289: PetscBool row_identity,icol_identity;
3290: MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3291: const PetscInt *ics;
3292: PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3293: PetscReal dt =info->dt,shift=info->shiftamount;
3294: PetscInt dtcount=(PetscInt)info->dtcount,nnz_max;
3295: PetscBool missing;
3298: if (dt == PETSC_DEFAULT) dt = 0.005;
3299: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3301: /* ------- symbolic factorization, can be reused ---------*/
3302: MatMissingDiagonal(A,&missing,&i);
3303: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3304: adiag=a->diag;
3306: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
3308: /* bdiag is location of diagonal in factor */
3309: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag); /* becomes b->diag */
3310: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev); /* temporary */
3312: /* allocate row pointers bi */
3313: PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);
3315: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3316: if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3317: nnz_max = ai[n]+2*n*dtcount+2;
3319: PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);
3320: PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);
3322: /* put together the new matrix */
3323: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
3324: PetscLogObjectParent(B,isicol);
3325: b = (Mat_SeqAIJ*)B->data;
3327: b->free_a = PETSC_TRUE;
3328: b->free_ij = PETSC_TRUE;
3329: b->singlemalloc = PETSC_FALSE;
3331: b->a = ba;
3332: b->j = bj;
3333: b->i = bi;
3334: b->diag = bdiag;
3335: b->ilen = 0;
3336: b->imax = 0;
3337: b->row = isrow;
3338: b->col = iscol;
3339: PetscObjectReference((PetscObject)isrow);
3340: PetscObjectReference((PetscObject)iscol);
3341: b->icol = isicol;
3343: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
3344: PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));
3345: b->maxnz = nnz_max;
3347: B->factortype = MAT_FACTOR_ILUDT;
3348: B->info.factor_mallocs = 0;
3349: B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3350: /* ------- end of symbolic factorization ---------*/
3352: ISGetIndices(isrow,&r);
3353: ISGetIndices(isicol,&ic);
3354: ics = ic;
3356: /* linked list for storing column indices of the active row */
3357: nlnk = n + 1;
3358: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
3360: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3361: PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);
3362: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3363: PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);
3364: PetscMemzero(rtmp,n*sizeof(MatScalar));
3366: bi[0] = 0;
3367: bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3368: bdiag_rev[n] = bdiag[0];
3369: bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3370: for (i=0; i<n; i++) {
3371: /* copy initial fill into linked list */
3372: nzi = ai[r[i]+1] - ai[r[i]];
3373: if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3374: nzi_al = adiag[r[i]] - ai[r[i]];
3375: nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3376: ajtmp = aj + ai[r[i]];
3377: PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);
3379: /* load in initial (unfactored row) */
3380: aatmp = a->a + ai[r[i]];
3381: for (j=0; j<nzi; j++) {
3382: rtmp[ics[*ajtmp++]] = *aatmp++;
3383: }
3385: /* add pivot rows into linked list */
3386: row = lnk[n];
3387: while (row < i) {
3388: nzi_bl = bi[row+1] - bi[row] + 1;
3389: bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3390: PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);
3391: nzi += nlnk;
3392: row = lnk[row];
3393: }
3395: /* copy data from lnk into jtmp, then initialize lnk */
3396: PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);
3398: /* numerical factorization */
3399: bjtmp = jtmp;
3400: row = *bjtmp++; /* 1st pivot row */
3401: while (row < i) {
3402: pc = rtmp + row;
3403: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3404: multiplier = (*pc) * (*pv);
3405: *pc = multiplier;
3406: if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3407: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3408: pv = ba + bdiag[row+1] + 1;
3409: /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3410: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3411: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3412: PetscLogFlops(1+2*nz);
3413: }
3414: row = *bjtmp++;
3415: }
3417: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3418: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3419: nzi_bl = 0; j = 0;
3420: while (jtmp[j] < i) { /* Note: jtmp is sorted */
3421: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3422: nzi_bl++; j++;
3423: }
3424: nzi_bu = nzi - nzi_bl -1;
3425: while (j < nzi) {
3426: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3427: j++;
3428: }
3430: bjtmp = bj + bi[i];
3431: batmp = ba + bi[i];
3432: /* apply level dropping rule to L part */
3433: ncut = nzi_al + dtcount;
3434: if (ncut < nzi_bl) {
3435: PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);
3436: PetscSortIntWithScalarArray(ncut,jtmp,vtmp);
3437: } else {
3438: ncut = nzi_bl;
3439: }
3440: for (j=0; j<ncut; j++) {
3441: bjtmp[j] = jtmp[j];
3442: batmp[j] = vtmp[j];
3443: /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3444: }
3445: bi[i+1] = bi[i] + ncut;
3446: nzi = ncut + 1;
3448: /* apply level dropping rule to U part */
3449: ncut = nzi_au + dtcount;
3450: if (ncut < nzi_bu) {
3451: PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);
3452: PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);
3453: } else {
3454: ncut = nzi_bu;
3455: }
3456: nzi += ncut;
3458: /* mark bdiagonal */
3459: bdiag[i+1] = bdiag[i] - (ncut + 1);
3460: bdiag_rev[n-i-1] = bdiag[i+1];
3461: bi[2*n - i] = bi[2*n - i +1] - (ncut + 1);
3462: bjtmp = bj + bdiag[i];
3463: batmp = ba + bdiag[i];
3464: *bjtmp = i;
3465: *batmp = diag_tmp; /* rtmp[i]; */
3466: if (*batmp == 0.0) {
3467: *batmp = dt+shift;
3468: /* printf(" row %d add shift %g\n",i,shift); */
3469: }
3470: *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3471: /* printf(" (%d,%g),",*bjtmp,*batmp); */
3473: bjtmp = bj + bdiag[i+1]+1;
3474: batmp = ba + bdiag[i+1]+1;
3475: for (k=0; k<ncut; k++) {
3476: bjtmp[k] = jtmp[nzi_bl+1+k];
3477: batmp[k] = vtmp[nzi_bl+1+k];
3478: /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3479: }
3480: /* printf("\n"); */
3482: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3483: /*
3484: printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3485: printf(" ----------------------------\n");
3486: */
3487: } /* for (i=0; i<n; i++) */
3488: /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3489: if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3491: ISRestoreIndices(isrow,&r);
3492: ISRestoreIndices(isicol,&ic);
3494: PetscLLDestroy(lnk,lnkbt);
3495: PetscFree2(im,jtmp);
3496: PetscFree2(rtmp,vtmp);
3497: PetscFree(bdiag_rev);
3499: PetscLogFlops(B->cmap->n);
3500: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3502: ISIdentity(isrow,&row_identity);
3503: ISIdentity(isicol,&icol_identity);
3504: if (row_identity && icol_identity) {
3505: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3506: } else {
3507: B->ops->solve = MatSolve_SeqAIJ;
3508: }
3510: B->ops->solveadd = 0;
3511: B->ops->solvetranspose = 0;
3512: B->ops->solvetransposeadd = 0;
3513: B->ops->matsolve = 0;
3514: B->assembled = PETSC_TRUE;
3515: B->preallocated = PETSC_TRUE;
3516: return(0);
3517: }
3519: /* a wraper of MatILUDTFactor_SeqAIJ() */
3522: /*
3523: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3524: */
3526: PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3527: {
3531: MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);
3532: return(0);
3533: }
3535: /*
3536: same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3537: - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3538: */
3541: /*
3542: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3543: */
3545: PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3546: {
3547: Mat C =fact;
3548: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3549: IS isrow = b->row,isicol = b->icol;
3551: const PetscInt *r,*ic,*ics;
3552: PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3553: PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3554: MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3555: PetscReal dt=info->dt,shift=info->shiftamount;
3556: PetscBool row_identity, col_identity;
3559: ISGetIndices(isrow,&r);
3560: ISGetIndices(isicol,&ic);
3561: PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
3562: ics = ic;
3564: for (i=0; i<n; i++) {
3565: /* initialize rtmp array */
3566: nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */
3567: bjtmp = bj + bi[i];
3568: for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3569: rtmp[i] = 0.0;
3570: nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3571: bjtmp = bj + bdiag[i+1] + 1;
3572: for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3574: /* load in initial unfactored row of A */
3575: /* printf("row %d\n",i); */
3576: nz = ai[r[i]+1] - ai[r[i]];
3577: ajtmp = aj + ai[r[i]];
3578: v = aa + ai[r[i]];
3579: for (j=0; j<nz; j++) {
3580: rtmp[ics[*ajtmp++]] = v[j];
3581: /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3582: }
3583: /* printf("\n"); */
3585: /* numerical factorization */
3586: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3587: nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3588: k = 0;
3589: while (k < nzl) {
3590: row = *bjtmp++;
3591: /* printf(" prow %d\n",row); */
3592: pc = rtmp + row;
3593: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3594: multiplier = (*pc) * (*pv);
3595: *pc = multiplier;
3596: if (PetscAbsScalar(multiplier) > dt) {
3597: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3598: pv = b->a + bdiag[row+1] + 1;
3599: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3600: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3601: PetscLogFlops(1+2*nz);
3602: }
3603: k++;
3604: }
3606: /* finished row so stick it into b->a */
3607: /* L-part */
3608: pv = b->a + bi[i];
3609: pj = bj + bi[i];
3610: nzl = bi[i+1] - bi[i];
3611: for (j=0; j<nzl; j++) {
3612: pv[j] = rtmp[pj[j]];
3613: /* printf(" (%d,%g),",pj[j],pv[j]); */
3614: }
3616: /* diagonal: invert diagonal entries for simplier triangular solves */
3617: if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3618: b->a[bdiag[i]] = 1.0/rtmp[i];
3619: /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3621: /* U-part */
3622: pv = b->a + bdiag[i+1] + 1;
3623: pj = bj + bdiag[i+1] + 1;
3624: nzu = bdiag[i] - bdiag[i+1] - 1;
3625: for (j=0; j<nzu; j++) {
3626: pv[j] = rtmp[pj[j]];
3627: /* printf(" (%d,%g),",pj[j],pv[j]); */
3628: }
3629: /* printf("\n"); */
3630: }
3632: PetscFree(rtmp);
3633: ISRestoreIndices(isicol,&ic);
3634: ISRestoreIndices(isrow,&r);
3636: ISIdentity(isrow,&row_identity);
3637: ISIdentity(isicol,&col_identity);
3638: if (row_identity && col_identity) {
3639: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3640: } else {
3641: C->ops->solve = MatSolve_SeqAIJ;
3642: }
3643: C->ops->solveadd = 0;
3644: C->ops->solvetranspose = 0;
3645: C->ops->solvetransposeadd = 0;
3646: C->ops->matsolve = 0;
3647: C->assembled = PETSC_TRUE;
3648: C->preallocated = PETSC_TRUE;
3650: PetscLogFlops(C->cmap->n);
3651: return(0);
3652: }