Actual source code: mpiov.c
petsc-3.4.2 2013-07-02
2: /*
3: Routines to compute overlapping regions of a parallel MPI matrix
4:   and to find submatrices that are shared across processors.
5: */
6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
7: #include <petscbt.h>
9: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once(Mat,PetscInt,IS*);
10: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Local(Mat,PetscInt,char**,PetscInt*,PetscInt**);
11: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Receive(Mat,PetscInt,PetscInt**,PetscInt**,PetscInt*);
12: extern PetscErrorCode MatGetRow_MPIAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
13: extern PetscErrorCode MatRestoreRow_MPIAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17: PetscErrorCode MatIncreaseOverlap_MPIAIJ(Mat C,PetscInt imax,IS is[],PetscInt ov)
18: {
20: PetscInt i;
23: if (ov < 0) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
24: for (i=0; i<ov; ++i) {
25: MatIncreaseOverlap_MPIAIJ_Once(C,imax,is);
26: }
27: return(0);
28: }
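/*
   Usage sketch (illustrative only, not part of this file): a typical caller such as an
   additive Schwarz setup starts from its locally owned rows and asks for two levels of
   overlap.  MatGetOwnershipRange(), ISCreateStride(), MatIncreaseOverlap() and ISDestroy()
   are real PETSc calls; the matrix C is assumed to be an assembled MPIAIJ matrix.

     IS       is;
     PetscInt rstart,rend;

     MatGetOwnershipRange(C,&rstart,&rend);
     ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&is);
     MatIncreaseOverlap(C,1,&is,2);          dispatches to MatIncreaseOverlap_MPIAIJ() with ov = 2
     ...                                     is now also contains the overlap rows owned by other processes
     ISDestroy(&is);
*/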
30: /*
31: Sample message format:
32: If a processor A wants processor B to process some elements corresponding
33:   to index sets is[1], is[5]
34:   mesg [0] = 2   (number of index sets in the mesg)
35:   -----------
36:   mesg [1] = 1   => is[1]
37:   mesg [2] = size of is[1] (number of rows of is[1] that B owns)
38:   -----------
39:   mesg [3] = 5   => is[5]
40:   mesg [4] = size of is[5]
41:   -----------
42:   mesg [5]
43:   mesg [n]       data for is[1] (the rows of is[1] that B owns)
44:   -----------
45:   mesg [n+1]
46:   mesg [m]       data for is[5] (the rows of is[5] that B owns)
47:   -----------
49:   Notes:
50:   nrqs - number of requests sent (or to be sent out)
51:   nrqr - number of requests received (which have to be, or which have been, processed)
52: */
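/*
   Concrete instance of the format above (illustrative numbers): suppose processor A needs B to
   process rows {3,7,9} of is[1] and rows {12,15} of is[5], all owned by B.  The message A sends
   to B is the integer array

     { 2,  1,3,  5,2,  3,7,9,  12,15 }

   10 integers in total, i.e. the 5 data entries plus a header of 1 + 2*(number of index sets)
   entries, which is exactly the length w1[j] += w2[j] + 2*w3[j] computed below.
*/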
55: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once(Mat C,PetscInt imax,IS is[])
56: {
57: Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
58: PetscMPIInt *w1,*w2,nrqr,*w3,*w4,*onodes1,*olengths1,*onodes2,*olengths2;
59: const PetscInt **idx,*idx_i;
60: PetscInt *n,**data,len;
62: PetscMPIInt size,rank,tag1,tag2;
63: PetscInt M,i,j,k,**rbuf,row,proc = 0,nrqs,msz,**outdat,**ptr;
64: PetscInt *ctr,*pa,*tmp,*isz,*isz1,**xdata,**rbuf2,*d_p;
65: PetscBT *table;
66: MPI_Comm comm;
67: MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2;
68: MPI_Status *s_status,*recv_status;
69: char *t_p;
72: PetscObjectGetComm((PetscObject)C,&comm);
73: size = c->size;
74: rank = c->rank;
75: M = C->rmap->N;
77: PetscObjectGetNewTag((PetscObject)C,&tag1);
78: PetscObjectGetNewTag((PetscObject)C,&tag2);
80: PetscMalloc2(imax,PetscInt*,&idx,imax,PetscInt,&n);
82: for (i=0; i<imax; i++) {
83: ISGetIndices(is[i],&idx[i]);
84: ISGetLocalSize(is[i],&n[i]);
85: }
87:   /* evaluate communication - message to whom, length of message, and buffer space
88:      required. Based on this, buffers are allocated, and data copied into them */
89: PetscMalloc4(size,PetscMPIInt,&w1,size,PetscMPIInt,&w2,size,PetscMPIInt,&w3,size,PetscMPIInt,&w4);
90: PetscMemzero(w1,size*sizeof(PetscMPIInt)); /* initialise work vector*/
91: PetscMemzero(w2,size*sizeof(PetscMPIInt)); /* initialise work vector*/
92: PetscMemzero(w3,size*sizeof(PetscMPIInt)); /* initialise work vector*/
93: for (i=0; i<imax; i++) {
94: PetscMemzero(w4,size*sizeof(PetscMPIInt)); /* initialise work vector*/
95: idx_i = idx[i];
96: len = n[i];
97: for (j=0; j<len; j++) {
98: row = idx_i[j];
99: if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index set cannot have negative entries");
100: PetscLayoutFindOwner(C->rmap,row,&proc);
101: w4[proc]++;
102: }
103: for (j=0; j<size; j++) {
104: if (w4[j]) { w1[j] += w4[j]; w3[j]++;}
105: }
106: }
108: nrqs = 0; /* no of outgoing messages */
109: msz = 0; /* total mesg length (for all procs) */
110: w1[rank] = 0; /* no mesg sent to itself */
111: w3[rank] = 0;
112: for (i=0; i<size; i++) {
113: if (w1[i]) {w2[i] = 1; nrqs++;} /* there exists a message to proc i */
114: }
115: /* pa - is list of processors to communicate with */
116: PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);
117: for (i=0,j=0; i<size; i++) {
118: if (w1[i]) {pa[j] = i; j++;}
119: }
121: /* Each message would have a header = 1 + 2*(no of IS) + data */
122: for (i=0; i<nrqs; i++) {
123: j = pa[i];
124: w1[j] += w2[j] + 2*w3[j];
125: msz += w1[j];
126: }
128: /* Determine the number of messages to expect, their lengths, and their from-ids */
129: PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
130: PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);
132: /* Now post the Irecvs corresponding to these messages */
133: PetscPostIrecvInt(comm,tag1,nrqr,onodes1,olengths1,&rbuf,&r_waits1);
135: /* Allocate Memory for outgoing messages */
136: PetscMalloc4(size,PetscInt*,&outdat,size,PetscInt*,&ptr,msz,PetscInt,&tmp,size,PetscInt,&ctr);
137: PetscMemzero(outdat,size*sizeof(PetscInt*));
138: PetscMemzero(ptr,size*sizeof(PetscInt*));
140: {
141: PetscInt *iptr = tmp,ict = 0;
142: for (i=0; i<nrqs; i++) {
143: j = pa[i];
144: iptr += ict;
145: outdat[j] = iptr;
146: ict = w1[j];
147: }
148: }
150: /* Form the outgoing messages */
151: /*plug in the headers*/
152: for (i=0; i<nrqs; i++) {
153: j = pa[i];
154: outdat[j][0] = 0;
155: PetscMemzero(outdat[j]+1,2*w3[j]*sizeof(PetscInt));
156: ptr[j] = outdat[j] + 2*w3[j] + 1;
157: }
159: /* Memory for doing local proc's work*/
160: {
161: PetscMalloc5(imax,PetscBT,&table, imax,PetscInt*,&data, imax,PetscInt,&isz,
162: M*imax,PetscInt,&d_p, (M/PETSC_BITS_PER_BYTE+1)*imax,char,&t_p);
163: PetscMemzero(table,imax*sizeof(PetscBT));
164: PetscMemzero(data,imax*sizeof(PetscInt*));
165: PetscMemzero(isz,imax*sizeof(PetscInt));
166: PetscMemzero(d_p,M*imax*sizeof(PetscInt));
167: PetscMemzero(t_p,(M/PETSC_BITS_PER_BYTE+1)*imax*sizeof(char));
169: for (i=0; i<imax; i++) {
170: table[i] = t_p + (M/PETSC_BITS_PER_BYTE+1)*i;
171: data[i] = d_p + M*i;
172: }
173: }
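/*
   Sizing of the scratch space above, worked through with illustrative numbers: with M = 10000
   global rows, each bit table occupies M/PETSC_BITS_PER_BYTE + 1 = 1251 bytes, so imax = 4
   index sets need about 5 KB of bit tables (t_p), plus M*imax = 40000 PetscInt entries for the
   candidate row lists (d_p), i.e. roughly 160 KB assuming a 4-byte PetscInt.
*/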
175: /* Parse the IS and update local tables and the outgoing buf with the data*/
176: {
177: PetscInt n_i,*data_i,isz_i,*outdat_j,ctr_j;
178: PetscBT table_i;
180: for (i=0; i<imax; i++) {
181: PetscMemzero(ctr,size*sizeof(PetscInt));
182: n_i = n[i];
183: table_i = table[i];
184: idx_i = idx[i];
185: data_i = data[i];
186: isz_i = isz[i];
187: for (j=0; j<n_i; j++) { /* parse the indices of each IS */
188: row = idx_i[j];
189: PetscLayoutFindOwner(C->rmap,row,&proc);
190: if (proc != rank) { /* copy to the outgoing buffer */
191: ctr[proc]++;
192: *ptr[proc] = row;
193: ptr[proc]++;
194: } else if (!PetscBTLookupSet(table_i,row)) data_i[isz_i++] = row; /* Update the local table */
195: }
196: /* Update the headers for the current IS */
197: for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
198: if ((ctr_j = ctr[j])) {
199: outdat_j = outdat[j];
200: k = ++outdat_j[0];
201: outdat_j[2*k] = ctr_j;
202: outdat_j[2*k-1] = i;
203: }
204: }
205: isz[i] = isz_i;
206: }
207: }
209: /* Now post the sends */
210: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
211: for (i=0; i<nrqs; ++i) {
212: j = pa[i];
213: MPI_Isend(outdat[j],w1[j],MPIU_INT,j,tag1,comm,s_waits1+i);
214: }
216: /* No longer need the original indices*/
217: for (i=0; i<imax; ++i) {
218: ISRestoreIndices(is[i],idx+i);
219: }
220: PetscFree2(idx,n);
222: for (i=0; i<imax; ++i) {
223: ISDestroy(&is[i]);
224: }
226: /* Do Local work*/
227: MatIncreaseOverlap_MPIAIJ_Local(C,imax,table,isz,data);
229: /* Receive messages*/
230: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&recv_status);
231: if (nrqr) {MPI_Waitall(nrqr,r_waits1,recv_status);}
233: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);
234: if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status);}
236: /* Phase 1 sends are complete - deallocate buffers */
237: PetscFree4(outdat,ptr,tmp,ctr);
238: PetscFree4(w1,w2,w3,w4);
240: PetscMalloc((nrqr+1)*sizeof(PetscInt*),&xdata);
241: PetscMalloc((nrqr+1)*sizeof(PetscInt),&isz1);
242: MatIncreaseOverlap_MPIAIJ_Receive(C,nrqr,rbuf,xdata,isz1);
243: PetscFree(rbuf[0]);
244: PetscFree(rbuf);
247: /* Send the data back*/
248: /* Determine the buffer space required for the incoming messages */
249: {
250: PetscMPIInt *rw1;
252: PetscMalloc(size*sizeof(PetscMPIInt),&rw1);
253: PetscMemzero(rw1,size*sizeof(PetscMPIInt));
255: for (i=0; i<nrqr; ++i) {
256: proc = recv_status[i].MPI_SOURCE;
258: if (proc != onodes1[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPI_SOURCE mismatch");
259: rw1[proc] = isz1[i];
260: }
261: PetscFree(onodes1);
262: PetscFree(olengths1);
264: /* Determine the number of messages to expect, their lengths, and their from-ids */
265: PetscGatherMessageLengths(comm,nrqr,nrqs,rw1,&onodes2,&olengths2);
266: PetscFree(rw1);
267: }
268: /* Now post the Irecvs corresponding to these messages */
269: PetscPostIrecvInt(comm,tag2,nrqs,onodes2,olengths2,&rbuf2,&r_waits2);
271: /* Now post the sends */
272: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
273: for (i=0; i<nrqr; ++i) {
274: j = recv_status[i].MPI_SOURCE;
275: MPI_Isend(xdata[i],isz1[i],MPIU_INT,j,tag2,comm,s_waits2+i);
276: }
278: /* receive work done on other processors*/
279: {
280: PetscInt is_no,ct1,max,*rbuf2_i,isz_i,*data_i,jmax;
281: PetscMPIInt idex;
282: PetscBT table_i;
283: MPI_Status *status2;
285: PetscMalloc((PetscMax(nrqr,nrqs)+1)*sizeof(MPI_Status),&status2);
286: for (i=0; i<nrqs; ++i) {
287: MPI_Waitany(nrqs,r_waits2,&idex,status2+i);
288: /* Process the message*/
289: rbuf2_i = rbuf2[idex];
290: ct1 = 2*rbuf2_i[0]+1;
291: jmax = rbuf2[idex][0];
292: for (j=1; j<=jmax; j++) {
293: max = rbuf2_i[2*j];
294: is_no = rbuf2_i[2*j-1];
295: isz_i = isz[is_no];
296: data_i = data[is_no];
297: table_i = table[is_no];
298: for (k=0; k<max; k++,ct1++) {
299: row = rbuf2_i[ct1];
300: if (!PetscBTLookupSet(table_i,row)) data_i[isz_i++] = row;
301: }
302: isz[is_no] = isz_i;
303: }
304: }
306: if (nrqr) {MPI_Waitall(nrqr,s_waits2,status2);}
307: PetscFree(status2);
308: }
310: for (i=0; i<imax; ++i) {
311: ISCreateGeneral(PETSC_COMM_SELF,isz[i],data[i],PETSC_COPY_VALUES,is+i);
312: }
314: PetscFree(onodes2);
315: PetscFree(olengths2);
317: PetscFree(pa);
318: PetscFree(rbuf2[0]);
319: PetscFree(rbuf2);
320: PetscFree(s_waits1);
321: PetscFree(r_waits1);
322: PetscFree(s_waits2);
323: PetscFree(r_waits2);
324: PetscFree5(table,data,isz,d_p,t_p);
325: PetscFree(s_status);
326: PetscFree(recv_status);
327: PetscFree(xdata[0]);
328: PetscFree(xdata);
329: PetscFree(isz1);
330: return(0);
331: }
335: /*
336:    MatIncreaseOverlap_MPIAIJ_Local - Called by MatIncreaseOverlap_MPIAIJ_Once() to do
337:    the work on the local processor.
339:     Inputs:
340:      C      - the MPIAIJ matrix;
341:      imax   - total number of index sets processed at a time;
342:      table  - an array of PetscBT bit tables, one per index set, with one bit per global row.
344:     Output:
345:      isz    - array containing the count of the solution elements corresponding
346:               to each index set;
347:      data   - pointer to the solutions
348: */
349: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Local(Mat C,PetscInt imax,PetscBT *table,PetscInt *isz,PetscInt **data)
350: {
351: Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
352: Mat A = c->A,B = c->B;
353: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data;
354: PetscInt start,end,val,max,rstart,cstart,*ai,*aj;
355: PetscInt *bi,*bj,*garray,i,j,k,row,*data_i,isz_i;
356: PetscBT table_i;
359: rstart = C->rmap->rstart;
360: cstart = C->cmap->rstart;
361: ai = a->i;
362: aj = a->j;
363: bi = b->i;
364: bj = b->j;
365: garray = c->garray;
368: for (i=0; i<imax; i++) {
369: data_i = data[i];
370: table_i = table[i];
371: isz_i = isz[i];
372: for (j=0,max=isz[i]; j<max; j++) {
373: row = data_i[j] - rstart;
374: start = ai[row];
375: end = ai[row+1];
376: for (k=start; k<end; k++) { /* Amat */
377: val = aj[k] + cstart;
378: if (!PetscBTLookupSet(table_i,val)) data_i[isz_i++] = val;
379: }
380: start = bi[row];
381: end = bi[row+1];
382: for (k=start; k<end; k++) { /* Bmat */
383: val = garray[bj[k]];
384: if (!PetscBTLookupSet(table_i,val)) data_i[isz_i++] = val;
385: }
386: }
387: isz[i] = isz_i;
388: }
389: return(0);
390: }
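/*
   The duplicate filter used above relies on the PetscBT bit table: PetscBTLookupSet() returns the
   previous value of the bit and then sets it, so a column is appended to data_i only the first time
   it is seen.  A stand-alone sketch of the same idiom (hypothetical values, real PetscBT calls):

     PetscBT  seen;
     PetscInt vals[5] = {4,9,4,2,9},unique[5],nunique = 0,i;

     PetscBTCreate(16,&seen);                               one bit per possible value 0..15
     for (i=0; i<5; i++) {
       if (!PetscBTLookupSet(seen,vals[i])) unique[nunique++] = vals[i];
     }
     PetscBTDestroy(&seen);                                 unique now holds {4,9,2}, nunique == 3
*/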
394: /*
395:      MatIncreaseOverlap_MPIAIJ_Receive - Process the received messages,
396:      and return the output
398:      Input:
399:       C    - the matrix
400:       nrqr - number of messages being processed.
401:       rbuf - an array of pointers to the received requests
403:      Output:
404:       xdata - array of messages to be sent back
405:       isz1  - size of each message
407:      For better efficiency perhaps we should malloc each xdata[i] separately;
408:      then, if a re-malloc is required, we need only copy the data for that one row
409:      rather than all previous rows, as happens now where a single large chunk of
410:      memory is used.
412: */
413: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Receive(Mat C,PetscInt nrqr,PetscInt **rbuf,PetscInt **xdata,PetscInt * isz1)
414: {
415: Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
416: Mat A = c->A,B = c->B;
417: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data;
419: PetscInt rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k;
420: PetscInt row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end;
421: PetscInt val,max1,max2,m,no_malloc =0,*tmp,new_estimate,ctr;
422: PetscInt *rbuf_i,kmax,rbuf_0;
423: PetscBT xtable;
426: m = C->rmap->N;
427: rstart = C->rmap->rstart;
428: cstart = C->cmap->rstart;
429: ai = a->i;
430: aj = a->j;
431: bi = b->i;
432: bj = b->j;
433: garray = c->garray;
436: for (i=0,ct=0,total_sz=0; i<nrqr; ++i) {
437: rbuf_i = rbuf[i];
438: rbuf_0 = rbuf_i[0];
439: ct += rbuf_0;
440: for (j=1; j<=rbuf_0; j++) total_sz += rbuf_i[2*j];
441: }
443: if (C->rmap->n) max1 = ct*(a->nz + b->nz)/C->rmap->n;
444: else max1 = 1;
445: mem_estimate = 3*((total_sz > max1 ? total_sz : max1)+1);
446: PetscMalloc(mem_estimate*sizeof(PetscInt),&xdata[0]);
447: ++no_malloc;
448: PetscBTCreate(m,&xtable);
449: PetscMemzero(isz1,nrqr*sizeof(PetscInt));
451: ct3 = 0;
452: for (i=0; i<nrqr; i++) { /* for each mesg from proc i */
453: rbuf_i = rbuf[i];
454: rbuf_0 = rbuf_i[0];
455: ct1 = 2*rbuf_0+1;
456: ct2 = ct1;
457: ct3 += ct1;
458: for (j=1; j<=rbuf_0; j++) { /* for each IS from proc i*/
459: PetscBTMemzero(m,xtable);
460: oct2 = ct2;
461: kmax = rbuf_i[2*j];
462: for (k=0; k<kmax; k++,ct1++) {
463: row = rbuf_i[ct1];
464: if (!PetscBTLookupSet(xtable,row)) {
465: if (!(ct3 < mem_estimate)) {
466: new_estimate = (PetscInt)(1.5*mem_estimate)+1;
467: PetscMalloc(new_estimate*sizeof(PetscInt),&tmp);
468: PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
469: PetscFree(xdata[0]);
470: xdata[0] = tmp;
471: mem_estimate = new_estimate; ++no_malloc;
472: for (ctr=1; ctr<=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];
473: }
474: xdata[i][ct2++] = row;
475: ct3++;
476: }
477: }
478: for (k=oct2,max2=ct2; k<max2; k++) {
479: row = xdata[i][k] - rstart;
480: start = ai[row];
481: end = ai[row+1];
482: for (l=start; l<end; l++) {
483: val = aj[l] + cstart;
484: if (!PetscBTLookupSet(xtable,val)) {
485: if (!(ct3 < mem_estimate)) {
486: new_estimate = (PetscInt)(1.5*mem_estimate)+1;
487: PetscMalloc(new_estimate*sizeof(PetscInt),&tmp);
488: PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
489: PetscFree(xdata[0]);
490: xdata[0] = tmp;
491: mem_estimate = new_estimate; ++no_malloc;
492: for (ctr=1; ctr<=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];
493: }
494: xdata[i][ct2++] = val;
495: ct3++;
496: }
497: }
498: start = bi[row];
499: end = bi[row+1];
500: for (l=start; l<end; l++) {
501: val = garray[bj[l]];
502: if (!PetscBTLookupSet(xtable,val)) {
503: if (!(ct3 < mem_estimate)) {
504: new_estimate = (PetscInt)(1.5*mem_estimate)+1;
505: PetscMalloc(new_estimate*sizeof(PetscInt),&tmp);
506: PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
507: PetscFree(xdata[0]);
508: xdata[0] = tmp;
509: mem_estimate = new_estimate; ++no_malloc;
510: for (ctr =1; ctr <=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];
511: }
512: xdata[i][ct2++] = val;
513: ct3++;
514: }
515: }
516: }
517: /* Update the header*/
518: xdata[i][2*j] = ct2 - oct2; /* Undo the vector isz1 and use only a var*/
519: xdata[i][2*j-1] = rbuf_i[2*j-1];
520: }
521: xdata[i][0] = rbuf_0;
522: xdata[i+1] = xdata[i] + ct2;
523: isz1[i] = ct2; /* size of each message */
524: }
525: PetscBTDestroy(&xtable);
526: PetscInfo3(C,"Allocated %D bytes, required %D bytes, no of mallocs = %D\n",mem_estimate,ct3,no_malloc);
527: return(0);
528: }
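/*
   The reallocation idiom repeated three times above amounts to the following stand-alone sketch
   (hypothetical buffer, same 1.5x growth policy; the real code additionally rebases the xdata[i]
   pointers into the single shared buffer after each growth):

     PetscInt *buf,*tmp,cap = 100,used = 0;

     PetscMalloc(cap*sizeof(PetscInt),&buf);
     ...
     if (used >= cap) {                                     out of space: grow by roughly 1.5x
       PetscInt newcap = (PetscInt)(1.5*cap)+1;
       PetscMalloc(newcap*sizeof(PetscInt),&tmp);
       PetscMemcpy(tmp,buf,cap*sizeof(PetscInt));
       PetscFree(buf);
       buf = tmp; cap = newcap;
     }
     buf[used++] = newentry;                                newentry is the value being appended
*/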
529: /* -------------------------------------------------------------------------*/
530: extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
531: extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
532: /*
533: Every processor gets the entire matrix
534: */
537: PetscErrorCode MatGetSubMatrix_MPIAIJ_All(Mat A,MatGetSubMatrixOption flag,MatReuse scall,Mat *Bin[])
538: {
539: Mat B;
540: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
541: Mat_SeqAIJ *b,*ad = (Mat_SeqAIJ*)a->A->data,*bd = (Mat_SeqAIJ*)a->B->data;
543: PetscMPIInt size,rank,*recvcounts = 0,*displs = 0;
544: PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j;
545: PetscInt m,*b_sendj,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
546: MatScalar *sendbuf,*recvbuf,*a_sendbuf,*b_sendbuf;
549: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
550: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
552: if (scall == MAT_INITIAL_MATRIX) {
553: /* ----------------------------------------------------------------
554: Tell every processor the number of nonzeros per row
555: */
556: PetscMalloc(A->rmap->N*sizeof(PetscInt),&lens);
557: for (i=A->rmap->rstart; i<A->rmap->rend; i++) {
558: lens[i] = ad->i[i-A->rmap->rstart+1] - ad->i[i-A->rmap->rstart] + bd->i[i-A->rmap->rstart+1] - bd->i[i-A->rmap->rstart];
559: }
560: sendcount = A->rmap->rend - A->rmap->rstart;
561: PetscMalloc2(size,PetscMPIInt,&recvcounts,size,PetscMPIInt,&displs);
562: for (i=0; i<size; i++) {
563: recvcounts[i] = A->rmap->range[i+1] - A->rmap->range[i];
564: displs[i] = A->rmap->range[i];
565: }
566: #if defined(PETSC_HAVE_MPI_IN_PLACE)
567: MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
568: #else
569: MPI_Allgatherv(lens+A->rmap->rstart,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
570: #endif
571: /* ---------------------------------------------------------------
572: Create the sequential matrix of the same type as the local block diagonal
573: */
574: MatCreate(PETSC_COMM_SELF,&B);
575: MatSetSizes(B,A->rmap->N,A->cmap->N,PETSC_DETERMINE,PETSC_DETERMINE);
576: MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);
577: MatSetType(B,((PetscObject)a->A)->type_name);
578: MatSeqAIJSetPreallocation(B,0,lens);
579: PetscMalloc(sizeof(Mat),Bin);
580: **Bin = B;
581: b = (Mat_SeqAIJ*)B->data;
583: /*--------------------------------------------------------------------
584: Copy my part of matrix column indices over
585: */
586: sendcount = ad->nz + bd->nz;
587: jsendbuf = b->j + b->i[rstarts[rank]];
588: a_jsendbuf = ad->j;
589: b_jsendbuf = bd->j;
590: n = A->rmap->rend - A->rmap->rstart;
591: cnt = 0;
592: for (i=0; i<n; i++) {
594: /* put in lower diagonal portion */
595: m = bd->i[i+1] - bd->i[i];
596: while (m > 0) {
597: /* is it above diagonal (in bd (compressed) numbering) */
598: if (garray[*b_jsendbuf] > A->rmap->rstart + i) break;
599: jsendbuf[cnt++] = garray[*b_jsendbuf++];
600: m--;
601: }
603: /* put in diagonal portion */
604: for (j=ad->i[i]; j<ad->i[i+1]; j++) {
605: jsendbuf[cnt++] = A->rmap->rstart + *a_jsendbuf++;
606: }
608: /* put in upper diagonal portion */
609: while (m-- > 0) {
610: jsendbuf[cnt++] = garray[*b_jsendbuf++];
611: }
612: }
613: if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
615: /*--------------------------------------------------------------------
616: Gather all column indices to all processors
617: */
618: for (i=0; i<size; i++) {
619: recvcounts[i] = 0;
620: for (j=A->rmap->range[i]; j<A->rmap->range[i+1]; j++) {
621: recvcounts[i] += lens[j];
622: }
623: }
624: displs[0] = 0;
625: for (i=1; i<size; i++) {
626: displs[i] = displs[i-1] + recvcounts[i-1];
627: }
628: #if defined(PETSC_HAVE_MPI_IN_PLACE)
629: MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
630: #else
631: MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
632: #endif
633: /*--------------------------------------------------------------------
634: Assemble the matrix into useable form (note numerical values not yet set)
635: */
636: /* set the b->ilen (length of each row) values */
637: PetscMemcpy(b->ilen,lens,A->rmap->N*sizeof(PetscInt));
638: /* set the b->i indices */
639: b->i[0] = 0;
640: for (i=1; i<=A->rmap->N; i++) {
641: b->i[i] = b->i[i-1] + lens[i-1];
642: }
643: PetscFree(lens);
644: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
645: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
647: } else {
648: B = **Bin;
649: b = (Mat_SeqAIJ*)B->data;
650: }
652: /*--------------------------------------------------------------------
653: Copy my part of matrix numerical values into the values location
654: */
655: if (flag == MAT_GET_VALUES) {
656: sendcount = ad->nz + bd->nz;
657: sendbuf = b->a + b->i[rstarts[rank]];
658: a_sendbuf = ad->a;
659: b_sendbuf = bd->a;
660: b_sendj = bd->j;
661: n = A->rmap->rend - A->rmap->rstart;
662: cnt = 0;
663: for (i=0; i<n; i++) {
665: /* put in lower diagonal portion */
666: m = bd->i[i+1] - bd->i[i];
667: while (m > 0) {
668: /* is it above diagonal (in bd (compressed) numbering) */
669: if (garray[*b_sendj] > A->rmap->rstart + i) break;
670: sendbuf[cnt++] = *b_sendbuf++;
671: m--;
672: b_sendj++;
673: }
675: /* put in diagonal portion */
676: for (j=ad->i[i]; j<ad->i[i+1]; j++) {
677: sendbuf[cnt++] = *a_sendbuf++;
678: }
680: /* put in upper diagonal portion */
681: while (m-- > 0) {
682: sendbuf[cnt++] = *b_sendbuf++;
683: b_sendj++;
684: }
685: }
686: if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
688: /* -----------------------------------------------------------------
689: Gather all numerical values to all processors
690: */
691: if (!recvcounts) {
692: PetscMalloc2(size,PetscMPIInt,&recvcounts,size,PetscMPIInt,&displs);
693: }
694: for (i=0; i<size; i++) {
695: recvcounts[i] = b->i[rstarts[i+1]] - b->i[rstarts[i]];
696: }
697: displs[0] = 0;
698: for (i=1; i<size; i++) {
699: displs[i] = displs[i-1] + recvcounts[i-1];
700: }
701: recvbuf = b->a;
702: #if defined(PETSC_HAVE_MPI_IN_PLACE)
703: MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,recvbuf,recvcounts,displs,MPIU_SCALAR,PetscObjectComm((PetscObject)A));
704: #else
705: MPI_Allgatherv(sendbuf,sendcount,MPIU_SCALAR,recvbuf,recvcounts,displs,MPIU_SCALAR,PetscObjectComm((PetscObject)A));
706: #endif
707: } /* end of (flag == MAT_GET_VALUES) */
708: PetscFree2(recvcounts,displs);
710: if (A->symmetric) {
711: MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
712: } else if (A->hermitian) {
713: MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);
714: } else if (A->structurally_symmetric) {
715: MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
716: }
717: return(0);
718: }
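/*
   The gathers above use the standard MPI_Allgatherv in-place pattern: every rank fills only its
   own slice of a shared array and receives everyone else's slices.  A minimal sketch with one
   PetscInt per rank (illustrative only; assumes PETSC_HAVE_MPI_IN_PLACE, otherwise the local
   slice is passed explicitly as the send buffer, as in the #else branches above):

     PetscMPIInt size,rank,*counts,*displs;
     PetscInt    *all,i;

     MPI_Comm_size(comm,&size);
     MPI_Comm_rank(comm,&rank);
     PetscMalloc2(size,PetscMPIInt,&counts,size,PetscMPIInt,&displs);
     PetscMalloc(size*sizeof(PetscInt),&all);
     for (i=0; i<size; i++) {counts[i] = 1; displs[i] = i;}
     all[rank] = (PetscInt)rank;                            each rank contributes one entry
     MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,all,counts,displs,MPIU_INT,comm);
     PetscFree2(counts,displs);
     PetscFree(all);
*/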
724: PetscErrorCode MatGetSubMatrices_MPIAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
725: {
727: PetscInt nmax,nstages_local,nstages,i,pos,max_no,nrow,ncol;
728: PetscBool rowflag,colflag,wantallmatrix=PETSC_FALSE,twantallmatrix,*allcolumns;
731: /* Currently, unsorted column indices will result in inverted column indices in the resulting submatrices. */
732: /* It would make sense to error out in MatGetSubMatrices_MPIAIJ_Local(), the most impl-specific level.
733: However, there are more careful users of MatGetSubMatrices_MPIAIJ_Local() -- MatPermute_MPIAIJ() -- that
734: take care to order the result correctly by assembling it with MatSetValues() (after preallocating).
735: */
736: for (i = 0; i < ismax; ++i) {
737: PetscBool sorted;
738: ISSorted(iscol[i], &sorted);
739: if (!sorted) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_SUP, "Column index set %D not sorted", i);
740: }
742: /*
743: Check for special case: each processor gets entire matrix
744: */
745: if (ismax == 1 && C->rmap->N == C->cmap->N) {
746: ISIdentity(*isrow,&rowflag);
747: ISIdentity(*iscol,&colflag);
748: ISGetLocalSize(*isrow,&nrow);
749: ISGetLocalSize(*iscol,&ncol);
750: if (rowflag && colflag && nrow == C->rmap->N && ncol == C->cmap->N) {
751: wantallmatrix = PETSC_TRUE;
753: PetscOptionsGetBool(((PetscObject)C)->prefix,"-use_fast_submatrix",&wantallmatrix,NULL);
754: }
755: }
756: MPI_Allreduce(&wantallmatrix,&twantallmatrix,1,MPIU_BOOL,MPI_MIN,PetscObjectComm((PetscObject)C));
757: if (twantallmatrix) {
758: MatGetSubMatrix_MPIAIJ_All(C,MAT_GET_VALUES,scall,submat);
759: return(0);
760: }
762: /* Allocate memory to hold all the submatrices */
763: if (scall != MAT_REUSE_MATRIX) {
764: PetscMalloc((ismax+1)*sizeof(Mat),submat);
765: }
767: /* Check for special case: each processor gets entire matrix columns */
768: PetscMalloc((ismax+1)*sizeof(PetscBool),&allcolumns);
769: for (i=0; i<ismax; i++) {
770: ISIdentity(iscol[i],&colflag);
771: ISGetLocalSize(iscol[i],&ncol);
772: if (colflag && ncol == C->cmap->N) {
773: allcolumns[i] = PETSC_TRUE;
774: } else {
775: allcolumns[i] = PETSC_FALSE;
776: }
777: }
779: /* Determine the number of stages through which submatrices are done */
780: nmax = 20*1000000 / (C->cmap->N * sizeof(PetscInt));
782: /*
783: Each stage will extract nmax submatrices.
784: nmax is determined by the matrix column dimension.
785: If the original matrix has 20M columns, only one submatrix per stage is allowed, etc.
786: */
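/*
   Worked example of the staging arithmetic (assuming a 4-byte PetscInt): with C->cmap->N = 100000
   columns, nmax = 20000000/(100000*4) = 50 submatrices per stage; extracting ismax = 120
   submatrices then takes nstages_local = 120/50 + ((120%50) ? 1 : 0) = 3 stages on this process.
*/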
787: if (!nmax) nmax = 1;
788: nstages_local = ismax/nmax + ((ismax % nmax) ? 1 : 0);
790: /* Make sure every processor loops through the nstages */
791: MPI_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)C));
793: for (i=0,pos=0; i<nstages; i++) {
794: if (pos+nmax <= ismax) max_no = nmax;
795: else if (pos == ismax) max_no = 0;
796: else max_no = ismax-pos;
797: MatGetSubMatrices_MPIAIJ_Local(C,max_no,isrow+pos,iscol+pos,scall,allcolumns+pos,*submat+pos);
798: pos += max_no;
799: }
801: PetscFree(allcolumns);
802: return(0);
803: }
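/*
   Usage sketch (illustrative only): MatGetSubMatrices() is the public entry point that dispatches
   to the routine above for MPIAIJ matrices.  Each process supplies its own sequential index sets
   (the column set must be sorted, as checked above) and gets back sequential submatrices.  The
   index sets here are hypothetical; all calls are real PETSc API.

     IS  rowis,colis;
     Mat *submats;

     ISCreateStride(PETSC_COMM_SELF,10,0,1,&rowis);          rows 0..9 of C
     ISCreateStride(PETSC_COMM_SELF,10,0,1,&colis);          cols 0..9 of C
     MatGetSubMatrices(C,1,&rowis,&colis,MAT_INITIAL_MATRIX,&submats);
     ...                                                     use submats[0], a SeqAIJ matrix
     MatDestroyMatrices(1,&submats);
     ISDestroy(&rowis);
     ISDestroy(&colis);
*/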
805: /* -------------------------------------------------------------------------*/
808: PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,PetscBool *allcolumns,Mat *submats)
809: {
810: Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
811: Mat A = c->A;
812: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)c->B->data,*mat;
813: const PetscInt **icol,**irow;
814: PetscInt *nrow,*ncol,start;
816: PetscMPIInt rank,size,tag0,tag1,tag2,tag3,*w1,*w2,*w3,*w4,nrqr;
817: PetscInt **sbuf1,**sbuf2,i,j,k,l,ct1,ct2,**rbuf1,row,proc;
818: PetscInt nrqs,msz,**ptr,*req_size,*ctr,*pa,*tmp,tcol;
819: PetscInt **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2;
820: PetscInt **lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax;
821: #if defined(PETSC_USE_CTABLE)
822: PetscTable *cmap,cmap_i=NULL,*rmap,rmap_i;
823: #else
824: PetscInt **cmap,*cmap_i=NULL,**rmap,*rmap_i;
825: #endif
826: const PetscInt *irow_i;
827: PetscInt ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*lens_i;
828: MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
829: MPI_Request *r_waits4,*s_waits3,*s_waits4;
830: MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
831: MPI_Status *r_status3,*r_status4,*s_status4;
832: MPI_Comm comm;
833: PetscScalar **rbuf4,**sbuf_aa,*vals,*mat_a,*sbuf_aa_i;
834: PetscMPIInt *onodes1,*olengths1;
835: PetscMPIInt idex,idex2,end;
838: PetscObjectGetComm((PetscObject)C,&comm);
839: tag0 = ((PetscObject)C)->tag;
840: size = c->size;
841: rank = c->rank;
843: /* Get some new tags to keep the communication clean */
844: PetscObjectGetNewTag((PetscObject)C,&tag1);
845: PetscObjectGetNewTag((PetscObject)C,&tag2);
846: PetscObjectGetNewTag((PetscObject)C,&tag3);
848: PetscMalloc4(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol);
850: for (i=0; i<ismax; i++) {
851: ISGetIndices(isrow[i],&irow[i]);
852: ISGetLocalSize(isrow[i],&nrow[i]);
853: if (allcolumns[i]) {
854: icol[i] = NULL;
855: ncol[i] = C->cmap->N;
856: } else {
857: ISGetIndices(iscol[i],&icol[i]);
858: ISGetLocalSize(iscol[i],&ncol[i]);
859: }
860: }
862:   /* evaluate communication - message to whom, length of message, and buffer space
863:      required. Based on this, buffers are allocated, and data copied into them */
864: PetscMalloc4(size,PetscMPIInt,&w1,size,PetscMPIInt,&w2,size,PetscMPIInt,&w3,size,PetscMPIInt,&w4); /* mesg size */
865: PetscMemzero(w1,size*sizeof(PetscMPIInt)); /* initialize work vector*/
866: PetscMemzero(w2,size*sizeof(PetscMPIInt)); /* initialize work vector*/
867: PetscMemzero(w3,size*sizeof(PetscMPIInt)); /* initialize work vector*/
868: for (i=0; i<ismax; i++) {
869: PetscMemzero(w4,size*sizeof(PetscMPIInt)); /* initialize work vector*/
870: jmax = nrow[i];
871: irow_i = irow[i];
872: for (j=0; j<jmax; j++) {
873: l = 0;
874: row = irow_i[j];
875: while (row >= C->rmap->range[l+1]) l++;
876: proc = l;
877: w4[proc]++;
878: }
879: for (j=0; j<size; j++) {
880: if (w4[j]) { w1[j] += w4[j]; w3[j]++;}
881: }
882: }
884: nrqs = 0; /* no of outgoing messages */
885: msz = 0; /* total mesg length (for all procs) */
886: w1[rank] = 0; /* no mesg sent to self */
887: w3[rank] = 0;
888: for (i=0; i<size; i++) {
889: if (w1[i]) { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
890: }
891: PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa); /*(proc -array)*/
892: for (i=0,j=0; i<size; i++) {
893: if (w1[i]) { pa[j] = i; j++; }
894: }
896: /* Each message would have a header = 1 + 2*(no of IS) + data */
897: for (i=0; i<nrqs; i++) {
898: j = pa[i];
899: w1[j] += w2[j] + 2* w3[j];
900: msz += w1[j];
901: }
902: PetscInfo2(0,"Number of outgoing messages %D Total message length %D\n",nrqs,msz);
904: /* Determine the number of messages to expect, their lengths, and their from-ids */
905: PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
906: PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);
908: /* Now post the Irecvs corresponding to these messages */
909: PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
911: PetscFree(onodes1);
912: PetscFree(olengths1);
914: /* Allocate Memory for outgoing messages */
915: PetscMalloc4(size,PetscInt*,&sbuf1,size,PetscInt*,&ptr,2*msz,PetscInt,&tmp,size,PetscInt,&ctr);
916: PetscMemzero(sbuf1,size*sizeof(PetscInt*));
917: PetscMemzero(ptr,size*sizeof(PetscInt*));
919: {
920: PetscInt *iptr = tmp,ict = 0;
921: for (i=0; i<nrqs; i++) {
922: j = pa[i];
923: iptr += ict;
924: sbuf1[j] = iptr;
925: ict = w1[j];
926: }
927: }
929: /* Form the outgoing messages */
930: /* Initialize the header space */
931: for (i=0; i<nrqs; i++) {
932: j = pa[i];
933: sbuf1[j][0] = 0;
934: PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));
935: ptr[j] = sbuf1[j] + 2*w3[j] + 1;
936: }
938: /* Parse the isrow and copy data into outbuf */
939: for (i=0; i<ismax; i++) {
940: PetscMemzero(ctr,size*sizeof(PetscInt));
941: irow_i = irow[i];
942: jmax = nrow[i];
943: for (j=0; j<jmax; j++) { /* parse the indices of each IS */
944: l = 0;
945: row = irow_i[j];
946: while (row >= C->rmap->range[l+1]) l++;
947: proc = l;
948: if (proc != rank) { /* copy to the outgoing buf*/
949: ctr[proc]++;
950: *ptr[proc] = row;
951: ptr[proc]++;
952: }
953: }
954: /* Update the headers for the current IS */
955: for (j=0; j<size; j++) { /* Can Optimise this loop too */
956: if ((ctr_j = ctr[j])) {
957: sbuf1_j = sbuf1[j];
958: k = ++sbuf1_j[0];
959: sbuf1_j[2*k] = ctr_j;
960: sbuf1_j[2*k-1] = i;
961: }
962: }
963: }
965: /* Now post the sends */
966: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
967: for (i=0; i<nrqs; ++i) {
968: j = pa[i];
969: MPI_Isend(sbuf1[j],w1[j],MPIU_INT,j,tag0,comm,s_waits1+i);
970: }
972: /* Post Receives to capture the buffer size */
973: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
974: PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf2);
975: rbuf2[0] = tmp + msz;
976: for (i=1; i<nrqs; ++i) {
977: rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
978: }
979: for (i=0; i<nrqs; ++i) {
980: j = pa[i];
981: MPI_Irecv(rbuf2[i],w1[j],MPIU_INT,j,tag1,comm,r_waits2+i);
982: }
984: /* Send to other procs the buf size they should allocate */
987: /* Receive messages*/
988: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
989: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
990: PetscMalloc3(nrqr,PetscInt*,&sbuf2,nrqr,PetscInt,&req_size,nrqr,PetscInt,&req_source);
991: {
992: Mat_SeqAIJ *sA = (Mat_SeqAIJ*)c->A->data,*sB = (Mat_SeqAIJ*)c->B->data;
993: PetscInt *sAi = sA->i,*sBi = sB->i,id,rstart = C->rmap->rstart;
994: PetscInt *sbuf2_i;
996: for (i=0; i<nrqr; ++i) {
997: MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);
999: req_size[idex] = 0;
1000: rbuf1_i = rbuf1[idex];
1001: start = 2*rbuf1_i[0] + 1;
1002: MPI_Get_count(r_status1+i,MPIU_INT,&end);
1003: PetscMalloc((end+1)*sizeof(PetscInt),&sbuf2[idex]);
1004: sbuf2_i = sbuf2[idex];
1005: for (j=start; j<end; j++) {
1006: id = rbuf1_i[j] - rstart;
1007: ncols = sAi[id+1] - sAi[id] + sBi[id+1] - sBi[id];
1008: sbuf2_i[j] = ncols;
1009: req_size[idex] += ncols;
1010: }
1011: req_source[idex] = r_status1[i].MPI_SOURCE;
1012: /* form the header */
1013: sbuf2_i[0] = req_size[idex];
1014: for (j=1; j<start; j++) sbuf2_i[j] = rbuf1_i[j];
1016: MPI_Isend(sbuf2_i,end,MPIU_INT,req_source[idex],tag1,comm,s_waits2+i);
1017: }
1018: }
1019: PetscFree(r_status1);
1020: PetscFree(r_waits1);
1022: /* recv buffer sizes */
1023: /* Receive messages*/
1025: PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf3);
1026: PetscMalloc((nrqs+1)*sizeof(PetscScalar*),&rbuf4);
1027: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
1028: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
1029: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);
1031: for (i=0; i<nrqs; ++i) {
1032: MPI_Waitany(nrqs,r_waits2,&idex,r_status2+i);
1033: PetscMalloc((rbuf2[idex][0]+1)*sizeof(PetscInt),&rbuf3[idex]);
1034: PetscMalloc((rbuf2[idex][0]+1)*sizeof(PetscScalar),&rbuf4[idex]);
1035: MPI_Irecv(rbuf3[idex],rbuf2[idex][0],MPIU_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idex);
1036: MPI_Irecv(rbuf4[idex],rbuf2[idex][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idex);
1037: }
1038: PetscFree(r_status2);
1039: PetscFree(r_waits2);
1041: /* Wait on sends1 and sends2 */
1042: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
1043: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);
1045: if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status1);}
1046: if (nrqr) {MPI_Waitall(nrqr,s_waits2,s_status2);}
1047: PetscFree(s_status1);
1048: PetscFree(s_status2);
1049: PetscFree(s_waits1);
1050: PetscFree(s_waits2);
1052: /* Now allocate buffers for a->j, and send them off */
1053: PetscMalloc((nrqr+1)*sizeof(PetscInt*),&sbuf_aj);
1054: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
1055: PetscMalloc((j+1)*sizeof(PetscInt),&sbuf_aj[0]);
1056: for (i=1; i<nrqr; i++) sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1];
1058: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
1059: {
1060: PetscInt nzA,nzB,*a_i = a->i,*b_i = b->i,lwrite;
1061: PetscInt *cworkA,*cworkB,cstart = C->cmap->rstart,rstart = C->rmap->rstart,*bmap = c->garray;
1062: PetscInt cend = C->cmap->rend;
1063: PetscInt *a_j = a->j,*b_j = b->j,ctmp;
1065: for (i=0; i<nrqr; i++) {
1066: rbuf1_i = rbuf1[i];
1067: sbuf_aj_i = sbuf_aj[i];
1068: ct1 = 2*rbuf1_i[0] + 1;
1069: ct2 = 0;
1070: for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
1071: kmax = rbuf1[i][2*j];
1072: for (k=0; k<kmax; k++,ct1++) {
1073: row = rbuf1_i[ct1] - rstart;
1074: nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row];
1075: ncols = nzA + nzB;
1076: cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row];
1078: /* load the column indices for this row into cols*/
1079: cols = sbuf_aj_i + ct2;
1081: lwrite = 0;
1082: for (l=0; l<nzB; l++) {
1083: if ((ctmp = bmap[cworkB[l]]) < cstart) cols[lwrite++] = ctmp;
1084: }
1085: for (l=0; l<nzA; l++) cols[lwrite++] = cstart + cworkA[l];
1086: for (l=0; l<nzB; l++) {
1087: if ((ctmp = bmap[cworkB[l]]) >= cend) cols[lwrite++] = ctmp;
1088: }
1090: ct2 += ncols;
1091: }
1092: }
1093: MPI_Isend(sbuf_aj_i,req_size[i],MPIU_INT,req_source[i],tag2,comm,s_waits3+i);
1094: }
1095: }
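/*
   The lwrite ordering above sends each row's column indices already sorted in global numbering:
   off-diagonal columns below cstart first, then the diagonal block, then off-diagonal columns at
   or beyond cend (garray is kept sorted for MPIAIJ).  Illustrative numbers: with cstart = 10,
   cend = 20, garray = {2,5,25}, a row whose diagonal-block local columns are {0,3} and whose
   compressed off-diagonal columns are {0,1,2} is sent as {2,5,10,13,25}.
*/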
1096: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
1097: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);
1099: /* Allocate buffers for a->a, and send them off */
1100: PetscMalloc((nrqr+1)*sizeof(PetscScalar*),&sbuf_aa);
1101: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
1102: PetscMalloc((j+1)*sizeof(PetscScalar),&sbuf_aa[0]);
1103: for (i=1; i<nrqr; i++) sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1];
1105: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
1106: {
1107: PetscInt nzA,nzB,*a_i = a->i,*b_i = b->i, *cworkB,lwrite;
1108: PetscInt cstart = C->cmap->rstart,rstart = C->rmap->rstart,*bmap = c->garray;
1109: PetscInt cend = C->cmap->rend;
1110: PetscInt *b_j = b->j;
1111: PetscScalar *vworkA,*vworkB,*a_a = a->a,*b_a = b->a;
1113: for (i=0; i<nrqr; i++) {
1114: rbuf1_i = rbuf1[i];
1115: sbuf_aa_i = sbuf_aa[i];
1116: ct1 = 2*rbuf1_i[0]+1;
1117: ct2 = 0;
1118: for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
1119: kmax = rbuf1_i[2*j];
1120: for (k=0; k<kmax; k++,ct1++) {
1121: row = rbuf1_i[ct1] - rstart;
1122: nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row];
1123: ncols = nzA + nzB;
1124: cworkB = b_j + b_i[row];
1125: vworkA = a_a + a_i[row];
1126: vworkB = b_a + b_i[row];
1128: /* load the column values for this row into vals*/
1129: vals = sbuf_aa_i+ct2;
1131: lwrite = 0;
1132: for (l=0; l<nzB; l++) {
1133: if ((bmap[cworkB[l]]) < cstart) vals[lwrite++] = vworkB[l];
1134: }
1135: for (l=0; l<nzA; l++) vals[lwrite++] = vworkA[l];
1136: for (l=0; l<nzB; l++) {
1137: if ((bmap[cworkB[l]]) >= cend) vals[lwrite++] = vworkB[l];
1138: }
1140: ct2 += ncols;
1141: }
1142: }
1143: MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
1144: }
1145: }
1146: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
1147: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
1148: PetscFree(rbuf1[0]);
1149: PetscFree(rbuf1);
1151: /* Form the matrix */
1152: /* create col map: global col of C -> local col of submatrices */
1153: {
1154: const PetscInt *icol_i;
1155: #if defined(PETSC_USE_CTABLE)
1156: PetscMalloc((1+ismax)*sizeof(PetscTable),&cmap);
1157: for (i=0; i<ismax; i++) {
1158: if (!allcolumns[i]) {
1159: PetscTableCreate(ncol[i]+1,C->cmap->N+1,&cmap[i]);
1161: jmax = ncol[i];
1162: icol_i = icol[i];
1163: cmap_i = cmap[i];
1164: for (j=0; j<jmax; j++) {
1165: PetscTableAdd(cmap[i],icol_i[j]+1,j+1,INSERT_VALUES);
1166: }
1167: } else {
1168: cmap[i] = NULL;
1169: }
1170: }
1171: #else
1172: PetscMalloc(ismax*sizeof(PetscInt*),&cmap);
1173: for (i=0; i<ismax; i++) {
1174: if (!allcolumns[i]) {
1175: PetscMalloc(C->cmap->N*sizeof(PetscInt),&cmap[i]);
1176: PetscMemzero(cmap[i],C->cmap->N*sizeof(PetscInt));
1177: jmax = ncol[i];
1178: icol_i = icol[i];
1179: cmap_i = cmap[i];
1180: for (j=0; j<jmax; j++) {
1181: cmap_i[icol_i[j]] = j+1;
1182: }
1183: } else {
1184: cmap[i] = NULL;
1185: }
1186: }
1187: #endif
1188: }
1190: /* Create lens which is required for MatCreate... */
1191: for (i=0,j=0; i<ismax; i++) j += nrow[i];
1192: PetscMalloc(ismax*sizeof(PetscInt*),&lens);
1193: if (ismax) {
1194: PetscMalloc(j*sizeof(PetscInt),&lens[0]);
1195: PetscMemzero(lens[0],j*sizeof(PetscInt));
1196: }
1197: for (i=1; i<ismax; i++) lens[i] = lens[i-1] + nrow[i-1];
1199: /* Update lens from local data */
1200: for (i=0; i<ismax; i++) {
1201: jmax = nrow[i];
1202: if (!allcolumns[i]) cmap_i = cmap[i];
1203: irow_i = irow[i];
1204: lens_i = lens[i];
1205: for (j=0; j<jmax; j++) {
1206: l = 0;
1207: row = irow_i[j];
1208: while (row >= C->rmap->range[l+1]) l++;
1209: proc = l;
1210: if (proc == rank) {
1211: MatGetRow_MPIAIJ(C,row,&ncols,&cols,0);
1212: if (!allcolumns[i]) {
1213: for (k=0; k<ncols; k++) {
1214: #if defined(PETSC_USE_CTABLE)
1215: PetscTableFind(cmap_i,cols[k]+1,&tcol);
1216: #else
1217: tcol = cmap_i[cols[k]];
1218: #endif
1219: if (tcol) lens_i[j]++;
1220: }
1221: } else { /* allcolumns */
1222: lens_i[j] = ncols;
1223: }
1224: MatRestoreRow_MPIAIJ(C,row,&ncols,&cols,0);
1225: }
1226: }
1227: }
1229: /* Create row map: global row of C -> local row of submatrices */
1230: #if defined(PETSC_USE_CTABLE)
1231: PetscMalloc((1+ismax)*sizeof(PetscTable),&rmap);
1232: for (i=0; i<ismax; i++) {
1233: PetscTableCreate(nrow[i]+1,C->rmap->N+1,&rmap[i]);
1234: rmap_i = rmap[i];
1235: irow_i = irow[i];
1236: jmax = nrow[i];
1237: for (j=0; j<jmax; j++) {
1238: PetscTableAdd(rmap[i],irow_i[j]+1,j+1,INSERT_VALUES);
1239: }
1240: }
1241: #else
1242: PetscMalloc(ismax*sizeof(PetscInt*),&rmap);
1243: if (ismax) {
1244: PetscMalloc(ismax*C->rmap->N*sizeof(PetscInt),&rmap[0]);
1245: PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(PetscInt));
1246: }
1247: for (i=1; i<ismax; i++) rmap[i] = rmap[i-1] + C->rmap->N;
1248: for (i=0; i<ismax; i++) {
1249: rmap_i = rmap[i];
1250: irow_i = irow[i];
1251: jmax = nrow[i];
1252: for (j=0; j<jmax; j++) {
1253: rmap_i[irow_i[j]] = j;
1254: }
1255: }
1256: #endif
1258: /* Update lens from offproc data */
1259: {
1260: PetscInt *rbuf2_i,*rbuf3_i,*sbuf1_i;
1262: for (tmp2=0; tmp2<nrqs; tmp2++) {
1263: MPI_Waitany(nrqs,r_waits3,&idex2,r_status3+tmp2);
1264: idex = pa[idex2];
1265: sbuf1_i = sbuf1[idex];
1266: jmax = sbuf1_i[0];
1267: ct1 = 2*jmax+1;
1268: ct2 = 0;
1269: rbuf2_i = rbuf2[idex2];
1270: rbuf3_i = rbuf3[idex2];
1271: for (j=1; j<=jmax; j++) {
1272: is_no = sbuf1_i[2*j-1];
1273: max1 = sbuf1_i[2*j];
1274: lens_i = lens[is_no];
1275: if (!allcolumns[is_no]) cmap_i = cmap[is_no];
1276: rmap_i = rmap[is_no];
1277: for (k=0; k<max1; k++,ct1++) {
1278: #if defined(PETSC_USE_CTABLE)
1279: PetscTableFind(rmap_i,sbuf1_i[ct1]+1,&row);
1280: row--;
1281: if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table");
1282: #else
1283: row = rmap_i[sbuf1_i[ct1]]; /* the val in the new matrix to be */
1284: #endif
1285: max2 = rbuf2_i[ct1];
1286: for (l=0; l<max2; l++,ct2++) {
1287: if (!allcolumns[is_no]) {
1288: #if defined(PETSC_USE_CTABLE)
1289: PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tcol);
1290: #else
1291: tcol = cmap_i[rbuf3_i[ct2]];
1292: #endif
1293: if (tcol) lens_i[row]++;
1294: } else { /* allcolumns */
1295: lens_i[row]++; /* lens_i[row] += max2 ? */
1296: }
1297: }
1298: }
1299: }
1300: }
1301: }
1302: PetscFree(r_status3);
1303: PetscFree(r_waits3);
1304: if (nrqr) {MPI_Waitall(nrqr,s_waits3,s_status3);}
1305: PetscFree(s_status3);
1306: PetscFree(s_waits3);
1308: /* Create the submatrices */
1309: if (scall == MAT_REUSE_MATRIX) {
1310: PetscBool flag;
1312: /*
1313:        Assumes the new rows are the same length as the old rows, hence a potential bug!
1314: */
1315: for (i=0; i<ismax; i++) {
1316: mat = (Mat_SeqAIJ*)(submats[i]->data);
1317: if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
1318: PetscMemcmp(mat->ilen,lens[i],submats[i]->rmap->n*sizeof(PetscInt),&flag);
1319: if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
1320:       /* Initialize the matrix as if it were empty */
1321: PetscMemzero(mat->ilen,submats[i]->rmap->n*sizeof(PetscInt));
1323: submats[i]->factortype = C->factortype;
1324: }
1325: } else {
1326: for (i=0; i<ismax; i++) {
1327: PetscInt rbs,cbs;
1328: ISGetBlockSize(isrow[i],&rbs);
1329: ISGetBlockSize(iscol[i],&cbs);
1331: MatCreate(PETSC_COMM_SELF,submats+i);
1332: MatSetSizes(submats[i],nrow[i],ncol[i],PETSC_DETERMINE,PETSC_DETERMINE);
1334: MatSetBlockSizes(submats[i],rbs,cbs);
1335: MatSetType(submats[i],((PetscObject)A)->type_name);
1336: MatSeqAIJSetPreallocation(submats[i],0,lens[i]);
1337: }
1338: }
1340: /* Assemble the matrices */
1341: /* First assemble the local rows */
1342: {
1343: PetscInt ilen_row,*imat_ilen,*imat_j,*imat_i,old_row;
1344: PetscScalar *imat_a;
1346: for (i=0; i<ismax; i++) {
1347: mat = (Mat_SeqAIJ*)submats[i]->data;
1348: imat_ilen = mat->ilen;
1349: imat_j = mat->j;
1350: imat_i = mat->i;
1351: imat_a = mat->a;
1353: if (!allcolumns[i]) cmap_i = cmap[i];
1354: rmap_i = rmap[i];
1355: irow_i = irow[i];
1356: jmax = nrow[i];
1357: for (j=0; j<jmax; j++) {
1358: l = 0;
1359: row = irow_i[j];
1360: while (row >= C->rmap->range[l+1]) l++;
1361: proc = l;
1362: if (proc == rank) {
1363: old_row = row;
1364: #if defined(PETSC_USE_CTABLE)
1365: PetscTableFind(rmap_i,row+1,&row);
1366: row--;
1367: #else
1368: row = rmap_i[row];
1369: #endif
1370: ilen_row = imat_ilen[row];
1371: MatGetRow_MPIAIJ(C,old_row,&ncols,&cols,&vals);
1372: mat_i = imat_i[row];
1373: mat_a = imat_a + mat_i;
1374: mat_j = imat_j + mat_i;
1375: if (!allcolumns[i]) {
1376: for (k=0; k<ncols; k++) {
1377: #if defined(PETSC_USE_CTABLE)
1378: PetscTableFind(cmap_i,cols[k]+1,&tcol);
1379: #else
1380: tcol = cmap_i[cols[k]];
1381: #endif
1382: if (tcol) {
1383: *mat_j++ = tcol - 1;
1384: *mat_a++ = vals[k];
1385: ilen_row++;
1386: }
1387: }
1388: } else { /* allcolumns */
1389: for (k=0; k<ncols; k++) {
1390: *mat_j++ = cols[k]; /* global col index! */
1391: *mat_a++ = vals[k];
1392: ilen_row++;
1393: }
1394: }
1395: MatRestoreRow_MPIAIJ(C,old_row,&ncols,&cols,&vals);
1397: imat_ilen[row] = ilen_row;
1398: }
1399: }
1400: }
1401: }
1403: /* Now assemble the off proc rows*/
1404: {
1405: PetscInt *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen;
1406: PetscInt *imat_j,*imat_i;
1407: PetscScalar *imat_a,*rbuf4_i;
1409: for (tmp2=0; tmp2<nrqs; tmp2++) {
1410: MPI_Waitany(nrqs,r_waits4,&idex2,r_status4+tmp2);
1411: idex = pa[idex2];
1412: sbuf1_i = sbuf1[idex];
1413: jmax = sbuf1_i[0];
1414: ct1 = 2*jmax + 1;
1415: ct2 = 0;
1416: rbuf2_i = rbuf2[idex2];
1417: rbuf3_i = rbuf3[idex2];
1418: rbuf4_i = rbuf4[idex2];
1419: for (j=1; j<=jmax; j++) {
1420: is_no = sbuf1_i[2*j-1];
1421: rmap_i = rmap[is_no];
1422: if (!allcolumns[is_no]) cmap_i = cmap[is_no];
1423: mat = (Mat_SeqAIJ*)submats[is_no]->data;
1424: imat_ilen = mat->ilen;
1425: imat_j = mat->j;
1426: imat_i = mat->i;
1427: imat_a = mat->a;
1428: max1 = sbuf1_i[2*j];
1429: for (k=0; k<max1; k++,ct1++) {
1430: row = sbuf1_i[ct1];
1431: #if defined(PETSC_USE_CTABLE)
1432: PetscTableFind(rmap_i,row+1,&row);
1433: row--;
1434: #else
1435: row = rmap_i[row];
1436: #endif
1437: ilen = imat_ilen[row];
1438: mat_i = imat_i[row];
1439: mat_a = imat_a + mat_i;
1440: mat_j = imat_j + mat_i;
1441: max2 = rbuf2_i[ct1];
1442: if (!allcolumns[is_no]) {
1443: for (l=0; l<max2; l++,ct2++) {
1445: #if defined(PETSC_USE_CTABLE)
1446: PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tcol);
1447: #else
1448: tcol = cmap_i[rbuf3_i[ct2]];
1449: #endif
1450: if (tcol) {
1451: *mat_j++ = tcol - 1;
1452: *mat_a++ = rbuf4_i[ct2];
1453: ilen++;
1454: }
1455: }
1456: } else { /* allcolumns */
1457: for (l=0; l<max2; l++,ct2++) {
1458: *mat_j++ = rbuf3_i[ct2]; /* same global column index of C */
1459: *mat_a++ = rbuf4_i[ct2];
1460: ilen++;
1461: }
1462: }
1463: imat_ilen[row] = ilen;
1464: }
1465: }
1466: }
1467: }
1469: PetscFree(r_status4);
1470: PetscFree(r_waits4);
1471: if (nrqr) {MPI_Waitall(nrqr,s_waits4,s_status4);}
1472: PetscFree(s_waits4);
1473: PetscFree(s_status4);
1475: /* Restore the indices */
1476: for (i=0; i<ismax; i++) {
1477: ISRestoreIndices(isrow[i],irow+i);
1478: if (!allcolumns[i]) {
1479: ISRestoreIndices(iscol[i],icol+i);
1480: }
1481: }
1483: /* Destroy allocated memory */
1484: PetscFree4(irow,icol,nrow,ncol);
1485: PetscFree4(w1,w2,w3,w4);
1486: PetscFree(pa);
1488: PetscFree4(sbuf1,ptr,tmp,ctr);
1489: PetscFree(rbuf2);
1490: for (i=0; i<nrqr; ++i) {
1491: PetscFree(sbuf2[i]);
1492: }
1493: for (i=0; i<nrqs; ++i) {
1494: PetscFree(rbuf3[i]);
1495: PetscFree(rbuf4[i]);
1496: }
1498: PetscFree3(sbuf2,req_size,req_source);
1499: PetscFree(rbuf3);
1500: PetscFree(rbuf4);
1501: PetscFree(sbuf_aj[0]);
1502: PetscFree(sbuf_aj);
1503: PetscFree(sbuf_aa[0]);
1504: PetscFree(sbuf_aa);
1506: #if defined(PETSC_USE_CTABLE)
1507: for (i=0; i<ismax; i++) {PetscTableDestroy((PetscTable*)&rmap[i]);}
1508: #else
1509: if (ismax) {PetscFree(rmap[0]);}
1510: #endif
1511: PetscFree(rmap);
1513: for (i=0; i<ismax; i++) {
1514: if (!allcolumns[i]) {
1515: #if defined(PETSC_USE_CTABLE)
1516: PetscTableDestroy((PetscTable*)&cmap[i]);
1517: #else
1518: PetscFree(cmap[i]);
1519: #endif
1520: }
1521: }
1522: PetscFree(cmap);
1523: if (ismax) {PetscFree(lens[0]);}
1524: PetscFree(lens);
1526: for (i=0; i<ismax; i++) {
1527: MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);
1528: MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);
1529: }
1530: return(0);
1531: }
1533: /*
1534: Observe that the Seq matrices used to construct this MPI matrix are not increfed.
1535: Be careful not to destroy them elsewhere.
1536: */
1539: PetscErrorCode MatCreateMPIAIJFromSeqMatrices_Private(MPI_Comm comm, Mat A, Mat B, Mat *C)
1540: {
1541: /* If making this function public, change the error returned in this function away from _PLIB. */
1543: Mat_MPIAIJ *aij;
1544: PetscBool seqaij;
1547: /* Check to make sure the component matrices are compatible with C. */
1548: PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &seqaij);
1549: if (!seqaij) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Diagonal matrix is of wrong type");
1550: PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &seqaij);
1551: if (!seqaij) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Off-diagonal matrix is of wrong type");
1552: if (A->rmap->n != B->rmap->n || A->rmap->bs != B->rmap->bs || A->cmap->bs != B->cmap->bs) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incompatible component matrices of an MPIAIJ matrix");
1554: MatCreate(comm, C);
1555: MatSetSizes(*C,A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE);
1556: MatSetBlockSizes(*C,A->rmap->bs, A->cmap->bs);
1557: PetscLayoutSetUp((*C)->rmap);
1558: PetscLayoutSetUp((*C)->cmap);
1559: if ((*C)->cmap->N != A->cmap->n + B->cmap->n) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incompatible component matrices of an MPIAIJ matrix");
1560: MatSetType(*C, MATMPIAIJ);
1561: aij = (Mat_MPIAIJ*)((*C)->data);
1562: aij->A = A;
1563: aij->B = B;
1564: PetscLogObjectParent(*C,A);
1565: PetscLogObjectParent(*C,B);
1567: (*C)->preallocated = (PetscBool)(A->preallocated && B->preallocated);
1568: (*C)->assembled = (PetscBool)(A->assembled && B->assembled);
1569: return(0);
1570: }
1574: PetscErrorCode MatMPIAIJExtractSeqMatrices_Private(Mat C, Mat *A, Mat *B)
1575: {
1576: Mat_MPIAIJ *aij = (Mat_MPIAIJ*) (C->data);
1581: *A = aij->A;
1582: *B = aij->B;
1583: /* Note that we don't incref *A and *B, so be careful! */
1584: return(0);
1585: }
1589: PetscErrorCode MatGetSubMatricesParallel_MPIXAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[],
1590: PetscErrorCode(*getsubmats_seq)(Mat, PetscInt, const IS[], const IS[], MatReuse, Mat**),
1591: PetscErrorCode(*makefromseq)(MPI_Comm, Mat, Mat,Mat*),
1592: PetscErrorCode(*extractseq)(Mat, Mat*, Mat*))
1593: {
1595: PetscMPIInt size, flag;
1596: PetscInt i,ii;
1597: PetscInt ismax_c;
1600: if (!ismax) return(0);
1602: for (i = 0, ismax_c = 0; i < ismax; ++i) {
1603: MPI_Comm_compare(((PetscObject)isrow[i])->comm,((PetscObject)iscol[i])->comm, &flag);
1604: if (flag != MPI_IDENT) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column index sets must have the same communicator");
1605: MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1606: if (size > 1) ++ismax_c;
1607: }
1608: if (!ismax_c) { /* Sequential ISs only, so can call the sequential matrix extraction subroutine. */
1609: (*getsubmats_seq)(C,ismax,isrow,iscol,scall,submat);
1610: } else { /* if (ismax_c) */
1611: Mat *A,*B;
1612: IS *isrow_c, *iscol_c;
1613: PetscMPIInt size;
1614: /*
1615: Allocate the necessary arrays to hold the resulting parallel matrices as well as the intermediate
1616: array of sequential matrices underlying the resulting parallel matrices.
1617: Which arrays to allocate is based on the value of MatReuse scall.
1618: There are as many diag matrices as there are original index sets.
1619:        There are only as many parallel and off-diag matrices as there are parallel (comm size > 1) index sets.
1621:        Sequential matrix arrays are allocated in any event: even if the array of parallel matrices already exists,
1622:        we need to consolidate the underlying seq matrices into a single array to serve as placeholders into which
1623:        getsubmats_seq will deposit the extracted diag and off-diag parts.
1624: However, if reuse is taking place, we have to allocate the seq matrix arrays here.
1625: If reuse is NOT taking place, then the seq matrix arrays are allocated by getsubmats_seq.
1626: */
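/*
   Illustrative instance of the bookkeeping described above (hypothetical sizes): with ismax = 3
   index sets of which only isrow[0] lives on a communicator of size > 1, ismax_c = 1.  Then A[]
   holds 3 sequential diag pieces and B[] holds 1 off-diag piece, and the results are assembled
   as submat[0] = makefromseq(A[0],B[0]), submat[1] = A[1], submat[2] = A[2].
*/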
1628: /* Parallel matrix array is allocated only if no reuse is taking place. */
1629: if (scall != MAT_REUSE_MATRIX) {
1630: PetscMalloc((ismax)*sizeof(Mat),submat);
1631: } else {
1632: PetscMalloc(ismax*sizeof(Mat), &A);
1633: PetscMalloc(ismax_c*sizeof(Mat), &B);
1634: /* If parallel matrices are being reused, then simply reuse the underlying seq matrices as well. */
1635: for (i = 0, ii = 0; i < ismax; ++i) {
1636: MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1637: if (size > 1) {
1638: (*extractseq)((*submat)[i],A+i,B+ii);
1639: ++ii;
1640: } else A[i] = (*submat)[i];
1641: }
1642: }
1643: /*
1644: Construct the complements of the iscol ISs for parallel ISs only.
1645: These are used to extract the off-diag portion of the resulting parallel matrix.
1646: The row IS for the off-diag portion is the same as for the diag portion,
1647: so we merely alias the row IS, while skipping those that are sequential.
1648: */
1649: PetscMalloc2(ismax_c,IS,&isrow_c, ismax_c, IS, &iscol_c);
1650: for (i = 0, ii = 0; i < ismax; ++i) {
1651: MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1652: if (size > 1) {
1653: isrow_c[ii] = isrow[i];
1655: ISGetNonlocalIS(iscol[i], &(iscol_c[ii]));
1656: ++ii;
1657: }
1658: }
1659: /* Now obtain the sequential A and B submatrices separately. */
1660: (*getsubmats_seq)(C,ismax,isrow, iscol,scall, &A);
1661: (*getsubmats_seq)(C,ismax_c,isrow_c, iscol_c,scall, &B);
1662: for (ii = 0; ii < ismax_c; ++ii) {
1663: ISDestroy(&iscol_c[ii]);
1664: }
1665: PetscFree2(isrow_c, iscol_c);
1666: /*
1667: If scall == MAT_REUSE_MATRIX, we are done, since the sequential matrices A & B
1668: have been extracted directly into the parallel matrices containing them, or
1669: simply into the sequential matrix identical with the corresponding A (if size == 1).
1670: Otherwise, make sure that parallel matrices are constructed from A & B, or the
1671: A is put into the correct submat slot (if size == 1).
1672: */
1673: if (scall != MAT_REUSE_MATRIX) {
1674: for (i = 0, ii = 0; i < ismax; ++i) {
1675: MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1676: if (size > 1) {
1677: /*
1678: For each parallel isrow[i], create parallel matrices from the extracted sequential matrices.
1679: */
1680: /* Construct submat[i] from the Seq pieces A and B. */
1681: (*makefromseq)(((PetscObject)isrow[i])->comm, A[i], B[ii], (*submat)+i);
1683: ++ii;
1684: } else (*submat)[i] = A[i];
1685: }
1686: }
1687: PetscFree(A);
1688: PetscFree(B);
1689: }
1690: return(0);
1691: } /* MatGetSubMatricesParallel_MPIXAIJ() */
1697: PetscErrorCode MatGetSubMatricesParallel_MPIAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
1698: {
1702: MatGetSubMatricesParallel_MPIXAIJ(C,ismax,isrow,iscol,scall,submat,MatGetSubMatrices_MPIAIJ,MatCreateMPIAIJFromSeqMatrices_Private,MatMPIAIJExtractSeqMatrices_Private);
1703: return(0);
1704: }