Actual source code: vpscat.h

petsc-3.4.2 2013-07-02
  2: /*
  3:      Defines the methods VecScatterBegin_1/VecScatterEnd_1, VecScatterBegin_2/VecScatterEnd_2, ...
  4:      This file is included by vpscat.c with different values of BS.

  6:      This is a terrible way of doing "templates" in C.
  7: */
  8: #define PETSCMAP1_a(a,b)  a ## _ ## b
  9: #define PETSCMAP1_b(a,b)  PETSCMAP1_a(a,b)
 10: #define PETSCMAP1(a)      PETSCMAP1_b(a,BS)
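
/* A minimal sketch of how the name mapping works, assuming (for illustration only)
   that vpscat.c defines BS as 2 before including this file:

       PETSCMAP1(VecScatterBegin)
         -> PETSCMAP1_b(VecScatterBegin,BS)    (the extra level lets BS expand to 2)
         -> PETSCMAP1_a(VecScatterBegin,2)     (token pasting: a ## _ ## b)
         -> VecScatterBegin_2

   so each inclusion of this header emits a block-size-specific variant of the
   routines below. */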

 14: PetscErrorCode PETSCMAP1(VecScatterBegin)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
 15: {
 16:   VecScatter_MPI_General *to,*from;
 17:   PetscScalar            *xv,*yv,*svalues;
 18:   MPI_Request            *rwaits,*swaits;
 19:   PetscErrorCode         ierr;
 20:   PetscInt               i,*indices,*sstarts,nrecvs,nsends,bs;

 23:   if (mode & SCATTER_REVERSE) {
 24:     to     = (VecScatter_MPI_General*)ctx->fromdata;
 25:     from   = (VecScatter_MPI_General*)ctx->todata;
 26:     rwaits = from->rev_requests;
 27:     swaits = to->rev_requests;
 28:   } else {
 29:     to     = (VecScatter_MPI_General*)ctx->todata;
 30:     from   = (VecScatter_MPI_General*)ctx->fromdata;
 31:     rwaits = from->requests;
 32:     swaits = to->requests;
 33:   }
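       /* for SCATTER_FORWARD, "to" describes the messages sent from xin and "from"
          describes the messages received into yin; SCATTER_REVERSE simply swaps the
          two descriptions and uses the persistent requests built for the reverse
          direction */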
 34:   bs      = to->bs;
 35:   svalues = to->values;
 36:   nrecvs  = from->n;
 37:   nsends  = to->n;
 38:   indices = to->indices;
 39:   sstarts = to->starts;
 40: #if defined(PETSC_HAVE_CUSP)

 42: #if defined(PETSC_HAVE_TXPETSCGPU)

 44: #if 0
 45:   /* This branch messages the entire vector */
 46:   VecGetArrayRead(xin,(const PetscScalar**)&xv);
 47: #else
 48:   /*
 49:    This branch messages only the parts that are necessary.
  50:    ... this seems to perform about the same due to the need to call
  51:        a separate kernel before the SpMV to gather the data into
  52:        a contiguous buffer. We leave both branches in for the time being;
  53:        I expect that ultimately this branch will be the right choice, but
  54:        the jury is still out.
 55:    */
 56:   if (xin->valid_GPU_array == PETSC_CUSP_GPU) {
 57:     if (xin->spptr && ctx->spptr) {
 58:       VecCUSPCopyFromGPUSome_Public(xin,(PetscCUSPIndices)ctx->spptr);
 59:     }
 60:   }
 61: #endif
 62:   xv = *(PetscScalar**)xin->data;

 64: #else
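       /* heuristic: pull back from the GPU only the entries this process will send,
          but only when the vector is fairly large (more than 10000 local entries),
          the sends touch less than 5% of it, and there is no purely local part to
          the scatter; otherwise copy the whole array back to the host */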
 65:   if (!xin->map->n || ((xin->map->n > 10000) && (sstarts[nsends]*bs < 0.05*xin->map->n) && (xin->valid_GPU_array == PETSC_CUSP_GPU) && !(to->local.n))) {
 66:     if (!ctx->spptr) {
 67:       PetscInt k,*tindices,n = sstarts[nsends],*sindices;
 68:       PetscMalloc(n*sizeof(PetscInt),&tindices);
 69:       PetscMemcpy(tindices,to->indices,n*sizeof(PetscInt));
 70:       PetscSortRemoveDupsInt(&n,tindices);
 71:       PetscMalloc(bs*n*sizeof(PetscInt),&sindices);
 72:       for (i=0; i<n; i++) {
 73:         for (k=0; k<bs; k++) sindices[i*bs+k] = tindices[i]+k;
 74:       }
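           /* e.g. with bs = 2 and sorted, deduplicated block starts tindices = {3,7}
              this yields sindices = {3,4,7,8}: each block start expanded into its bs
              consecutive scalar indices, which is the form the GPU gather expects */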
 75:       PetscFree(tindices);
 76:       PetscCUSPIndicesCreate(n*bs,sindices,n*bs,sindices,(PetscCUSPIndices*)&ctx->spptr);
 77:       PetscFree(sindices);
 78:     }
 79:     VecCUSPCopyFromGPUSome_Public(xin,(PetscCUSPIndices)ctx->spptr);
 80:     xv   = *(PetscScalar**)xin->data;
 81:   } else {
 82:     VecGetArrayRead(xin,(const PetscScalar**)&xv);
 83:   }
 84: #endif

 86: #else
 87:   VecGetArrayRead(xin,(const PetscScalar**)&xv);
 88: #endif

 90:   if (xin != yin) {VecGetArray(yin,&yv);}
 91:   else yv = xv;

 93:   if (!(mode & SCATTER_LOCAL)) {
  94:     if (!from->use_readyreceiver && !to->sendfirst && !to->use_alltoallv && !to->use_window) {
 95:       /* post receives since they were not previously posted    */
 96:       if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
 97:     }

 99: #if defined(PETSC_HAVE_MPI_ALLTOALLW)  && !defined(PETSC_USE_64BIT_INDICES)
100:     if (to->use_alltoallw && addv == INSERT_VALUES) {
101:       MPI_Alltoallw(xv,to->wcounts,to->wdispls,to->types,yv,from->wcounts,from->wdispls,from->types,PetscObjectComm((PetscObject)ctx));
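         /* the derived datatypes set up when the scatter was created let
            MPI_Alltoallw move the values directly between xv and yv, with no
            explicit Pack/UnPack; a receive can only overwrite, which is why this
            path is limited to INSERT_VALUES */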
102:     } else
103: #endif
104:     if (ctx->packtogether || to->use_alltoallv || to->use_window) {
 105:       /* this version packs all the messages together and sends them as one batch; used with -vecscatter_packtogether and by the alltoallv and window paths */
106:       PETSCMAP1(Pack)(sstarts[nsends],indices,xv,svalues);
107:       if (to->use_alltoallv) {
108:         MPI_Alltoallv(to->values,to->counts,to->displs,MPIU_SCALAR,from->values,from->counts,from->displs,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
109: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
110:       } else if (to->use_window) {
111:         PetscInt cnt;

113:         MPI_Win_fence(0,from->window);
114:         for (i=0; i<nsends; i++) {
115:           cnt  = bs*(to->starts[i+1]-to->starts[i]);
116:           MPI_Put(to->values+bs*to->starts[i],cnt,MPIU_SCALAR,to->procs[i],bs*to->winstarts[i],cnt,MPIU_SCALAR,from->window);
117:         }
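           /* the matching MPI_Win_fence() in VecScatterEnd completes these one-sided puts */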
118: #endif
119:       } else if (nsends) {
120:         MPI_Startall_isend(to->starts[to->n],nsends,swaits);
121:       }
122:     } else {
123:       /* this version packs and sends one at a time */
124:       for (i=0; i<nsends; i++) {
125:         PETSCMAP1(Pack)(sstarts[i+1]-sstarts[i],indices + sstarts[i],xv,svalues + bs*sstarts[i]);
126:         MPI_Start_isend(sstarts[i+1]-sstarts[i],swaits+i);
127:       }
128:     }

130:     if (!from->use_readyreceiver && to->sendfirst && !to->use_alltoallv && !to->use_window) {
131:       /* post receives since they were not previously posted   */
132:       if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
133:     }
134:   }

136:   /* take care of local scatters */
137:   if (to->local.n) {
138:     if (to->local.is_copy && addv == INSERT_VALUES) {
139:       PetscMemcpy(yv + from->local.copy_start,xv + to->local.copy_start,to->local.copy_length);
140:     } else {
141:       PETSCMAP1(Scatter)(to->local.n,to->local.vslots,xv,from->local.vslots,yv,addv);
142:     }
143:   }
144: #if defined(PETSC_HAVE_CUSP)
 146:   VecRestoreArrayRead(xin,(const PetscScalar**)&xv);
150: #else
151:   VecRestoreArrayRead(xin,(const PetscScalar**)&xv);
152: #endif
153:   if (xin != yin) {VecRestoreArray(yin,&yv);}
154:   return(0);
155: }
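
/* For reference, a sketch of the kind of routine PETSCMAP1(Pack) expands to for
   BS == 2 (illustrative only; the actual definitions live in vpscat.c and may
   differ in detail):

      PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indices,
                                      const PetscScalar *x,PetscScalar *y)
      {
        PetscInt i,idx;
        for (i=0; i<n; i++) {
          idx  = *indices++;            gather one block of 2 values ...
          y[0] = x[idx];
          y[1] = x[idx+1];
          y   += 2;                     ... into the contiguous send buffer
        }
      }
*/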

157: /* --------------------------------------------------------------------------------------*/

161: PetscErrorCode PETSCMAP1(VecScatterEnd)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
162: {
163:   VecScatter_MPI_General *to,*from;
164:   PetscScalar            *rvalues,*yv;
165:   PetscErrorCode         ierr;
166:   PetscInt               nrecvs,nsends,*indices,count,*rstarts,bs;
167:   PetscMPIInt            imdex;
168:   MPI_Request            *rwaits,*swaits;
169:   MPI_Status             xrstatus,*rstatus,*sstatus;

172:   if (mode & SCATTER_LOCAL) return(0);
173:   VecGetArray(yin,&yv);

175:   to      = (VecScatter_MPI_General*)ctx->todata;
176:   from    = (VecScatter_MPI_General*)ctx->fromdata;
177:   rwaits  = from->requests;
178:   swaits  = to->requests;
 179:   sstatus = to->sstatus;    /* sstatus and rstatus are always stored in the 'to' context */
180:   rstatus = to->rstatus;
181:   if (mode & SCATTER_REVERSE) {
182:     to     = (VecScatter_MPI_General*)ctx->fromdata;
183:     from   = (VecScatter_MPI_General*)ctx->todata;
184:     rwaits = from->rev_requests;
185:     swaits = to->rev_requests;
186:   }
187:   bs      = from->bs;
188:   rvalues = from->values;
189:   nrecvs  = from->n;
190:   nsends  = to->n;
191:   indices = from->indices;
192:   rstarts = from->starts;

194:   if (ctx->packtogether || (to->use_alltoallw && (addv != INSERT_VALUES)) || (to->use_alltoallv && !to->use_alltoallw) || to->use_window) {
195: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
196:     if (to->use_window) {MPI_Win_fence(0,from->window);}
197:     else
198: #endif
199:     if (nrecvs && !to->use_alltoallv) {MPI_Waitall(nrecvs,rwaits,rstatus);}
200:     PETSCMAP1(UnPack)(from->starts[from->n],from->values,indices,yv,addv);
201:   } else if (!to->use_alltoallw) {
202:     /* unpack one at a time */
203:     count = nrecvs;
204:     while (count) {
205:       if (ctx->reproduce) {
206:         imdex = count - 1;
207:         MPI_Wait(rwaits+imdex,&xrstatus);
208:       } else {
209:         MPI_Waitany(nrecvs,rwaits,&imdex,&xrstatus);
210:       }
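           /* with -vecscatter_reproduce the receives are processed in a fixed
              (reverse) order rather than in order of arrival, so ADD_VALUES
              contributions are accumulated in a deterministic order from run to run */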
211:       /* unpack receives into our local space */
212:       PETSCMAP1(UnPack)(rstarts[imdex+1] - rstarts[imdex],rvalues + bs*rstarts[imdex],indices + rstarts[imdex],yv,addv);
213:       count--;
214:     }
215:   }
216:   if (from->use_readyreceiver) {
217:     if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
218:     MPI_Barrier(PetscObjectComm((PetscObject)ctx));
219:   }

221:   /* wait on sends */
222:   if (nsends  && !to->use_alltoallv  && !to->use_window) {MPI_Waitall(nsends,swaits,sstatus);}
223:   VecRestoreArray(yin,&yv);
224: #if defined(PETSC_HAVE_TXPETSCGPU)
225:   if (yin->valid_GPU_array == PETSC_CUSP_CPU) {
226:     if (yin->spptr && ctx->spptr) {
227:       VecCUSPCopyToGPUSome_Public(yin,(PetscCUSPIndices)ctx->spptr);
228:     }
229:   }
230: #endif
231:   return(0);
232: }
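
/* Similarly, a sketch of what PETSCMAP1(UnPack) expands to for BS == 2 (again
   illustrative; see vpscat.c for the actual code), showing how addv chooses between
   overwriting and accumulating (other InsertMode values omitted here):

      PETSC_STATIC_INLINE void UnPack_2(PetscInt n,const PetscScalar *x,
                                        const PetscInt *indices,PetscScalar *y,
                                        InsertMode addv)
      {
        PetscInt i,idy;
        if (addv == INSERT_VALUES) {
          for (i=0; i<n; i++) {
            idy      = *indices++;
            y[idy]   = x[0];
            y[idy+1] = x[1];
            x       += 2;
          }
        } else {                        assumes ADD_VALUES
          for (i=0; i<n; i++) {
            idy       = *indices++;
            y[idy]   += x[0];
            y[idy+1] += x[1];
            x        += 2;
          }
        }
      }
*/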

234: #undef PETSCMAP1_a
235: #undef PETSCMAP1_b
236: #undef PETSCMAP1
237: #undef BS