Actual source code: dvec2.c

petsc-3.4.2 2013-07-02
  2: /*
  3:    Defines some vector operation functions that are shared by
  4:   sequential and parallel vectors.
  5: */
  6: #include <../src/vec/vec/impls/dvecimpl.h>
  7: #include <petsc-private/kernels/petscaxpy.h>
  8: #include <petscthreadcomm.h>

 10: #if defined(PETSC_THREADCOMM_ACTIVE)
 11: PetscErrorCode VecMDot_kernel(PetscInt thread_id,Vec xin,PetscInt *nvp,Vec *yvec,PetscThreadCommReduction red)
 12: {
 14:   PetscInt       *trstarts=xin->map->trstarts;
 15:   PetscInt       start,end,nv=*nvp,i,j;
 16:   PetscScalar    *xx,*yy;
 17:   PetscScalar    sum;

 19:   start = trstarts[thread_id];
 20:   end   = trstarts[thread_id+1];
 21:   VecGetArray(xin,&xx);
 22:   for (i=0; i<nv; i++) {
 23:     sum  = 0.;
 24:     VecGetArray(yvec[i],&yy);
 25:     for (j=start; j<end; j++) sum += xx[j]*PetscConj(yy[j]);
 26:     PetscThreadReductionKernelPost(thread_id,red,&sum);
 27:     VecRestoreArray(yvec[i],&yy);
 28:   }
 29:   VecRestoreArray(xin,&xx);

 31:   return 0;
 32: }

 36: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
 37: {
 38:   PetscErrorCode           ierr;
 39:   PetscThreadCommReduction red;
 40:   PetscInt                 *nvp,nreds,i,j;

 43:   PetscThreadCommGetInts(PetscObjectComm((PetscObject)xin),&nvp,NULL,NULL);
 44:   for (i=0; i<nv; i+=nreds) {
 45:     nreds = PetscMin(nv-i,PETSC_REDUCTIONS_MAX);
 46:     PetscThreadReductionBegin(PetscObjectComm((PetscObject)xin),THREADCOMM_SUM,PETSC_SCALAR,nreds,&red);
 47:     *nvp  = nreds;
 48:     PetscThreadCommRunKernel4(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecMDot_kernel,xin,nvp,(Vec*)yin+i,red);
 49:     for (j=0; j<nreds; j++) {
 50:       PetscThreadReductionEnd(red,&z[i+j]);
 51:     }
 52:   }
 53:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
 54:   return(0);
 55: }

 57: #else
 58: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
 59: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
 62: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
 63: {
 64:   PetscErrorCode    ierr;
 65:   PetscInt          i,nv_rem,n = xin->map->n;
 66:   PetscScalar       sum0,sum1,sum2,sum3;
 67:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
 68:   Vec               *yy;

 71:   sum0 = 0.0;
 72:   sum1 = 0.0;
 73:   sum2 = 0.0;

 75:   i      = nv;
 76:   nv_rem = nv&0x3;
 77:   yy     = (Vec*)yin;
 78:   VecGetArrayRead(xin,&x);

 80:   switch (nv_rem) {
 81:   case 3:
 82:     VecGetArrayRead(yy[0],&yy0);
 83:     VecGetArrayRead(yy[1],&yy1);
 84:     VecGetArrayRead(yy[2],&yy2);
 85:     fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
 86:     VecRestoreArrayRead(yy[0],&yy0);
 87:     VecRestoreArrayRead(yy[1],&yy1);
 88:     VecRestoreArrayRead(yy[2],&yy2);
 89:     z[0] = sum0;
 90:     z[1] = sum1;
 91:     z[2] = sum2;
 92:     break;
 93:   case 2:
 94:     VecGetArrayRead(yy[0],&yy0);
 95:     VecGetArrayRead(yy[1],&yy1);
 96:     fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
 97:     VecRestoreArrayRead(yy[0],&yy0);
 98:     VecRestoreArrayRead(yy[1],&yy1);
 99:     z[0] = sum0;
100:     z[1] = sum1;
101:     break;
102:   case 1:
103:     VecGetArrayRead(yy[0],&yy0);
104:     fortranmdot1_(x,yy0,&n,&sum0);
105:     VecRestoreArrayRead(yy[0],&yy0);
106:     z[0] = sum0;
107:     break;
108:   case 0:
109:     break;
110:   }
111:   z  += nv_rem;
112:   i  -= nv_rem;
113:   yy += nv_rem;

115:   while (i >0) {
116:     sum0 = 0.;
117:     sum1 = 0.;
118:     sum2 = 0.;
119:     sum3 = 0.;
120:     VecGetArrayRead(yy[0],&yy0);
121:     VecGetArrayRead(yy[1],&yy1);
122:     VecGetArrayRead(yy[2],&yy2);
123:     VecGetArrayRead(yy[3],&yy3);
124:     fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
125:     VecRestoreArrayRead(yy[0],&yy0);
126:     VecRestoreArrayRead(yy[1],&yy1);
127:     VecRestoreArrayRead(yy[2],&yy2);
128:     VecRestoreArrayRead(yy[3],&yy3);
129:     yy  += 4;
130:     z[0] = sum0;
131:     z[1] = sum1;
132:     z[2] = sum2;
133:     z[3] = sum3;
134:     z   += 4;
135:     i   -= 4;
136:   }
137:   VecRestoreArrayRead(xin,&x);
138:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
139:   return(0);
140: }

142: #else
145: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
146: {
147:   PetscErrorCode    ierr;
148:   PetscInt          n = xin->map->n,i,j,nv_rem,j_rem;
149:   PetscScalar       sum0,sum1,sum2,sum3,x0,x1,x2,x3;
150:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
151:   Vec               *yy;

154:   sum0 = 0.;
155:   sum1 = 0.;
156:   sum2 = 0.;

158:   i      = nv;
159:   nv_rem = nv&0x3;
160:   yy     = (Vec*)yin;
161:   j      = n;
162:   VecGetArrayRead(xin,&xbase);
163:   x      = xbase;

165:   switch (nv_rem) {
166:   case 3:
167:     VecGetArrayRead(yy[0],&yy0);
168:     VecGetArrayRead(yy[1],&yy1);
169:     VecGetArrayRead(yy[2],&yy2);
170:     switch (j_rem=j&0x3) {
171:     case 3:
172:       x2    = x[2];
173:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
174:       sum2 += x2*PetscConj(yy2[2]);
175:     case 2:
176:       x1    = x[1];
177:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
178:       sum2 += x1*PetscConj(yy2[1]);
179:     case 1:
180:       x0    = x[0];
181:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
182:       sum2 += x0*PetscConj(yy2[0]);
183:     case 0:
184:       x   += j_rem;
185:       yy0 += j_rem;
186:       yy1 += j_rem;
187:       yy2 += j_rem;
188:       j   -= j_rem;
189:       break;
190:     }
191:     while (j>0) {
192:       x0 = x[0];
193:       x1 = x[1];
194:       x2 = x[2];
195:       x3 = x[3];
196:       x += 4;

198:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
199:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
200:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
201:       j    -= 4;
202:     }
203:     z[0] = sum0;
204:     z[1] = sum1;
205:     z[2] = sum2;
206:     VecRestoreArrayRead(yy[0],&yy0);
207:     VecRestoreArrayRead(yy[1],&yy1);
208:     VecRestoreArrayRead(yy[2],&yy2);
209:     break;
210:   case 2:
211:     VecGetArrayRead(yy[0],&yy0);
212:     VecGetArrayRead(yy[1],&yy1);
213:     switch (j_rem=j&0x3) {
214:     case 3:
215:       x2    = x[2];
216:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
217:     case 2:
218:       x1    = x[1];
219:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
220:     case 1:
221:       x0    = x[0];
222:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
223:     case 0:
224:       x   += j_rem;
225:       yy0 += j_rem;
226:       yy1 += j_rem;
227:       j   -= j_rem;
228:       break;
229:     }
230:     while (j>0) {
231:       x0 = x[0];
232:       x1 = x[1];
233:       x2 = x[2];
234:       x3 = x[3];
235:       x += 4;

237:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
238:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
239:       j    -= 4;
240:     }
241:     z[0] = sum0;
242:     z[1] = sum1;

244:     VecRestoreArrayRead(yy[0],&yy0);
245:     VecRestoreArrayRead(yy[1],&yy1);
246:     break;
247:   case 1:
248:     VecGetArrayRead(yy[0],&yy0);
249:     switch (j_rem=j&0x3) {
250:     case 3:
251:       x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
252:     case 2:
253:       x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
254:     case 1:
255:       x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
256:     case 0:
257:       x   += j_rem;
258:       yy0 += j_rem;
259:       j   -= j_rem;
260:       break;
261:     }
262:     while (j>0) {
263:       sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
264:             + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
265:       yy0  +=4;
266:       j    -= 4; x+=4;
267:     }
268:     z[0] = sum0;

270:     VecRestoreArrayRead(yy[0],&yy0);
271:     break;
272:   case 0:
273:     break;
274:   }
275:   z  += nv_rem;
276:   i  -= nv_rem;
277:   yy += nv_rem;

279:   while (i >0) {
280:     sum0 = 0.;
281:     sum1 = 0.;
282:     sum2 = 0.;
283:     sum3 = 0.;
284:     VecGetArrayRead(yy[0],&yy0);
285:     VecGetArrayRead(yy[1],&yy1);
286:     VecGetArrayRead(yy[2],&yy2);
287:     VecGetArrayRead(yy[3],&yy3);

289:     j = n;
290:     x = xbase;
291:     switch (j_rem=j&0x3) {
292:     case 3:
293:       x2    = x[2];
294:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
295:       sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
296:     case 2:
297:       x1    = x[1];
298:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
299:       sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
300:     case 1:
301:       x0    = x[0];
302:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
303:       sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
304:     case 0:
305:       x   += j_rem;
306:       yy0 += j_rem;
307:       yy1 += j_rem;
308:       yy2 += j_rem;
309:       yy3 += j_rem;
310:       j   -= j_rem;
311:       break;
312:     }
313:     while (j>0) {
314:       x0 = x[0];
315:       x1 = x[1];
316:       x2 = x[2];
317:       x3 = x[3];
318:       x += 4;

320:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
321:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
322:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
323:       sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
324:       j    -= 4;
325:     }
326:     z[0] = sum0;
327:     z[1] = sum1;
328:     z[2] = sum2;
329:     z[3] = sum3;
330:     z   += 4;
331:     i   -= 4;
332:     VecRestoreArrayRead(yy[0],&yy0);
333:     VecRestoreArrayRead(yy[1],&yy1);
334:     VecRestoreArrayRead(yy[2],&yy2);
335:     VecRestoreArrayRead(yy[3],&yy3);
336:     yy  += 4;
337:   }
338:   VecRestoreArrayRead(xin,&xbase);
339:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
340:   return(0);
341: }
342: #endif
343: #endif
344: /* ----------------------------------------------------------------------------*/
347: PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
348: {
349:   PetscErrorCode    ierr;
350:   PetscInt          n = xin->map->n,i,j,nv_rem,j_rem;
351:   PetscScalar       sum0,sum1,sum2,sum3,x0,x1,x2,x3;
352:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
353:   Vec               *yy;

356:   sum0 = 0.;
357:   sum1 = 0.;
358:   sum2 = 0.;

360:   i      = nv;
361:   nv_rem = nv&0x3;
362:   yy     = (Vec*)yin;
363:   j      = n;
364:   VecGetArrayRead(xin,&x);

366:   switch (nv_rem) {
367:   case 3:
368:     VecGetArrayRead(yy[0],&yy0);
369:     VecGetArrayRead(yy[1],&yy1);
370:     VecGetArrayRead(yy[2],&yy2);
371:     switch (j_rem=j&0x3) {
372:     case 3:
373:       x2    = x[2];
374:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
375:       sum2 += x2*yy2[2];
376:     case 2:
377:       x1    = x[1];
378:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
379:       sum2 += x1*yy2[1];
380:     case 1:
381:       x0    = x[0];
382:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
383:       sum2 += x0*yy2[0];
384:     case 0:
385:       x   += j_rem;
386:       yy0 += j_rem;
387:       yy1 += j_rem;
388:       yy2 += j_rem;
389:       j   -= j_rem;
390:       break;
391:     }
392:     while (j>0) {
393:       x0 = x[0];
394:       x1 = x[1];
395:       x2 = x[2];
396:       x3 = x[3];
397:       x += 4;

399:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
400:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
401:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
402:       j    -= 4;
403:     }
404:     z[0] = sum0;
405:     z[1] = sum1;
406:     z[2] = sum2;
407:     VecRestoreArrayRead(yy[0],&yy0);
408:     VecRestoreArrayRead(yy[1],&yy1);
409:     VecRestoreArrayRead(yy[2],&yy2);
410:     break;
411:   case 2:
412:     VecGetArrayRead(yy[0],&yy0);
413:     VecGetArrayRead(yy[1],&yy1);
414:     switch (j_rem=j&0x3) {
415:     case 3:
416:       x2    = x[2];
417:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
418:     case 2:
419:       x1    = x[1];
420:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
421:     case 1:
422:       x0    = x[0];
423:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
424:     case 0:
425:       x   += j_rem;
426:       yy0 += j_rem;
427:       yy1 += j_rem;
428:       j   -= j_rem;
429:       break;
430:     }
431:     while (j>0) {
432:       x0 = x[0];
433:       x1 = x[1];
434:       x2 = x[2];
435:       x3 = x[3];
436:       x += 4;

438:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
439:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
440:       j    -= 4;
441:     }
442:     z[0] = sum0;
443:     z[1] = sum1;

445:     VecRestoreArrayRead(yy[0],&yy0);
446:     VecRestoreArrayRead(yy[1],&yy1);
447:     break;
448:   case 1:
449:     VecGetArrayRead(yy[0],&yy0);
450:     switch (j_rem=j&0x3) {
451:     case 3:
452:       x2 = x[2]; sum0 += x2*yy0[2];
453:     case 2:
454:       x1 = x[1]; sum0 += x1*yy0[1];
455:     case 1:
456:       x0 = x[0]; sum0 += x0*yy0[0];
457:     case 0:
458:       x   += j_rem;
459:       yy0 += j_rem;
460:       j   -= j_rem;
461:       break;
462:     }
463:     while (j>0) {
464:       sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
465:       j    -= 4; x+=4;
466:     }
467:     z[0] = sum0;

469:     VecRestoreArrayRead(yy[0],&yy0);
470:     break;
471:   case 0:
472:     break;
473:   }
474:   z  += nv_rem;
475:   i  -= nv_rem;
476:   yy += nv_rem;

478:   while (i >0) {
479:     sum0 = 0.;
480:     sum1 = 0.;
481:     sum2 = 0.;
482:     sum3 = 0.;
483:     VecGetArrayRead(yy[0],&yy0);
484:     VecGetArrayRead(yy[1],&yy1);
485:     VecGetArrayRead(yy[2],&yy2);
486:     VecGetArrayRead(yy[3],&yy3);

488:     j = n;
489:     switch (j_rem=j&0x3) {
490:     case 3:
491:       x2    = x[2];
492:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
493:       sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
494:     case 2:
495:       x1    = x[1];
496:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
497:       sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
498:     case 1:
499:       x0    = x[0];
500:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
501:       sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
502:     case 0:
503:       x   += j_rem;
504:       yy0 += j_rem;
505:       yy1 += j_rem;
506:       yy2 += j_rem;
507:       yy3 += j_rem;
508:       j   -= j_rem;
509:       break;
510:     }
511:     while (j>0) {
512:       x0 = x[0];
513:       x1 = x[1];
514:       x2 = x[2];
515:       x3 = x[3];
516:       x += 4;

518:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
519:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
520:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
521:       sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
522:       j    -= 4;
523:     }
524:     z[0] = sum0;
525:     z[1] = sum1;
526:     z[2] = sum2;
527:     z[3] = sum3;
528:     z   += 4;
529:     i   -= 4;
530:     VecRestoreArrayRead(yy[0],&yy0);
531:     VecRestoreArrayRead(yy[1],&yy1);
532:     VecRestoreArrayRead(yy[2],&yy2);
533:     VecRestoreArrayRead(yy[3],&yy3);
534:     yy  += 4;
535:   }
536:   VecRestoreArrayRead(xin,&x);
537:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
538:   return(0);
539: }

541: #if defined(PETSC_THREADCOMM_ACTIVE)
542: PetscErrorCode VecMax_kernel(PetscInt thread_id,Vec xin,PetscThreadCommReduction red)
543: {
544:   PetscErrorCode    ierr;
545:   PetscInt          *trstarts=xin->map->trstarts;
546:   PetscInt          start,end,i;
547:   const PetscScalar *xx;
548:   PetscReal         lred[2],tmp;

550:   start   = trstarts[thread_id];
551:   end     = trstarts[thread_id+1];
552:   VecGetArrayRead(xin,&xx);
553:   lred[0] = xx[start]; lred[1] = start;
554:   for (i=start+1; i<end; i++) {
555:     if ((tmp = PetscRealPart(xx[i])) > lred[0]) { lred[0] = tmp; lred[1] = i;}
556:   }
557:   VecRestoreArrayRead(xin,&xx);
558:   PetscThreadReductionKernelPost(thread_id,red,lred);
559:   return 0;
560: }

564: PetscErrorCode VecMax_Seq(Vec xin,PetscInt *idx,PetscReal *z)
565: {
566:   PetscErrorCode           ierr;
567:   PetscInt                 n=xin->map->n;
568:   PetscThreadCommReduction red;
569:   PetscReal                out[2];

572:   if (!n) {
573:     *z   = PETSC_MIN_REAL;
574:     *idx = -1;
575:   } else {
576:     PetscThreadReductionBegin(PetscObjectComm((PetscObject)xin),THREADCOMM_MAXLOC,PETSC_REAL,1,&red);
577:     PetscThreadCommRunKernel2(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecMax_kernel,xin,red);
578:     PetscThreadReductionEnd(red,out);
579:     *z   = out[0];
580:     if (idx) *idx = (PetscInt)out[1];
581:   }
582:   return(0);
583: }
584: #else
587: PetscErrorCode VecMax_Seq(Vec xin,PetscInt *idx,PetscReal *z)
588: {
589:   PetscInt          i,j=0,n = xin->map->n;
590:   PetscReal         max,tmp;
591:   const PetscScalar *xx;
592:   PetscErrorCode    ierr;

595:   VecGetArrayRead(xin,&xx);
596:   if (!n) {
597:     max = PETSC_MIN_REAL;
598:     j   = -1;
599:   } else {
600:     max = PetscRealPart(*xx++); j = 0;
601:     for (i=1; i<n; i++) {
602:       if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
603:     }
604:   }
605:   *z = max;
606:   if (idx) *idx = j;
607:   VecRestoreArrayRead(xin,&xx);
608:   return(0);
609: }
610: #endif

612: #if defined(PETSC_THREADCOMM_ACTIVE)
613: PetscErrorCode VecMin_kernel(PetscInt thread_id,Vec xin,PetscThreadCommReduction red)
614: {
615:   PetscErrorCode    ierr;
616:   PetscInt          *trstarts=xin->map->trstarts;
617:   PetscInt          start,end,i;
618:   const PetscScalar *xx;
619:   PetscReal         lred[2],tmp;

621:   start   = trstarts[thread_id];
622:   end     = trstarts[thread_id+1];
623:   VecGetArrayRead(xin,&xx);
624:   lred[0] = xx[start]; lred[1] = start;
625:   for (i=start+1;i<end;i++) {
626:     if ((tmp = PetscRealPart(xx[i])) < lred[0]) { lred[0] = tmp; lred[1] = i;}
627:   }
628:   VecRestoreArrayRead(xin,&xx);
629:   PetscThreadReductionKernelPost(thread_id,red,lred);
630:   return 0;
631: }

635: PetscErrorCode VecMin_Seq(Vec xin,PetscInt *idx,PetscReal *z)
636: {
637:   PetscErrorCode           ierr;
638:   PetscInt                 n=xin->map->n;
639:   PetscThreadCommReduction red;
640:   PetscReal                out[2];

643:   if (!n) {
644:     *z   = PETSC_MAX_REAL;
645:     *idx = -1;
646:   } else {
647:     PetscThreadReductionBegin(PetscObjectComm((PetscObject)xin),THREADCOMM_MINLOC,PETSC_REAL,1,&red);
648:     PetscThreadCommRunKernel2(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecMin_kernel,xin,red);
649:     PetscThreadReductionEnd(red,out);
650:     *z   = out[0];
651:     if (idx) *idx = (PetscInt)out[1];
652:   }
653:   return(0);
654: }
655: #else
658: PetscErrorCode VecMin_Seq(Vec xin,PetscInt *idx,PetscReal *z)
659: {
660:   PetscInt          i,j=0,n = xin->map->n;
661:   PetscReal         min,tmp;
662:   const PetscScalar *xx;
663:   PetscErrorCode    ierr;

666:   VecGetArrayRead(xin,&xx);
667:   if (!n) {
668:     min = PETSC_MAX_REAL;
669:     j   = -1;
670:   } else {
671:     min = PetscRealPart(*xx++); j = 0;
672:     for (i=1; i<n; i++) {
673:       if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
674:     }
675:   }
676:   *z = min;
677:   if (idx) *idx = j;
678:   VecGetArrayRead(xin,&xx);
679:   return(0);
680: }
681: #endif

683: #if defined(PETSC_THREADCOMM_ACTIVE)
684: PetscErrorCode VecSet_kernel(PetscInt thread_id,Vec xin,PetscScalar *alpha_p)
685: {
687:   PetscScalar    *xx;
688:   PetscInt       i,start,end;
689:   PetscInt       *trstarts=xin->map->trstarts;
690:   PetscScalar    alpha    = *alpha_p;

692:   start = trstarts[thread_id];
693:   end   = trstarts[thread_id+1];
694:   VecGetArray(xin,&xx);
695:   if (alpha == (PetscScalar)0.0) {
696:     PetscMemzero(xx+start,(end-start)*sizeof(PetscScalar));
697:   } else {
698:     for (i=start; i<end; i++) xx[i] = alpha;
699:   }
700:   VecRestoreArray(xin,&xx);
701:   return 0;
702: }

706: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
707: {
709:   PetscScalar    *scalar;

712:   PetscThreadCommGetScalars(PetscObjectComm((PetscObject)xin),&scalar,NULL,NULL);
713:   *scalar = alpha;
714:   PetscThreadCommRunKernel2(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecSet_kernel,xin,scalar);
715:   return(0);
716: }
717: #else
720: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
721: {
722:   PetscInt       i,n = xin->map->n;
723:   PetscScalar    *xx;

727:   VecGetArray(xin,&xx);
728:   if (alpha == (PetscScalar)0.0) {
729:     PetscMemzero(xx,n*sizeof(PetscScalar));
730:   } else {
731:     for (i=0; i<n; i++) xx[i] = alpha;
732:   }
733:   VecRestoreArray(xin,&xx);
734:   return(0);
735: }
736: #endif

738: #if defined(PETSC_THREADCOMM_ACTIVE)
739: PetscErrorCode VecMAXPY_kernel(PetscInt thread_id,Vec xin,PetscInt *nv_p,const PetscScalar *alpha,Vec *y)
740: {
741:   PetscErrorCode    ierr;
742:   PetscInt          *trstarts=xin->map->trstarts,j,j_rem,nv=*nv_p;
743:   PetscInt          start,end,n;
744:   const PetscScalar *yy0,*yy1,*yy2,*yy3;
745:   PetscScalar       *xx,*x,alpha0,alpha1,alpha2,alpha3;
746:   const PetscScalar *y0,*y1,*y2,*y3;


749:   start = trstarts[thread_id];
750:   end   = trstarts[thread_id+1];
751:   n     = end-start;
752: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
753: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
754: #endif

756:   VecGetArray(xin,&xx);
757:   x    = xx+start;
758:   switch (j_rem=nv&0x3) {
759:   case 3:
760:     VecGetArrayRead(y[0],&yy0);
761:     VecGetArrayRead(y[1],&yy1);
762:     VecGetArrayRead(y[2],&yy2);
763:     alpha0 = alpha[0];
764:     alpha1 = alpha[1];
765:     alpha2 = alpha[2];
766:     alpha += 3;
767:     y0 = yy0 + start; y1 = yy1+start; y2 = yy2+start;
768:     PetscKernelAXPY3(x,alpha0,alpha1,alpha2,y0,y1,y2,n);
769:     VecRestoreArrayRead(y[0],&yy0);
770:     VecRestoreArrayRead(y[1],&yy1);
771:     VecRestoreArrayRead(y[2],&yy2);
772:     y   += 3;
773:     break;
774:   case 2:
775:     VecGetArrayRead(y[0],&yy0);
776:     VecGetArrayRead(y[1],&yy1);
777:     alpha0 = alpha[0];
778:     alpha1 = alpha[1];
779:     alpha +=2;
780:     y0 = yy0+start; y1=yy1+start;
781:     PetscKernelAXPY2(x,alpha0,alpha1,y0,y1,n);
782:     VecRestoreArrayRead(y[0],&yy0);
783:     VecRestoreArrayRead(y[1],&yy1);
784:     y   +=2;
785:     break;
786:   case 1:
787:     VecGetArrayRead(y[0],&yy0);
788:     alpha0 = *alpha++;
789:     y0     = yy0 + start;
790:     PetscKernelAXPY(x,alpha0,y0,n);
791:     VecRestoreArrayRead(y[0],&yy0);
792:     y   +=1;
793:     break;
794:   }
795:   for (j=j_rem; j<nv; j+=4) {
796:     VecGetArrayRead(y[0],&yy0);
797:     VecGetArrayRead(y[1],&yy1);
798:     VecGetArrayRead(y[2],&yy2);
799:     VecGetArrayRead(y[3],&yy3);
800:     alpha0 = alpha[0];
801:     alpha1 = alpha[1];
802:     alpha2 = alpha[2];
803:     alpha3 = alpha[3];
804:     alpha += 4;

806:     y0 = yy0+start; y1=yy1+start; y2=yy2+start; y3=yy3+start;
807:     PetscKernelAXPY4(x,alpha0,alpha1,alpha2,alpha3,y0,y1,y2,y3,n);
808:     VecRestoreArrayRead(y[0],&yy0);
809:     VecRestoreArrayRead(y[1],&yy1);
810:     VecRestoreArrayRead(y[2],&yy2);
811:     VecRestoreArrayRead(y[3],&yy3);
812:     y   += 4;
813:   }
814:   VecRestoreArray(xin,&xx);
815:   return 0;
816: }

820: PetscErrorCode VecMAXPY_Seq(Vec xin,PetscInt nv,const PetscScalar *alpha,Vec *y)
821: {
823:   PetscInt       *int1;

826:   PetscLogFlops(nv*2.0*xin->map->n);
827:   PetscThreadCommGetInts(PetscObjectComm((PetscObject)xin),&int1,NULL,NULL);
828:   *int1 = nv;
829:   PetscThreadCommRunKernel4(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecMAXPY_kernel,xin,int1,(void*)alpha,y);
830:   return(0);
831: }
832: #else
835: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
836: {
837:   PetscErrorCode    ierr;
838:   PetscInt          n = xin->map->n,j,j_rem;
839:   const PetscScalar *yy0,*yy1,*yy2,*yy3;
840:   PetscScalar       *xx,alpha0,alpha1,alpha2,alpha3;

842: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
843: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
844: #endif

847:   PetscLogFlops(nv*2.0*n);
848:   VecGetArray(xin,&xx);
849:   switch (j_rem=nv&0x3) {
850:   case 3:
851:     VecGetArrayRead(y[0],&yy0);
852:     VecGetArrayRead(y[1],&yy1);
853:     VecGetArrayRead(y[2],&yy2);
854:     alpha0 = alpha[0];
855:     alpha1 = alpha[1];
856:     alpha2 = alpha[2];
857:     alpha += 3;
858:     PetscKernelAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
859:     VecRestoreArrayRead(y[0],&yy0);
860:     VecRestoreArrayRead(y[1],&yy1);
861:     VecRestoreArrayRead(y[2],&yy2);
862:     y   += 3;
863:     break;
864:   case 2:
865:     VecGetArrayRead(y[0],&yy0);
866:     VecGetArrayRead(y[1],&yy1);
867:     alpha0 = alpha[0];
868:     alpha1 = alpha[1];
869:     alpha +=2;
870:     PetscKernelAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
871:     VecRestoreArrayRead(y[0],&yy0);
872:     VecRestoreArrayRead(y[1],&yy1);
873:     y   +=2;
874:     break;
875:   case 1:
876:     VecGetArrayRead(y[0],&yy0);
877:     alpha0 = *alpha++;
878:     PetscKernelAXPY(xx,alpha0,yy0,n);
879:     VecRestoreArrayRead(y[0],&yy0);
880:     y   +=1;
881:     break;
882:   }
883:   for (j=j_rem; j<nv; j+=4) {
884:     VecGetArrayRead(y[0],&yy0);
885:     VecGetArrayRead(y[1],&yy1);
886:     VecGetArrayRead(y[2],&yy2);
887:     VecGetArrayRead(y[3],&yy3);
888:     alpha0 = alpha[0];
889:     alpha1 = alpha[1];
890:     alpha2 = alpha[2];
891:     alpha3 = alpha[3];
892:     alpha += 4;

894:     PetscKernelAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
895:     VecRestoreArrayRead(y[0],&yy0);
896:     VecRestoreArrayRead(y[1],&yy1);
897:     VecRestoreArrayRead(y[2],&yy2);
898:     VecRestoreArrayRead(y[3],&yy3);
899:     y   += 4;
900:   }
901:   VecRestoreArray(xin,&xx);
902:   return(0);
903: }
904: #endif

906: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>
907: #if defined(PETSC_THREADCOMM_ACTIVE)
908: PetscErrorCode VecAYPX_kernel(PetscInt thread_id,Vec yin,PetscScalar *alpha_p,Vec xin)
909: {
910:   PetscErrorCode    ierr;
911:   PetscScalar       *yy;
912:   const PetscScalar *xx;
913:   PetscInt          *trstarts=yin->map->trstarts,i;
914:   PetscScalar       alpha = *alpha_p;

916:   VecGetArrayRead(xin,&xx);
917:   VecGetArray(yin,&yy);

919:   if (alpha == (PetscScalar)-1.0) {
920:     for (i=trstarts[thread_id]; i < trstarts[thread_id+1]; i++) {
921:       yy[i] = xx[i] - yy[i];
922:     }
923:   } else {
924: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
925:       PetscInt start=trstarts[thread_id],end=trstarts[thread_id+1],n;
926:       PetscScalar oalpha = alpha;
927:       n = end - start;
928:       fortranaypx_(&n,&oalpha,xx+start,yy+start);
929:   }
930: #else
931:     for (i=trstarts[thread_id]; i < trstarts[thread_id+1];i++) {
932:       yy[i] = xx[i] + alpha*yy[i];
933:     }
934:   }
935: #endif
936:   VecRestoreArrayRead(xin,&xx);
937:   VecRestoreArray(yin,&yy);
938:   return 0;
939: }
942: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
943: {
944:   PetscErrorCode    ierr;
945:   PetscInt          n=yin->map->n;

948:   if (alpha == (PetscScalar)0.0) {
949:     VecCopy(xin,yin);
950:   } else if (alpha == (PetscScalar)1.0) {
951:     VecAXPY_Seq(yin,alpha,xin);
952:   } else {
953:     PetscScalar *scal1;
954:     PetscThreadCommGetScalars(PetscObjectComm((PetscObject)yin),&scal1,NULL,NULL);
955:     *scal1 = alpha;
956:     PetscThreadCommRunKernel3(PetscObjectComm((PetscObject)yin),(PetscThreadKernel)VecAYPX_kernel,yin,scal1,xin);
957:     if (alpha == (PetscScalar)-1.0) {
958:       PetscLogFlops(1.0*n);
959:     } else {
960:       PetscLogFlops(2.0*n);
961:     }
962:   }
963:   return(0);
964: }
965: #else
968: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
969: {
970:   PetscErrorCode    ierr;
971:   PetscInt          n = yin->map->n;
972:   PetscScalar       *yy;
973:   const PetscScalar *xx;

976:   if (alpha == (PetscScalar)0.0) {
977:     VecCopy(xin,yin);
978:   } else if (alpha == (PetscScalar)1.0) {
979:     VecAXPY_Seq(yin,alpha,xin);
980:   } else if (alpha == (PetscScalar)-1.0) {
981:     PetscInt i;
982:     VecGetArrayRead(xin,&xx);
983:     VecGetArray(yin,&yy);

985:     for (i=0; i<n; i++) yy[i] = xx[i] - yy[i];

987:     VecRestoreArrayRead(xin,&xx);
988:     VecRestoreArray(yin,&yy);
989:     PetscLogFlops(1.0*n);
990:   } else {
991:     VecGetArrayRead(xin,&xx);
992:     VecGetArray(yin,&yy);
993: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
994:     {
995:       PetscScalar oalpha = alpha;
996:       fortranaypx_(&n,&oalpha,xx,yy);
997:     }
998: #else
999:     {
1000:       PetscInt i;

1002:       for (i=0; i<n; i++) yy[i] = xx[i] + alpha*yy[i];
1003:     }
1004: #endif
1005:     VecRestoreArrayRead(xin,&xx);
1006:     VecRestoreArray(yin,&yy);
1007:     PetscLogFlops(2.0*n);
1008:   }
1009:   return(0);
1010: }
1011: #endif

1013: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
1014: /*
1015:    IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
1016:   to be slower than a regular C loop.  Hence,we do not include it.
1017:   void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
1018: */
1019: #if defined(PETSC_THREADCOMM_ACTIVE)
1020: PetscErrorCode VecWAXPY_kernel(PetscInt thread_id,Vec win,PetscScalar *alpha_p,Vec xin,Vec yin)
1021: {
1022:   PetscErrorCode    ierr;
1023:   PetscScalar       alpha=*alpha_p;
1024:   PetscInt          *trstarts=win->map->trstarts,start,end,i;
1025:   const PetscScalar *xx,*yy;
1026:   PetscScalar       *ww;

1028:   start = trstarts[thread_id];
1029:   end   = trstarts[thread_id+1];

1031:   VecGetArrayRead(xin,&xx);
1032:   VecGetArrayRead(yin,&yy);
1033:   VecGetArray(win,&ww);
1034:   if (alpha == (PetscScalar)1.0) {
1035:     for (i=start; i < end; i++) ww[i] = yy[i] + xx[i];
1036:   } else if (alpha == (PetscScalar)-1.0) {
1037:     for (i=start; i < end; i++) ww[i] = yy[i] - xx[i];
1038:   } else if (alpha == (PetscScalar)0.0) {
1039:     PetscMemcpy(ww+start,yy+start,(end-start)*sizeof(PetscScalar));
1040:   } else {
1041:     PetscScalar oalpha = alpha;
1042: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
1043:     fortranwaxpy_(&n,&oalpha,xx+start,yy+start,ww+start);
1044: #else
1045:     for (i=start; i < end; i++) ww[i] = yy[i] + oalpha*xx[i];
1046: #endif
1047:   }
1048:   VecRestoreArrayRead(xin,&xx);
1049:   VecRestoreArrayRead(yin,&yy);
1050:   VecRestoreArray(win,&ww);
1051:   return 0;
1052: }

1056: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
1057: {
1058:   PetscErrorCode     ierr;
1059:   PetscInt           n = win->map->n;
1060:   PetscScalar        *scal1;

1063:   PetscThreadCommGetScalars(PetscObjectComm((PetscObject)win),&scal1,NULL,NULL);
1064:   *scal1 = alpha;
1065:   PetscThreadCommRunKernel4(PetscObjectComm((PetscObject)win),(PetscThreadKernel)VecWAXPY_kernel,win,scal1,xin,yin);
1066:   if (alpha == (PetscScalar)1.0) {
1067:     PetscLogFlops(n);
1068:   } else if (alpha == (PetscScalar)-1.0) {
1069:     PetscLogFlops(n);
1070:   } else {
1071:     PetscLogFlops(2.0*n);
1072:   }
1073:   return(0);
1074: }
1075: #else
1078: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
1079: {
1080:   PetscErrorCode     ierr;
1081:   PetscInt           i,n = win->map->n;
1082:   PetscScalar        *ww;
1083:   const PetscScalar  *yy,*xx;

1086:   VecGetArrayRead(xin,&xx);
1087:   VecGetArrayRead(yin,&yy);
1088:   VecGetArray(win,&ww);
1089:   if (alpha == (PetscScalar)1.0) {
1090:     PetscLogFlops(n);
1091:     /* could call BLAS axpy after call to memcopy, but may be slower */
1092:     for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
1093:   } else if (alpha == (PetscScalar)-1.0) {
1094:     PetscLogFlops(n);
1095:     for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
1096:   } else if (alpha == (PetscScalar)0.0) {
1097:     PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
1098:   } else {
1099:     PetscScalar oalpha = alpha;
1100: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
1101:     fortranwaxpy_(&n,&oalpha,xx,yy,ww);
1102: #else
1103:     for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
1104: #endif
1105:     PetscLogFlops(2.0*n);
1106:   }
1107:   VecRestoreArrayRead(xin,&xx);
1108:   VecRestoreArrayRead(yin,&yy);
1109:   VecRestoreArray(win,&ww);
1110:   return(0);
1111: }
1112: #endif

1116: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
1117: {
1118:   PetscErrorCode    ierr;
1119:   PetscInt          n = xin->map->n,i;
1120:   const PetscScalar *xx,*yy;
1121:   PetscReal         m = 0.0;

1124:   VecGetArrayRead(xin,&xx);
1125:   VecGetArrayRead(yin,&yy);
1126:   for (i = 0; i < n; i++) {
1127:     if (yy[i] != (PetscScalar)0.0) {
1128:       m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
1129:     } else {
1130:       m = PetscMax(PetscAbsScalar(xx[i]), m);
1131:     }
1132:   }
1133:   VecRestoreArrayRead(xin,&xx);
1134:   VecRestoreArrayRead(yin,&yy);
1135:   MPI_Allreduce(&m,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
1136:   PetscLogFlops(n);
1137:   return(0);
1138: }

1142: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
1143: {
1144:   Vec_Seq *v = (Vec_Seq*)vin->data;

1147:   if (v->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
1148:   v->unplacedarray = v->array;  /* save previous array so reset can bring it back */
1149:   v->array         = (PetscScalar*)a;
1150:   return(0);
1151: }

1155: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
1156: {
1157:   Vec_Seq        *v = (Vec_Seq*)vin->data;

1161:   PetscFree(v->array_allocated);
1162:   v->array_allocated = v->array = (PetscScalar*)a;
1163:   return(0);
1164: }