Actual source code: dvec2.c
petsc-3.4.2 2013-07-02
2: /*
3: Defines some vector operation functions that are shared by
4: sequential and parallel vectors.
5: */
6: #include <../src/vec/vec/impls/dvecimpl.h>
7: #include <petsc-private/kernels/petscaxpy.h>
8: #include <petscthreadcomm.h>
10: #if defined(PETSC_THREADCOMM_ACTIVE)
11: PetscErrorCode VecMDot_kernel(PetscInt thread_id,Vec xin,PetscInt *nvp,Vec *yvec,PetscThreadCommReduction red)
12: {
14: PetscInt *trstarts=xin->map->trstarts;
15: PetscInt start,end,nv=*nvp,i,j;
16: PetscScalar *xx,*yy;
17: PetscScalar sum;
19: start = trstarts[thread_id];
20: end = trstarts[thread_id+1];
21: VecGetArray(xin,&xx);
22: for (i=0; i<nv; i++) {
23: sum = 0.;
24: VecGetArray(yvec[i],&yy);
25: for (j=start; j<end; j++) sum += xx[j]*PetscConj(yy[j]);
26: PetscThreadReductionKernelPost(thread_id,red,&sum);
27: VecRestoreArray(yvec[i],&yy);
28: }
29: VecRestoreArray(xin,&xx);
31: return 0;
32: }
36: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
37: {
38: PetscErrorCode ierr;
39: PetscThreadCommReduction red;
40: PetscInt *nvp,nreds,i,j;
43: PetscThreadCommGetInts(PetscObjectComm((PetscObject)xin),&nvp,NULL,NULL);
44: for (i=0; i<nv; i+=nreds) {
45: nreds = PetscMin(nv-i,PETSC_REDUCTIONS_MAX);
46: PetscThreadReductionBegin(PetscObjectComm((PetscObject)xin),THREADCOMM_SUM,PETSC_SCALAR,nreds,&red);
47: *nvp = nreds;
48: PetscThreadCommRunKernel4(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecMDot_kernel,xin,nvp,(Vec*)yin+i,red);
49: for (j=0; j<nreds; j++) {
50: PetscThreadReductionEnd(red,&z[i+j]);
51: }
52: }
53: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
54: return(0);
55: }
57: #else
58: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
59: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
62: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
63: {
64: PetscErrorCode ierr;
65: PetscInt i,nv_rem,n = xin->map->n;
66: PetscScalar sum0,sum1,sum2,sum3;
67: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
68: Vec *yy;
71: sum0 = 0.0;
72: sum1 = 0.0;
73: sum2 = 0.0;
75: i = nv;
76: nv_rem = nv&0x3;
77: yy = (Vec*)yin;
78: VecGetArrayRead(xin,&x);
80: switch (nv_rem) {
81: case 3:
82: VecGetArrayRead(yy[0],&yy0);
83: VecGetArrayRead(yy[1],&yy1);
84: VecGetArrayRead(yy[2],&yy2);
85: fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
86: VecRestoreArrayRead(yy[0],&yy0);
87: VecRestoreArrayRead(yy[1],&yy1);
88: VecRestoreArrayRead(yy[2],&yy2);
89: z[0] = sum0;
90: z[1] = sum1;
91: z[2] = sum2;
92: break;
93: case 2:
94: VecGetArrayRead(yy[0],&yy0);
95: VecGetArrayRead(yy[1],&yy1);
96: fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
97: VecRestoreArrayRead(yy[0],&yy0);
98: VecRestoreArrayRead(yy[1],&yy1);
99: z[0] = sum0;
100: z[1] = sum1;
101: break;
102: case 1:
103: VecGetArrayRead(yy[0],&yy0);
104: fortranmdot1_(x,yy0,&n,&sum0);
105: VecRestoreArrayRead(yy[0],&yy0);
106: z[0] = sum0;
107: break;
108: case 0:
109: break;
110: }
111: z += nv_rem;
112: i -= nv_rem;
113: yy += nv_rem;
115: while (i >0) {
116: sum0 = 0.;
117: sum1 = 0.;
118: sum2 = 0.;
119: sum3 = 0.;
120: VecGetArrayRead(yy[0],&yy0);
121: VecGetArrayRead(yy[1],&yy1);
122: VecGetArrayRead(yy[2],&yy2);
123: VecGetArrayRead(yy[3],&yy3);
124: fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
125: VecRestoreArrayRead(yy[0],&yy0);
126: VecRestoreArrayRead(yy[1],&yy1);
127: VecRestoreArrayRead(yy[2],&yy2);
128: VecRestoreArrayRead(yy[3],&yy3);
129: yy += 4;
130: z[0] = sum0;
131: z[1] = sum1;
132: z[2] = sum2;
133: z[3] = sum3;
134: z += 4;
135: i -= 4;
136: }
137: VecRestoreArrayRead(xin,&x);
138: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
139: return(0);
140: }
142: #else
145: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
146: {
147: PetscErrorCode ierr;
148: PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
149: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
150: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
151: Vec *yy;
154: sum0 = 0.;
155: sum1 = 0.;
156: sum2 = 0.;
158: i = nv;
159: nv_rem = nv&0x3;
160: yy = (Vec*)yin;
161: j = n;
162: VecGetArrayRead(xin,&xbase);
163: x = xbase;
165: switch (nv_rem) {
166: case 3:
167: VecGetArrayRead(yy[0],&yy0);
168: VecGetArrayRead(yy[1],&yy1);
169: VecGetArrayRead(yy[2],&yy2);
170: switch (j_rem=j&0x3) {
171: case 3:
172: x2 = x[2];
173: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
174: sum2 += x2*PetscConj(yy2[2]);
175: case 2:
176: x1 = x[1];
177: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
178: sum2 += x1*PetscConj(yy2[1]);
179: case 1:
180: x0 = x[0];
181: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
182: sum2 += x0*PetscConj(yy2[0]);
183: case 0:
184: x += j_rem;
185: yy0 += j_rem;
186: yy1 += j_rem;
187: yy2 += j_rem;
188: j -= j_rem;
189: break;
190: }
191: while (j>0) {
192: x0 = x[0];
193: x1 = x[1];
194: x2 = x[2];
195: x3 = x[3];
196: x += 4;
198: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
199: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
200: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
201: j -= 4;
202: }
203: z[0] = sum0;
204: z[1] = sum1;
205: z[2] = sum2;
206: VecRestoreArrayRead(yy[0],&yy0);
207: VecRestoreArrayRead(yy[1],&yy1);
208: VecRestoreArrayRead(yy[2],&yy2);
209: break;
210: case 2:
211: VecGetArrayRead(yy[0],&yy0);
212: VecGetArrayRead(yy[1],&yy1);
213: switch (j_rem=j&0x3) {
214: case 3:
215: x2 = x[2];
216: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
217: case 2:
218: x1 = x[1];
219: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
220: case 1:
221: x0 = x[0];
222: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
223: case 0:
224: x += j_rem;
225: yy0 += j_rem;
226: yy1 += j_rem;
227: j -= j_rem;
228: break;
229: }
230: while (j>0) {
231: x0 = x[0];
232: x1 = x[1];
233: x2 = x[2];
234: x3 = x[3];
235: x += 4;
237: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
238: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
239: j -= 4;
240: }
241: z[0] = sum0;
242: z[1] = sum1;
244: VecRestoreArrayRead(yy[0],&yy0);
245: VecRestoreArrayRead(yy[1],&yy1);
246: break;
247: case 1:
248: VecGetArrayRead(yy[0],&yy0);
249: switch (j_rem=j&0x3) {
250: case 3:
251: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
252: case 2:
253: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
254: case 1:
255: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
256: case 0:
257: x += j_rem;
258: yy0 += j_rem;
259: j -= j_rem;
260: break;
261: }
262: while (j>0) {
263: sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
264: + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
265: yy0 +=4;
266: j -= 4; x+=4;
267: }
268: z[0] = sum0;
270: VecRestoreArrayRead(yy[0],&yy0);
271: break;
272: case 0:
273: break;
274: }
275: z += nv_rem;
276: i -= nv_rem;
277: yy += nv_rem;
279: while (i >0) {
280: sum0 = 0.;
281: sum1 = 0.;
282: sum2 = 0.;
283: sum3 = 0.;
284: VecGetArrayRead(yy[0],&yy0);
285: VecGetArrayRead(yy[1],&yy1);
286: VecGetArrayRead(yy[2],&yy2);
287: VecGetArrayRead(yy[3],&yy3);
289: j = n;
290: x = xbase;
291: switch (j_rem=j&0x3) {
292: case 3:
293: x2 = x[2];
294: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
295: sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
296: case 2:
297: x1 = x[1];
298: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
299: sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
300: case 1:
301: x0 = x[0];
302: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
303: sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
304: case 0:
305: x += j_rem;
306: yy0 += j_rem;
307: yy1 += j_rem;
308: yy2 += j_rem;
309: yy3 += j_rem;
310: j -= j_rem;
311: break;
312: }
313: while (j>0) {
314: x0 = x[0];
315: x1 = x[1];
316: x2 = x[2];
317: x3 = x[3];
318: x += 4;
320: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
321: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
322: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
323: sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
324: j -= 4;
325: }
326: z[0] = sum0;
327: z[1] = sum1;
328: z[2] = sum2;
329: z[3] = sum3;
330: z += 4;
331: i -= 4;
332: VecRestoreArrayRead(yy[0],&yy0);
333: VecRestoreArrayRead(yy[1],&yy1);
334: VecRestoreArrayRead(yy[2],&yy2);
335: VecRestoreArrayRead(yy[3],&yy3);
336: yy += 4;
337: }
338: VecRestoreArrayRead(xin,&xbase);
339: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
340: return(0);
341: }
342: #endif
343: #endif
344: /* ----------------------------------------------------------------------------*/
347: PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
348: {
349: PetscErrorCode ierr;
350: PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
351: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
352: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
353: Vec *yy;
356: sum0 = 0.;
357: sum1 = 0.;
358: sum2 = 0.;
360: i = nv;
361: nv_rem = nv&0x3;
362: yy = (Vec*)yin;
363: j = n;
364: VecGetArrayRead(xin,&x);
366: switch (nv_rem) {
367: case 3:
368: VecGetArrayRead(yy[0],&yy0);
369: VecGetArrayRead(yy[1],&yy1);
370: VecGetArrayRead(yy[2],&yy2);
371: switch (j_rem=j&0x3) {
372: case 3:
373: x2 = x[2];
374: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
375: sum2 += x2*yy2[2];
376: case 2:
377: x1 = x[1];
378: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
379: sum2 += x1*yy2[1];
380: case 1:
381: x0 = x[0];
382: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
383: sum2 += x0*yy2[0];
384: case 0:
385: x += j_rem;
386: yy0 += j_rem;
387: yy1 += j_rem;
388: yy2 += j_rem;
389: j -= j_rem;
390: break;
391: }
392: while (j>0) {
393: x0 = x[0];
394: x1 = x[1];
395: x2 = x[2];
396: x3 = x[3];
397: x += 4;
399: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
400: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
401: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
402: j -= 4;
403: }
404: z[0] = sum0;
405: z[1] = sum1;
406: z[2] = sum2;
407: VecRestoreArrayRead(yy[0],&yy0);
408: VecRestoreArrayRead(yy[1],&yy1);
409: VecRestoreArrayRead(yy[2],&yy2);
410: break;
411: case 2:
412: VecGetArrayRead(yy[0],&yy0);
413: VecGetArrayRead(yy[1],&yy1);
414: switch (j_rem=j&0x3) {
415: case 3:
416: x2 = x[2];
417: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
418: case 2:
419: x1 = x[1];
420: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
421: case 1:
422: x0 = x[0];
423: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
424: case 0:
425: x += j_rem;
426: yy0 += j_rem;
427: yy1 += j_rem;
428: j -= j_rem;
429: break;
430: }
431: while (j>0) {
432: x0 = x[0];
433: x1 = x[1];
434: x2 = x[2];
435: x3 = x[3];
436: x += 4;
438: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
439: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
440: j -= 4;
441: }
442: z[0] = sum0;
443: z[1] = sum1;
445: VecRestoreArrayRead(yy[0],&yy0);
446: VecRestoreArrayRead(yy[1],&yy1);
447: break;
448: case 1:
449: VecGetArrayRead(yy[0],&yy0);
450: switch (j_rem=j&0x3) {
451: case 3:
452: x2 = x[2]; sum0 += x2*yy0[2];
453: case 2:
454: x1 = x[1]; sum0 += x1*yy0[1];
455: case 1:
456: x0 = x[0]; sum0 += x0*yy0[0];
457: case 0:
458: x += j_rem;
459: yy0 += j_rem;
460: j -= j_rem;
461: break;
462: }
463: while (j>0) {
464: sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
465: j -= 4; x+=4;
466: }
467: z[0] = sum0;
469: VecRestoreArrayRead(yy[0],&yy0);
470: break;
471: case 0:
472: break;
473: }
474: z += nv_rem;
475: i -= nv_rem;
476: yy += nv_rem;
478: while (i >0) {
479: sum0 = 0.;
480: sum1 = 0.;
481: sum2 = 0.;
482: sum3 = 0.;
483: VecGetArrayRead(yy[0],&yy0);
484: VecGetArrayRead(yy[1],&yy1);
485: VecGetArrayRead(yy[2],&yy2);
486: VecGetArrayRead(yy[3],&yy3);
488: j = n;
489: switch (j_rem=j&0x3) {
490: case 3:
491: x2 = x[2];
492: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
493: sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
494: case 2:
495: x1 = x[1];
496: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
497: sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
498: case 1:
499: x0 = x[0];
500: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
501: sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
502: case 0:
503: x += j_rem;
504: yy0 += j_rem;
505: yy1 += j_rem;
506: yy2 += j_rem;
507: yy3 += j_rem;
508: j -= j_rem;
509: break;
510: }
511: while (j>0) {
512: x0 = x[0];
513: x1 = x[1];
514: x2 = x[2];
515: x3 = x[3];
516: x += 4;
518: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
519: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
520: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
521: sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
522: j -= 4;
523: }
524: z[0] = sum0;
525: z[1] = sum1;
526: z[2] = sum2;
527: z[3] = sum3;
528: z += 4;
529: i -= 4;
530: VecRestoreArrayRead(yy[0],&yy0);
531: VecRestoreArrayRead(yy[1],&yy1);
532: VecRestoreArrayRead(yy[2],&yy2);
533: VecRestoreArrayRead(yy[3],&yy3);
534: yy += 4;
535: }
536: VecRestoreArrayRead(xin,&x);
537: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
538: return(0);
539: }
541: #if defined(PETSC_THREADCOMM_ACTIVE)
542: PetscErrorCode VecMax_kernel(PetscInt thread_id,Vec xin,PetscThreadCommReduction red)
543: {
544: PetscErrorCode ierr;
545: PetscInt *trstarts=xin->map->trstarts;
546: PetscInt start,end,i;
547: const PetscScalar *xx;
548: PetscReal lred[2],tmp;
550: start = trstarts[thread_id];
551: end = trstarts[thread_id+1];
552: VecGetArrayRead(xin,&xx);
553: lred[0] = xx[start]; lred[1] = start;
554: for (i=start+1; i<end; i++) {
555: if ((tmp = PetscRealPart(xx[i])) > lred[0]) { lred[0] = tmp; lred[1] = i;}
556: }
557: VecRestoreArrayRead(xin,&xx);
558: PetscThreadReductionKernelPost(thread_id,red,lred);
559: return 0;
560: }
564: PetscErrorCode VecMax_Seq(Vec xin,PetscInt *idx,PetscReal *z)
565: {
566: PetscErrorCode ierr;
567: PetscInt n=xin->map->n;
568: PetscThreadCommReduction red;
569: PetscReal out[2];
572: if (!n) {
573: *z = PETSC_MIN_REAL;
574: *idx = -1;
575: } else {
576: PetscThreadReductionBegin(PetscObjectComm((PetscObject)xin),THREADCOMM_MAXLOC,PETSC_REAL,1,&red);
577: PetscThreadCommRunKernel2(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecMax_kernel,xin,red);
578: PetscThreadReductionEnd(red,out);
579: *z = out[0];
580: if (idx) *idx = (PetscInt)out[1];
581: }
582: return(0);
583: }
584: #else
587: PetscErrorCode VecMax_Seq(Vec xin,PetscInt *idx,PetscReal *z)
588: {
589: PetscInt i,j=0,n = xin->map->n;
590: PetscReal max,tmp;
591: const PetscScalar *xx;
592: PetscErrorCode ierr;
595: VecGetArrayRead(xin,&xx);
596: if (!n) {
597: max = PETSC_MIN_REAL;
598: j = -1;
599: } else {
600: max = PetscRealPart(*xx++); j = 0;
601: for (i=1; i<n; i++) {
602: if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
603: }
604: }
605: *z = max;
606: if (idx) *idx = j;
607: VecRestoreArrayRead(xin,&xx);
608: return(0);
609: }
610: #endif
612: #if defined(PETSC_THREADCOMM_ACTIVE)
613: PetscErrorCode VecMin_kernel(PetscInt thread_id,Vec xin,PetscThreadCommReduction red)
614: {
615: PetscErrorCode ierr;
616: PetscInt *trstarts=xin->map->trstarts;
617: PetscInt start,end,i;
618: const PetscScalar *xx;
619: PetscReal lred[2],tmp;
621: start = trstarts[thread_id];
622: end = trstarts[thread_id+1];
623: VecGetArrayRead(xin,&xx);
624: lred[0] = xx[start]; lred[1] = start;
625: for (i=start+1;i<end;i++) {
626: if ((tmp = PetscRealPart(xx[i])) < lred[0]) { lred[0] = tmp; lred[1] = i;}
627: }
628: VecRestoreArrayRead(xin,&xx);
629: PetscThreadReductionKernelPost(thread_id,red,lred);
630: return 0;
631: }
635: PetscErrorCode VecMin_Seq(Vec xin,PetscInt *idx,PetscReal *z)
636: {
637: PetscErrorCode ierr;
638: PetscInt n=xin->map->n;
639: PetscThreadCommReduction red;
640: PetscReal out[2];
643: if (!n) {
644: *z = PETSC_MAX_REAL;
645: *idx = -1;
646: } else {
647: PetscThreadReductionBegin(PetscObjectComm((PetscObject)xin),THREADCOMM_MINLOC,PETSC_REAL,1,&red);
648: PetscThreadCommRunKernel2(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecMin_kernel,xin,red);
649: PetscThreadReductionEnd(red,out);
650: *z = out[0];
651: if (idx) *idx = (PetscInt)out[1];
652: }
653: return(0);
654: }
655: #else
658: PetscErrorCode VecMin_Seq(Vec xin,PetscInt *idx,PetscReal *z)
659: {
660: PetscInt i,j=0,n = xin->map->n;
661: PetscReal min,tmp;
662: const PetscScalar *xx;
663: PetscErrorCode ierr;
666: VecGetArrayRead(xin,&xx);
667: if (!n) {
668: min = PETSC_MAX_REAL;
669: j = -1;
670: } else {
671: min = PetscRealPart(*xx++); j = 0;
672: for (i=1; i<n; i++) {
673: if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
674: }
675: }
676: *z = min;
677: if (idx) *idx = j;
678: VecGetArrayRead(xin,&xx);
679: return(0);
680: }
681: #endif
683: #if defined(PETSC_THREADCOMM_ACTIVE)
684: PetscErrorCode VecSet_kernel(PetscInt thread_id,Vec xin,PetscScalar *alpha_p)
685: {
687: PetscScalar *xx;
688: PetscInt i,start,end;
689: PetscInt *trstarts=xin->map->trstarts;
690: PetscScalar alpha = *alpha_p;
692: start = trstarts[thread_id];
693: end = trstarts[thread_id+1];
694: VecGetArray(xin,&xx);
695: if (alpha == (PetscScalar)0.0) {
696: PetscMemzero(xx+start,(end-start)*sizeof(PetscScalar));
697: } else {
698: for (i=start; i<end; i++) xx[i] = alpha;
699: }
700: VecRestoreArray(xin,&xx);
701: return 0;
702: }
706: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
707: {
709: PetscScalar *scalar;
712: PetscThreadCommGetScalars(PetscObjectComm((PetscObject)xin),&scalar,NULL,NULL);
713: *scalar = alpha;
714: PetscThreadCommRunKernel2(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecSet_kernel,xin,scalar);
715: return(0);
716: }
717: #else
720: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
721: {
722: PetscInt i,n = xin->map->n;
723: PetscScalar *xx;
727: VecGetArray(xin,&xx);
728: if (alpha == (PetscScalar)0.0) {
729: PetscMemzero(xx,n*sizeof(PetscScalar));
730: } else {
731: for (i=0; i<n; i++) xx[i] = alpha;
732: }
733: VecRestoreArray(xin,&xx);
734: return(0);
735: }
736: #endif
738: #if defined(PETSC_THREADCOMM_ACTIVE)
739: PetscErrorCode VecMAXPY_kernel(PetscInt thread_id,Vec xin,PetscInt *nv_p,const PetscScalar *alpha,Vec *y)
740: {
741: PetscErrorCode ierr;
742: PetscInt *trstarts=xin->map->trstarts,j,j_rem,nv=*nv_p;
743: PetscInt start,end,n;
744: const PetscScalar *yy0,*yy1,*yy2,*yy3;
745: PetscScalar *xx,*x,alpha0,alpha1,alpha2,alpha3;
746: const PetscScalar *y0,*y1,*y2,*y3;
749: start = trstarts[thread_id];
750: end = trstarts[thread_id+1];
751: n = end-start;
752: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
753: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
754: #endif
756: VecGetArray(xin,&xx);
757: x = xx+start;
758: switch (j_rem=nv&0x3) {
759: case 3:
760: VecGetArrayRead(y[0],&yy0);
761: VecGetArrayRead(y[1],&yy1);
762: VecGetArrayRead(y[2],&yy2);
763: alpha0 = alpha[0];
764: alpha1 = alpha[1];
765: alpha2 = alpha[2];
766: alpha += 3;
767: y0 = yy0 + start; y1 = yy1+start; y2 = yy2+start;
768: PetscKernelAXPY3(x,alpha0,alpha1,alpha2,y0,y1,y2,n);
769: VecRestoreArrayRead(y[0],&yy0);
770: VecRestoreArrayRead(y[1],&yy1);
771: VecRestoreArrayRead(y[2],&yy2);
772: y += 3;
773: break;
774: case 2:
775: VecGetArrayRead(y[0],&yy0);
776: VecGetArrayRead(y[1],&yy1);
777: alpha0 = alpha[0];
778: alpha1 = alpha[1];
779: alpha +=2;
780: y0 = yy0+start; y1=yy1+start;
781: PetscKernelAXPY2(x,alpha0,alpha1,y0,y1,n);
782: VecRestoreArrayRead(y[0],&yy0);
783: VecRestoreArrayRead(y[1],&yy1);
784: y +=2;
785: break;
786: case 1:
787: VecGetArrayRead(y[0],&yy0);
788: alpha0 = *alpha++;
789: y0 = yy0 + start;
790: PetscKernelAXPY(x,alpha0,y0,n);
791: VecRestoreArrayRead(y[0],&yy0);
792: y +=1;
793: break;
794: }
795: for (j=j_rem; j<nv; j+=4) {
796: VecGetArrayRead(y[0],&yy0);
797: VecGetArrayRead(y[1],&yy1);
798: VecGetArrayRead(y[2],&yy2);
799: VecGetArrayRead(y[3],&yy3);
800: alpha0 = alpha[0];
801: alpha1 = alpha[1];
802: alpha2 = alpha[2];
803: alpha3 = alpha[3];
804: alpha += 4;
806: y0 = yy0+start; y1=yy1+start; y2=yy2+start; y3=yy3+start;
807: PetscKernelAXPY4(x,alpha0,alpha1,alpha2,alpha3,y0,y1,y2,y3,n);
808: VecRestoreArrayRead(y[0],&yy0);
809: VecRestoreArrayRead(y[1],&yy1);
810: VecRestoreArrayRead(y[2],&yy2);
811: VecRestoreArrayRead(y[3],&yy3);
812: y += 4;
813: }
814: VecRestoreArray(xin,&xx);
815: return 0;
816: }
820: PetscErrorCode VecMAXPY_Seq(Vec xin,PetscInt nv,const PetscScalar *alpha,Vec *y)
821: {
823: PetscInt *int1;
826: PetscLogFlops(nv*2.0*xin->map->n);
827: PetscThreadCommGetInts(PetscObjectComm((PetscObject)xin),&int1,NULL,NULL);
828: *int1 = nv;
829: PetscThreadCommRunKernel4(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecMAXPY_kernel,xin,int1,(void*)alpha,y);
830: return(0);
831: }
832: #else
835: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
836: {
837: PetscErrorCode ierr;
838: PetscInt n = xin->map->n,j,j_rem;
839: const PetscScalar *yy0,*yy1,*yy2,*yy3;
840: PetscScalar *xx,alpha0,alpha1,alpha2,alpha3;
842: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
843: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
844: #endif
847: PetscLogFlops(nv*2.0*n);
848: VecGetArray(xin,&xx);
849: switch (j_rem=nv&0x3) {
850: case 3:
851: VecGetArrayRead(y[0],&yy0);
852: VecGetArrayRead(y[1],&yy1);
853: VecGetArrayRead(y[2],&yy2);
854: alpha0 = alpha[0];
855: alpha1 = alpha[1];
856: alpha2 = alpha[2];
857: alpha += 3;
858: PetscKernelAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
859: VecRestoreArrayRead(y[0],&yy0);
860: VecRestoreArrayRead(y[1],&yy1);
861: VecRestoreArrayRead(y[2],&yy2);
862: y += 3;
863: break;
864: case 2:
865: VecGetArrayRead(y[0],&yy0);
866: VecGetArrayRead(y[1],&yy1);
867: alpha0 = alpha[0];
868: alpha1 = alpha[1];
869: alpha +=2;
870: PetscKernelAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
871: VecRestoreArrayRead(y[0],&yy0);
872: VecRestoreArrayRead(y[1],&yy1);
873: y +=2;
874: break;
875: case 1:
876: VecGetArrayRead(y[0],&yy0);
877: alpha0 = *alpha++;
878: PetscKernelAXPY(xx,alpha0,yy0,n);
879: VecRestoreArrayRead(y[0],&yy0);
880: y +=1;
881: break;
882: }
883: for (j=j_rem; j<nv; j+=4) {
884: VecGetArrayRead(y[0],&yy0);
885: VecGetArrayRead(y[1],&yy1);
886: VecGetArrayRead(y[2],&yy2);
887: VecGetArrayRead(y[3],&yy3);
888: alpha0 = alpha[0];
889: alpha1 = alpha[1];
890: alpha2 = alpha[2];
891: alpha3 = alpha[3];
892: alpha += 4;
894: PetscKernelAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
895: VecRestoreArrayRead(y[0],&yy0);
896: VecRestoreArrayRead(y[1],&yy1);
897: VecRestoreArrayRead(y[2],&yy2);
898: VecRestoreArrayRead(y[3],&yy3);
899: y += 4;
900: }
901: VecRestoreArray(xin,&xx);
902: return(0);
903: }
904: #endif
906: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>
907: #if defined(PETSC_THREADCOMM_ACTIVE)
908: PetscErrorCode VecAYPX_kernel(PetscInt thread_id,Vec yin,PetscScalar *alpha_p,Vec xin)
909: {
910: PetscErrorCode ierr;
911: PetscScalar *yy;
912: const PetscScalar *xx;
913: PetscInt *trstarts=yin->map->trstarts,i;
914: PetscScalar alpha = *alpha_p;
916: VecGetArrayRead(xin,&xx);
917: VecGetArray(yin,&yy);
919: if (alpha == (PetscScalar)-1.0) {
920: for (i=trstarts[thread_id]; i < trstarts[thread_id+1]; i++) {
921: yy[i] = xx[i] - yy[i];
922: }
923: } else {
924: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
925: PetscInt start=trstarts[thread_id],end=trstarts[thread_id+1],n;
926: PetscScalar oalpha = alpha;
927: n = end - start;
928: fortranaypx_(&n,&oalpha,xx+start,yy+start);
929: }
930: #else
931: for (i=trstarts[thread_id]; i < trstarts[thread_id+1];i++) {
932: yy[i] = xx[i] + alpha*yy[i];
933: }
934: }
935: #endif
936: VecRestoreArrayRead(xin,&xx);
937: VecRestoreArray(yin,&yy);
938: return 0;
939: }
942: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
943: {
944: PetscErrorCode ierr;
945: PetscInt n=yin->map->n;
948: if (alpha == (PetscScalar)0.0) {
949: VecCopy(xin,yin);
950: } else if (alpha == (PetscScalar)1.0) {
951: VecAXPY_Seq(yin,alpha,xin);
952: } else {
953: PetscScalar *scal1;
954: PetscThreadCommGetScalars(PetscObjectComm((PetscObject)yin),&scal1,NULL,NULL);
955: *scal1 = alpha;
956: PetscThreadCommRunKernel3(PetscObjectComm((PetscObject)yin),(PetscThreadKernel)VecAYPX_kernel,yin,scal1,xin);
957: if (alpha == (PetscScalar)-1.0) {
958: PetscLogFlops(1.0*n);
959: } else {
960: PetscLogFlops(2.0*n);
961: }
962: }
963: return(0);
964: }
965: #else
968: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
969: {
970: PetscErrorCode ierr;
971: PetscInt n = yin->map->n;
972: PetscScalar *yy;
973: const PetscScalar *xx;
976: if (alpha == (PetscScalar)0.0) {
977: VecCopy(xin,yin);
978: } else if (alpha == (PetscScalar)1.0) {
979: VecAXPY_Seq(yin,alpha,xin);
980: } else if (alpha == (PetscScalar)-1.0) {
981: PetscInt i;
982: VecGetArrayRead(xin,&xx);
983: VecGetArray(yin,&yy);
985: for (i=0; i<n; i++) yy[i] = xx[i] - yy[i];
987: VecRestoreArrayRead(xin,&xx);
988: VecRestoreArray(yin,&yy);
989: PetscLogFlops(1.0*n);
990: } else {
991: VecGetArrayRead(xin,&xx);
992: VecGetArray(yin,&yy);
993: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
994: {
995: PetscScalar oalpha = alpha;
996: fortranaypx_(&n,&oalpha,xx,yy);
997: }
998: #else
999: {
1000: PetscInt i;
1002: for (i=0; i<n; i++) yy[i] = xx[i] + alpha*yy[i];
1003: }
1004: #endif
1005: VecRestoreArrayRead(xin,&xx);
1006: VecRestoreArray(yin,&yy);
1007: PetscLogFlops(2.0*n);
1008: }
1009: return(0);
1010: }
1011: #endif
1013: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
1014: /*
1015: IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
1016: to be slower than a regular C loop. Hence,we do not include it.
1017: void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
1018: */
1019: #if defined(PETSC_THREADCOMM_ACTIVE)
1020: PetscErrorCode VecWAXPY_kernel(PetscInt thread_id,Vec win,PetscScalar *alpha_p,Vec xin,Vec yin)
1021: {
1022: PetscErrorCode ierr;
1023: PetscScalar alpha=*alpha_p;
1024: PetscInt *trstarts=win->map->trstarts,start,end,i;
1025: const PetscScalar *xx,*yy;
1026: PetscScalar *ww;
1028: start = trstarts[thread_id];
1029: end = trstarts[thread_id+1];
1031: VecGetArrayRead(xin,&xx);
1032: VecGetArrayRead(yin,&yy);
1033: VecGetArray(win,&ww);
1034: if (alpha == (PetscScalar)1.0) {
1035: for (i=start; i < end; i++) ww[i] = yy[i] + xx[i];
1036: } else if (alpha == (PetscScalar)-1.0) {
1037: for (i=start; i < end; i++) ww[i] = yy[i] - xx[i];
1038: } else if (alpha == (PetscScalar)0.0) {
1039: PetscMemcpy(ww+start,yy+start,(end-start)*sizeof(PetscScalar));
1040: } else {
1041: PetscScalar oalpha = alpha;
1042: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
1043: fortranwaxpy_(&n,&oalpha,xx+start,yy+start,ww+start);
1044: #else
1045: for (i=start; i < end; i++) ww[i] = yy[i] + oalpha*xx[i];
1046: #endif
1047: }
1048: VecRestoreArrayRead(xin,&xx);
1049: VecRestoreArrayRead(yin,&yy);
1050: VecRestoreArray(win,&ww);
1051: return 0;
1052: }
1056: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
1057: {
1058: PetscErrorCode ierr;
1059: PetscInt n = win->map->n;
1060: PetscScalar *scal1;
1063: PetscThreadCommGetScalars(PetscObjectComm((PetscObject)win),&scal1,NULL,NULL);
1064: *scal1 = alpha;
1065: PetscThreadCommRunKernel4(PetscObjectComm((PetscObject)win),(PetscThreadKernel)VecWAXPY_kernel,win,scal1,xin,yin);
1066: if (alpha == (PetscScalar)1.0) {
1067: PetscLogFlops(n);
1068: } else if (alpha == (PetscScalar)-1.0) {
1069: PetscLogFlops(n);
1070: } else {
1071: PetscLogFlops(2.0*n);
1072: }
1073: return(0);
1074: }
1075: #else
1078: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
1079: {
1080: PetscErrorCode ierr;
1081: PetscInt i,n = win->map->n;
1082: PetscScalar *ww;
1083: const PetscScalar *yy,*xx;
1086: VecGetArrayRead(xin,&xx);
1087: VecGetArrayRead(yin,&yy);
1088: VecGetArray(win,&ww);
1089: if (alpha == (PetscScalar)1.0) {
1090: PetscLogFlops(n);
1091: /* could call BLAS axpy after call to memcopy, but may be slower */
1092: for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
1093: } else if (alpha == (PetscScalar)-1.0) {
1094: PetscLogFlops(n);
1095: for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
1096: } else if (alpha == (PetscScalar)0.0) {
1097: PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
1098: } else {
1099: PetscScalar oalpha = alpha;
1100: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
1101: fortranwaxpy_(&n,&oalpha,xx,yy,ww);
1102: #else
1103: for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
1104: #endif
1105: PetscLogFlops(2.0*n);
1106: }
1107: VecRestoreArrayRead(xin,&xx);
1108: VecRestoreArrayRead(yin,&yy);
1109: VecRestoreArray(win,&ww);
1110: return(0);
1111: }
1112: #endif
1116: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
1117: {
1118: PetscErrorCode ierr;
1119: PetscInt n = xin->map->n,i;
1120: const PetscScalar *xx,*yy;
1121: PetscReal m = 0.0;
1124: VecGetArrayRead(xin,&xx);
1125: VecGetArrayRead(yin,&yy);
1126: for (i = 0; i < n; i++) {
1127: if (yy[i] != (PetscScalar)0.0) {
1128: m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
1129: } else {
1130: m = PetscMax(PetscAbsScalar(xx[i]), m);
1131: }
1132: }
1133: VecRestoreArrayRead(xin,&xx);
1134: VecRestoreArrayRead(yin,&yy);
1135: MPI_Allreduce(&m,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
1136: PetscLogFlops(n);
1137: return(0);
1138: }
1142: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
1143: {
1144: Vec_Seq *v = (Vec_Seq*)vin->data;
1147: if (v->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
1148: v->unplacedarray = v->array; /* save previous array so reset can bring it back */
1149: v->array = (PetscScalar*)a;
1150: return(0);
1151: }
1155: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
1156: {
1157: Vec_Seq *v = (Vec_Seq*)vin->data;
1161: PetscFree(v->array_allocated);
1162: v->array_allocated = v->array = (PetscScalar*)a;
1163: return(0);
1164: }