Actual source code: dvec2.c

  1: #define PETSCVEC_DLL
  2: /* 
  3:    Defines some vector operation functions that are shared by 
  4:   sequential and parallel vectors.
  5: */
 6:  #include src/vec/vec/impls/dvecimpl.h
 7:  #include src/inline/dot.h
 8:  #include src/inline/setval.h
 9:  #include src/inline/axpy.h

 11: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
 14: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
 15: {
 16:   Vec_Seq        *xv = (Vec_Seq *)xin->data;
 18:   PetscInt       i,nv_rem,n = xin->map.n;
 19:   PetscScalar    sum0,sum1,sum2,sum3,*yy0,*yy1,*yy2,*yy3,*x;
 20:   Vec            *yy;

 23:   sum0 = 0;
 24:   sum1 = 0;
 25:   sum2 = 0;

 27:   i      = nv;
 28:   nv_rem = nv&0x3;
 29:   yy     = (Vec*)yin;
 30:   x      = xv->array;

 32:   switch (nv_rem) {
 33:   case 3:
 34:     VecGetArray(yy[0],&yy0);
 35:     VecGetArray(yy[1],&yy1);
 36:     VecGetArray(yy[2],&yy2);
 37:     fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
 38:     VecRestoreArray(yy[0],&yy0);
 39:     VecRestoreArray(yy[1],&yy1);
 40:     VecRestoreArray(yy[2],&yy2);
 41:     z[0] = sum0;
 42:     z[1] = sum1;
 43:     z[2] = sum2;
 44:     break;
 45:   case 2:
 46:     VecGetArray(yy[0],&yy0);
 47:     VecGetArray(yy[1],&yy1);
 48:     fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
 49:     VecRestoreArray(yy[0],&yy0);
 50:     VecRestoreArray(yy[1],&yy1);
 51:     z[0] = sum0;
 52:     z[1] = sum1;
 53:     break;
 54:   case 1:
 55:     VecGetArray(yy[0],&yy0);
 56:     fortranmdot1_(x,yy0,&n,&sum0);
 57:     VecRestoreArray(yy[0],&yy0);
 58:     z[0] = sum0;
 59:     break;
 60:   case 0:
 61:     break;
 62:   }
 63:   z  += nv_rem;
 64:   i  -= nv_rem;
 65:   yy += nv_rem;

 67:   while (i >0) {
 68:     sum0 = 0;
 69:     sum1 = 0;
 70:     sum2 = 0;
 71:     sum3 = 0;
 72:     VecGetArray(yy[0],&yy0);
 73:     VecGetArray(yy[1],&yy1);
 74:     VecGetArray(yy[2],&yy2);
 75:     VecGetArray(yy[3],&yy3);
 76:     fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
 77:     VecRestoreArray(yy[0],&yy0);
 78:     VecRestoreArray(yy[1],&yy1);
 79:     VecRestoreArray(yy[2],&yy2);
 80:     VecRestoreArray(yy[3],&yy3);
 81:     yy  += 4;
 82:     z[0] = sum0;
 83:     z[1] = sum1;
 84:     z[2] = sum2;
 85:     z[3] = sum3;
 86:     z   += 4;
 87:     i   -= 4;
 88:   }
 89:   if (xin->map.n > 0) {
 90:     PetscLogFlops(nv*(2*xin->map.n-1));
 91:   }
 92:   return(0);
 93: }

 95: #else
 98: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
 99: {
100:   Vec_Seq        *xv = (Vec_Seq *)xin->data;
102:   PetscInt       n = xin->map.n,i,j,nv_rem,j_rem;
103:   PetscScalar    sum0,sum1,sum2,sum3,x0,x1,x2,x3,* PETSC_RESTRICT x;
104:   PetscScalar    * PETSC_RESTRICT yy0,* PETSC_RESTRICT yy1,* PETSC_RESTRICT yy2,*PETSC_RESTRICT yy3;
105:   Vec            *yy;

108:   sum0 = 0;
109:   sum1 = 0;
110:   sum2 = 0;

112:   i      = nv;
113:   nv_rem = nv&0x3;
114:   yy     = (Vec *)yin;
115:   j      = n;
116:   x      = xv->array;

118:   switch (nv_rem) {
119:   case 3:
120:     VecGetArray(yy[0],(PetscScalar **)&yy0);
121:     VecGetArray(yy[1],(PetscScalar **)&yy1);
122:     VecGetArray(yy[2],(PetscScalar **)&yy2);
123:     switch (j_rem=j&0x3) {
124:     case 3:
125:       x2 = x[2];
126:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
127:       sum2 += x2*PetscConj(yy2[2]);
128:     case 2:
129:       x1 = x[1];
130:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
131:       sum2 += x1*PetscConj(yy2[1]);
132:     case 1:
133:       x0 = x[0];
134:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
135:       sum2 += x0*PetscConj(yy2[0]);
136:     case 0:
137:       x   += j_rem;
138:       yy0 += j_rem;
139:       yy1 += j_rem;
140:       yy2 += j_rem;
141:       j   -= j_rem;
142:       break;
143:     }
144:     while (j>0) {
145:       x0 = x[0];
146:       x1 = x[1];
147:       x2 = x[2];
148:       x3 = x[3];
149:       x += 4;
150: 
151:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
152:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
153:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
154:       j -= 4;
155:     }
156:     z[0] = sum0;
157:     z[1] = sum1;
158:     z[2] = sum2;
159:     VecRestoreArray(yy[0],(PetscScalar **)&yy0);
160:     VecRestoreArray(yy[1],(PetscScalar **)&yy1);
161:     VecRestoreArray(yy[2],(PetscScalar **)&yy2);
162:     break;
163:   case 2:
164:     VecGetArray(yy[0],(PetscScalar **)&yy0);
165:     VecGetArray(yy[1],(PetscScalar **)&yy1);
166:     switch (j_rem=j&0x3) {
167:     case 3:
168:       x2 = x[2];
169:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
170:     case 2:
171:       x1 = x[1];
172:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
173:     case 1:
174:       x0 = x[0];
175:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
176:     case 0:
177:       x   += j_rem;
178:       yy0 += j_rem;
179:       yy1 += j_rem;
180:       j   -= j_rem;
181:       break;
182:     }
183:     while (j>0) {
184:       x0 = x[0];
185:       x1 = x[1];
186:       x2 = x[2];
187:       x3 = x[3];
188:       x += 4;
189: 
190:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
191:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
192:       j -= 4;
193:     }
194:     z[0] = sum0;
195:     z[1] = sum1;
196: 
197:     VecRestoreArray(yy[0],(PetscScalar **)&yy0);
198:     VecRestoreArray(yy[1],(PetscScalar **)&yy1);
199:     break;
200:   case 1:
201:     VecGetArray(yy[0],(PetscScalar **)&yy0);
202:     switch (j_rem=j&0x3) {
203:     case 3:
204:       x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
205:     case 2:
206:       x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
207:     case 1:
208:       x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
209:     case 0:
210:       x   += j_rem;
211:       yy0 += j_rem;
212:       j   -= j_rem;
213:       break;
214:     }
215:     while (j>0) {
216:       sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
217:             + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
218:       yy0+=4;
219:       j -= 4; x+=4;
220:     }
221:     z[0] = sum0;

223:     VecRestoreArray(yy[0],(PetscScalar **)&yy0);
224:     break;
225:   case 0:
226:     break;
227:   }
228:   z  += nv_rem;
229:   i  -= nv_rem;
230:   yy += nv_rem;

232:   while (i >0) {
233:     sum0 = 0;
234:     sum1 = 0;
235:     sum2 = 0;
236:     sum3 = 0;
237:     VecGetArray(yy[0],(PetscScalar **)&yy0);
238:     VecGetArray(yy[1],(PetscScalar **)&yy1);
239:     VecGetArray(yy[2],(PetscScalar **)&yy2);
240:     VecGetArray(yy[3],(PetscScalar **)&yy3);

242:     j = n;
243:     x = xv->array;
244:     switch (j_rem=j&0x3) {
245:     case 3:
246:       x2 = x[2];
247:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
248:       sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
249:     case 2:
250:       x1 = x[1];
251:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
252:       sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
253:     case 1:
254:       x0 = x[0];
255:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
256:       sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
257:     case 0:
258:       x   += j_rem;
259:       yy0 += j_rem;
260:       yy1 += j_rem;
261:       yy2 += j_rem;
262:       yy3 += j_rem;
263:       j   -= j_rem;
264:       break;
265:     }
266:     while (j>0) {
267:       x0 = x[0];
268:       x1 = x[1];
269:       x2 = x[2];
270:       x3 = x[3];
271:       x += 4;
272: 
273:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
274:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
275:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
276:       sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
277:       j -= 4;
278:     }
279:     z[0] = sum0;
280:     z[1] = sum1;
281:     z[2] = sum2;
282:     z[3] = sum3;
283:     z   += 4;
284:     i   -= 4;
285:     VecRestoreArray(yy[0],(PetscScalar **)&yy0);
286:     VecRestoreArray(yy[1],(PetscScalar **)&yy1);
287:     VecRestoreArray(yy[2],(PetscScalar **)&yy2);
288:     VecRestoreArray(yy[3],(PetscScalar **)&yy3);
289:     yy  += 4;
290:   }
291:   if (xin->map.n > 0) {
292:     PetscLogFlops(nv*(2*xin->map.n-1));
293:   }
294:   return(0);
295: }
296: #endif

298: /* ----------------------------------------------------------------------------*/
301: PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
302: {
303:   Vec_Seq        *xv = (Vec_Seq *)xin->data;
305:   PetscInt       n = xin->map.n,i,j,nv_rem,j_rem;
306:   PetscScalar    sum0,sum1,sum2,sum3,*yy0,*yy1,*yy2,*yy3,x0,x1,x2,x3,*x;
307:   Vec            *yy;
308: 

311:   sum0 = 0;
312:   sum1 = 0;
313:   sum2 = 0;

315:   i      = nv;
316:   nv_rem = nv&0x3;
317:   yy     = (Vec*)yin;
318:   j    = n;
319:   x    = xv->array;

321:   switch (nv_rem) {
322:   case 3:
323:     VecGetArray(yy[0],&yy0);
324:     VecGetArray(yy[1],&yy1);
325:     VecGetArray(yy[2],&yy2);
326:     switch (j_rem=j&0x3) {
327:     case 3:
328:       x2 = x[2];
329:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
330:       sum2 += x2*yy2[2];
331:     case 2:
332:       x1 = x[1];
333:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
334:       sum2 += x1*yy2[1];
335:     case 1:
336:       x0 = x[0];
337:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
338:       sum2 += x0*yy2[0];
339:     case 0:
340:       x  += j_rem;
341:       yy0 += j_rem;
342:       yy1 += j_rem;
343:       yy2 += j_rem;
344:       j  -= j_rem;
345:       break;
346:     }
347:     while (j>0) {
348:       x0 = x[0];
349:       x1 = x[1];
350:       x2 = x[2];
351:       x3 = x[3];
352:       x += 4;
353: 
354:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
355:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
356:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
357:       j -= 4;
358:     }
359:     z[0] = sum0;
360:     z[1] = sum1;
361:     z[2] = sum2;
362:     VecRestoreArray(yy[0],&yy0);
363:     VecRestoreArray(yy[1],&yy1);
364:     VecRestoreArray(yy[2],&yy2);
365:     break;
366:   case 2:
367:     VecGetArray(yy[0],&yy0);
368:     VecGetArray(yy[1],&yy1);
369:     switch (j_rem=j&0x3) {
370:     case 3:
371:       x2 = x[2];
372:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
373:     case 2:
374:       x1 = x[1];
375:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
376:     case 1:
377:       x0 = x[0];
378:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
379:     case 0:
380:       x  += j_rem;
381:       yy0 += j_rem;
382:       yy1 += j_rem;
383:       j  -= j_rem;
384:       break;
385:     }
386:     while (j>0) {
387:       x0 = x[0];
388:       x1 = x[1];
389:       x2 = x[2];
390:       x3 = x[3];
391:       x += 4;
392: 
393:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
394:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
395:       j -= 4;
396:     }
397:     z[0] = sum0;
398:     z[1] = sum1;
399: 
400:     VecRestoreArray(yy[0],&yy0);
401:     VecRestoreArray(yy[1],&yy1);
402:     break;
403:   case 1:
404:     VecGetArray(yy[0],&yy0);
405:     switch (j_rem=j&0x3) {
406:     case 3:
407:       x2 = x[2]; sum0 += x2*yy0[2];
408:     case 2:
409:       x1 = x[1]; sum0 += x1*yy0[1];
410:     case 1:
411:       x0 = x[0]; sum0 += x0*yy0[0];
412:     case 0:
413:       x  += j_rem;
414:       yy0 += j_rem;
415:       j  -= j_rem;
416:       break;
417:     }
418:     while (j>0) {
419:       sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
420:       j -= 4; x+=4;
421:     }
422:     z[0] = sum0;

424:     VecRestoreArray(yy[0],&yy0);
425:     break;
426:   case 0:
427:     break;
428:   }
429:   z  += nv_rem;
430:   i  -= nv_rem;
431:   yy += nv_rem;

433:   while (i >0) {
434:     sum0 = 0;
435:     sum1 = 0;
436:     sum2 = 0;
437:     sum3 = 0;
438:     VecGetArray(yy[0],&yy0);
439:     VecGetArray(yy[1],&yy1);
440:     VecGetArray(yy[2],&yy2);
441:     VecGetArray(yy[3],&yy3);

443:     j = n;
444:     x = xv->array;
445:     switch (j_rem=j&0x3) {
446:     case 3:
447:       x2 = x[2];
448:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
449:       sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
450:     case 2:
451:       x1 = x[1];
452:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
453:       sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
454:     case 1:
455:       x0 = x[0];
456:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
457:       sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
458:     case 0:
459:       x  += j_rem;
460:       yy0 += j_rem;
461:       yy1 += j_rem;
462:       yy2 += j_rem;
463:       yy3 += j_rem;
464:       j  -= j_rem;
465:       break;
466:     }
467:     while (j>0) {
468:       x0 = x[0];
469:       x1 = x[1];
470:       x2 = x[2];
471:       x3 = x[3];
472:       x += 4;
473: 
474:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
475:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
476:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
477:       sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
478:       j -= 4;
479:     }
480:     z[0] = sum0;
481:     z[1] = sum1;
482:     z[2] = sum2;
483:     z[3] = sum3;
484:     z   += 4;
485:     i   -= 4;
486:     VecRestoreArray(yy[0],&yy0);
487:     VecRestoreArray(yy[1],&yy1);
488:     VecRestoreArray(yy[2],&yy2);
489:     VecRestoreArray(yy[3],&yy3);
490:     yy  += 4;
491:   }
492:   if (xin->map.n > 0) {
493:     PetscLogFlops(nv*(2*xin->map.n-1));
494:   }
495:   return(0);
496: }
497: 

501: PetscErrorCode VecMax_Seq(Vec xin,PetscInt* idx,PetscReal * z)
502: {
503:   Vec_Seq      *x = (Vec_Seq*)xin->data;
504:   PetscInt     i,j=0,n = xin->map.n;
505:   PetscReal    max,tmp;
506:   PetscScalar  *xx = x->array;

509:   if (!n) {
510:     max = PETSC_MIN;
511:     j   = -1;
512:   } else {
513: #if defined(PETSC_USE_COMPLEX)
514:       max = PetscRealPart(*xx++); j = 0;
515: #else
516:       max = *xx++; j = 0;
517: #endif
518:     for (i=1; i<n; i++) {
519: #if defined(PETSC_USE_COMPLEX)
520:       if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
521: #else
522:       if ((tmp = *xx++) > max) { j = i; max = tmp; }
523: #endif
524:     }
525:   }
526:   *z   = max;
527:   if (idx) *idx = j;
528:   return(0);
529: }

533: PetscErrorCode VecMin_Seq(Vec xin,PetscInt* idx,PetscReal * z)
534: {
535:   Vec_Seq      *x = (Vec_Seq*)xin->data;
536:   PetscInt     i,j=0,n = xin->map.n;
537:   PetscReal    min,tmp;
538:   PetscScalar  *xx = x->array;

541:   if (!n) {
542:     min = PETSC_MAX;
543:     j   = -1;
544:   } else {
545: #if defined(PETSC_USE_COMPLEX)
546:     min = PetscRealPart(*xx++); j = 0;
547: #else
548:     min = *xx++; j = 0;
549: #endif
550:     for (i=1; i<n; i++) {
551: #if defined(PETSC_USE_COMPLEX)
552:       if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
553: #else
554:       if ((tmp = *xx++) < min) { j = i; min = tmp; }
555: #endif
556:     }
557:   }
558:   *z   = min;
559:   if (idx) *idx = j;
560:   return(0);
561: }

565: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
566: {
567:   Vec_Seq        *x = (Vec_Seq *)xin->data;
569:   PetscInt       n = xin->map.n;
570:   PetscScalar    *xx = x->array;

573:   if (alpha == 0.0) {
574:     PetscMemzero(xx,n*sizeof(PetscScalar));
575:   } else {
576:     SET(xx,n,alpha);
577:   }
578:   return(0);
579: }

583: PetscErrorCode VecSetRandom_Seq(Vec xin,PetscRandom r)
584: {
586:   PetscInt       n = xin->map.n,i;
587:   PetscScalar    *xx;

590:   VecGetArray(xin,&xx);
591:   for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
592:   VecRestoreArray(xin,&xx);
593:   return(0);
594: }

598: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
599: {
600:   Vec_Seq        *xdata = (Vec_Seq*)xin->data;
602:   PetscInt       n = xin->map.n,j,j_rem;
603:   PetscScalar    *xx,*yy0,*yy1,*yy2,*yy3,alpha0,alpha1,alpha2,alpha3;

605: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
606: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
607: #endif

610:   PetscLogFlops(nv*2*n);

612:   xx = xdata->array;
613:   switch (j_rem=nv&0x3) {
614:   case 3:
615:     VecGetArray(y[0],&yy0);
616:     VecGetArray(y[1],&yy1);
617:     VecGetArray(y[2],&yy2);
618:     alpha0 = alpha[0];
619:     alpha1 = alpha[1];
620:     alpha2 = alpha[2];
621:     alpha += 3;
622:     APXY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
623:     VecRestoreArray(y[0],&yy0);
624:     VecRestoreArray(y[1],&yy1);
625:     VecRestoreArray(y[2],&yy2);
626:     y     += 3;
627:     break;
628:   case 2:
629:     VecGetArray(y[0],&yy0);
630:     VecGetArray(y[1],&yy1);
631:     alpha0 = alpha[0];
632:     alpha1 = alpha[1];
633:     alpha +=2;
634:     APXY2(xx,alpha0,alpha1,yy0,yy1,n);
635:     VecRestoreArray(y[0],&yy0);
636:     VecRestoreArray(y[1],&yy1);
637:     y     +=2;
638:     break;
639:   case 1:
640:     VecGetArray(y[0],&yy0);
641:     alpha0 = *alpha++;
642:     {PetscBLASInt nn = (PetscBLASInt)n; APXY(xx,alpha0,yy0,nn);}
643:     VecRestoreArray(y[0],&yy0);
644:     y     +=1;
645:     break;
646:   }
647:   for (j=j_rem; j<nv; j+=4) {
648:     VecGetArray(y[0],&yy0);
649:     VecGetArray(y[1],&yy1);
650:     VecGetArray(y[2],&yy2);
651:     VecGetArray(y[3],&yy3);
652:     alpha0 = alpha[0];
653:     alpha1 = alpha[1];
654:     alpha2 = alpha[2];
655:     alpha3 = alpha[3];
656:     alpha  += 4;

658:     APXY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
659:     VecRestoreArray(y[0],&yy0);
660:     VecRestoreArray(y[1],&yy1);
661:     VecRestoreArray(y[2],&yy2);
662:     VecRestoreArray(y[3],&yy3);
663:     y      += 4;
664:   }
665:   return(0);
666: }

670: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
671: {
672:   Vec_Seq        *y = (Vec_Seq *)yin->data;
674:   PetscInt       n = yin->map.n;
675:   PetscScalar    *yy = y->array,*xx;

678:   if (alpha == 0.0) {
679:     VecCopy_Seq(xin,yin);
680:   } else if (alpha == 1.0) {
681:     VecAXPY_Seq(yin,alpha,xin);
682:   } else {
683:     VecGetArray(xin,&xx);
684: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
685:     {
686:       PetscScalar oalpha = alpha;
687:       fortranaypx_(&n,&oalpha,xx,yy);
688:     }
689: #else
690:     {
691:       PetscInt i;
692:       for (i=0; i<n; i++) {
693:         yy[i] = xx[i] + alpha*yy[i];
694:       }
695:     }
696: #endif
697:     VecRestoreArray(xin,&xx);
698:     PetscLogFlops(2*n);
699:   }
700:   return(0);
701: }

703: /*
704:    IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
705:   to be slower than a regular C loop.  Hence,we do not include it.
706:   void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
707: */

711: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
712: {
713:   Vec_Seq        *w = (Vec_Seq *)win->data;
715:   PetscInt       i,n = win->map.n;
716:   PetscScalar    *ww = w->array,*yy,*xx;

719:   VecGetArray(yin,&yy);
720:   VecGetArray(xin,&xx);
721:   if (alpha == 1.0) {
722:     PetscLogFlops(n);
723:     /* could call BLAS axpy after call to memcopy, but may be slower */
724:     for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
725:   } else if (alpha == -1.0) {
726:     PetscLogFlops(n);
727:     for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
728:   } else if (alpha == 0.0) {
729:     PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
730:   } else {
731:     PetscScalar oalpha = alpha;
732: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
733:     fortranwaxpy_(&n,&oalpha,xx,yy,ww);
734: #else
735:     for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
736: #endif
737:     PetscLogFlops(2*n);
738:   }
739:   VecRestoreArray(yin,&yy);
740:   VecRestoreArray(xin,&xx);
741:   return(0);
742: }

746: PetscErrorCode VecPointwiseMax_Seq(Vec win,Vec xin,Vec yin)
747: {
748:   Vec_Seq        *w = (Vec_Seq *)win->data;
750:   PetscInt       n = win->map.n,i;
751:   PetscScalar    *ww = w->array,*xx,*yy;

754:   VecGetArray(xin,&xx);
755:   if (xin != yin) {
756:     VecGetArray(yin,&yy);
757:   } else {
758:     yy = xx;
759:   }
760:   for (i=0; i<n; i++) {
761:     ww[i] = PetscMax(PetscRealPart(xx[i]),PetscRealPart(yy[i]));
762:   }
763:   VecRestoreArray(xin,&xx);
764:   if (xin != yin) {
765:     VecRestoreArray(yin,&yy);
766:   }
767:   PetscLogFlops(n);
768:   return(0);
769: }

773: PetscErrorCode VecPointwiseMin_Seq(Vec win,Vec xin,Vec yin)
774: {
775:   Vec_Seq        *w = (Vec_Seq *)win->data;
777:   PetscInt       n = win->map.n,i;
778:   PetscScalar    *ww = w->array,*xx,*yy;

781:   VecGetArray(xin,&xx);
782:   if (xin != yin) {
783:     VecGetArray(yin,&yy);
784:   } else {
785:     yy = xx;
786:   }
787:   for (i=0; i<n; i++) {
788:     ww[i] = PetscMin(PetscRealPart(xx[i]),PetscRealPart(yy[i]));
789:   }
790:   VecRestoreArray(xin,&xx);
791:   if (xin != yin) {
792:     VecRestoreArray(yin,&yy);
793:   }
794:   PetscLogFlops(n);
795:   return(0);
796: }

800: PetscErrorCode VecPointwiseMaxAbs_Seq(Vec win,Vec xin,Vec yin)
801: {
802:   Vec_Seq        *w = (Vec_Seq *)win->data;
804:   PetscInt       n = win->map.n,i;
805:   PetscScalar    *ww = w->array,*xx,*yy;

808:   VecGetArray(xin,&xx);
809:   if (xin != yin) {
810:     VecGetArray(yin,&yy);
811:   } else {
812:     yy = xx;
813:   }
814:   for (i=0; i<n; i++) {
815:     ww[i] = PetscMax(PetscAbsScalar(xx[i]),PetscAbsScalar(yy[i]));
816:   }
817:   VecRestoreArray(xin,&xx);
818:   if (xin != yin) {
819:     VecRestoreArray(yin,&yy);
820:   }
821:   PetscLogFlops(n);
822:   return(0);
823: }

827: PetscErrorCode VecPointwiseMult_Seq(Vec win,Vec xin,Vec yin)
828: {
829:   Vec_Seq        *w = (Vec_Seq *)win->data;
831:   PetscInt       n = win->map.n,i;
832:   PetscScalar    *ww = w->array,*xx,*yy;

835:   VecGetArray(xin,&xx);
836:   if (xin != yin) {
837:     VecGetArray(yin,&yy);
838:   } else {
839:     yy = xx;
840:   }

842:   if (ww == xx) {
843:     for (i=0; i<n; i++) ww[i] *= yy[i];
844:   } else if (ww == yy) {
845:     for (i=0; i<n; i++) ww[i] *= xx[i];
846:   } else {
847:     /*  This was suppose to help on SGI but didn't really seem to
848:           PetscReal * PETSC_RESTRICT www = ww;
849:           PetscReal * PETSC_RESTRICT yyy = yy;
850:           PetscReal * PETSC_RESTRICT xxx = xx;
851:           for (i=0; i<n; i++) www[i] = xxx[i] * yyy[i];
852:     */
853: #if defined(PETSC_USE_FORTRAN_KERNEL_XTIMESY)
854:     fortranxtimesy_(xx,yy,ww,&n);
855: #else
856:     for (i=0; i<n; i++) ww[i] = xx[i] * yy[i];
857: #endif
858:   }
859:   VecRestoreArray(xin,&xx);
860:   if (xin != yin) {
861:     VecRestoreArray(yin,&yy);
862:   }
863:   PetscLogFlops(n);
864:   return(0);
865: }

869: PetscErrorCode VecPointwiseDivide_Seq(Vec win,Vec xin,Vec yin)
870: {
871:   Vec_Seq        *w = (Vec_Seq *)win->data;
873:   PetscInt       n = win->map.n,i;
874:   PetscScalar    *ww = w->array,*xx,*yy;

877:   VecGetArray(xin,&xx);
878:   if (xin != yin) {
879:     VecGetArray(yin,&yy);
880:   } else {
881:     yy = xx;
882:   }
883:   for (i=0; i<n; i++) {
884:     ww[i] = xx[i] / yy[i];
885:   }
886:   VecRestoreArray(xin,&xx);
887:   if (xin != yin) {
888:     VecRestoreArray(yin,&yy);
889:   }
890:   PetscLogFlops(n);
891:   return(0);
892: }

896: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
897: {
899:   PetscInt       n = xin->map.n,i;
900:   PetscScalar    *xx,*yy;
901:   PetscReal      m = 0.0;

904:   VecGetArray(yin,&yy);
905:   VecGetArray(xin,&xx);
906:   for(i = 0; i < n; i++) {
907:     if (yy[i] != 0.0) {
908:       m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
909:     } else {
910:       m = PetscMax(PetscAbsScalar(xx[i]), m);
911:     }
912:   }
913:   VecRestoreArray(yin,&yy);
914:   MPI_Allreduce(&m,max,1,MPIU_REAL,MPI_MAX,xin->comm);
915:   PetscLogFlops(n);
916:   return(0);
917: }

921: PetscErrorCode VecGetArray_Seq(Vec vin,PetscScalar *a[])
922: {
923:   Vec_Seq        *v = (Vec_Seq *)vin->data;

927:   if (vin->array_gotten) {
928:     SETERRQ(PETSC_ERR_ORDER,"Array has already been gotten for this vector,you may\n\
929:     have forgotten a call to VecRestoreArray()");
930:   }
931:   vin->array_gotten = PETSC_TRUE;

933:   *a =  v->array;
934:   PetscObjectTakeAccess(vin);
935:   return(0);
936: }

940: PetscErrorCode VecRestoreArray_Seq(Vec vin,PetscScalar *a[])
941: {

945:   if (!vin->array_gotten) {
946:     SETERRQ(PETSC_ERR_ORDER,"Array has not been gotten for this vector, you may\n\
947:     have forgotten a call to VecGetArray()");
948:   }
949:   vin->array_gotten = PETSC_FALSE;
950:   if (a) *a         = 0; /* now user cannot accidently use it again */

952:   PetscObjectGrantAccess(vin);
953:   return(0);
954: }

958: PetscErrorCode VecResetArray_Seq(Vec vin)
959: {
960:   Vec_Seq *v = (Vec_Seq *)vin->data;

963:   v->array         = v->unplacedarray;
964:   v->unplacedarray = 0;
965:   return(0);
966: }

970: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
971: {
972:   Vec_Seq *v = (Vec_Seq *)vin->data;

975:   if (v->unplacedarray) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
976:   v->unplacedarray = v->array;  /* save previous array so reset can bring it back */
977:   v->array = (PetscScalar *)a;
978:   return(0);
979: }

983: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
984: {
985:   Vec_Seq        *v = (Vec_Seq *)vin->data;

989:   PetscFree(v->array_allocated);
990:   v->array_allocated = v->array = (PetscScalar *)a;
991:   return(0);
992: }

996: PetscErrorCode VecGetSize_Seq(Vec vin,PetscInt *size)
997: {
999:   *size = vin->map.n;
1000:   return(0);
1001: }

1005: PetscErrorCode VecConjugate_Seq(Vec xin)
1006: {
1007:   PetscScalar *x = ((Vec_Seq *)xin->data)->array;
1008:   PetscInt    n = xin->map.n;

1011:   while (n-->0) {
1012:     *x = PetscConj(*x);
1013:     x++;
1014:   }
1015:   return(0);
1016: }
1017: