Actual source code: vpscat.c

  1: #define PETSCVEC_DLL

  3: /*
  4:     Defines parallel vector scatters.
  5: */

 7:  #include "src/vec/is/isimpl.h"
 8:  #include "vecimpl.h"
 9:  #include "src/vec/impls/dvecimpl.h"
 10:  #include "src/vec/impls/mpi/pvecimpl.h"
 11:  #include "petscsys.h"

 15: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
 16: {
 17:   VecScatter_MPI_General *to=(VecScatter_MPI_General*)ctx->todata;
 18:   VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
 19:   PetscErrorCode         ierr;
 20:   PetscInt               i;
 21:   PetscMPIInt            rank;
 22:   PetscViewerFormat      format;
 23:   PetscTruth             iascii;

 26:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
 27:   if (iascii) {
 28:     MPI_Comm_rank(ctx->comm,&rank);
 29:     PetscViewerGetFormat(viewer,&format);
 30:     if (format ==  PETSC_VIEWER_ASCII_INFO) {
 31:       PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;

 33:       MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,ctx->comm);
 34:       MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,ctx->comm);
 35:       itmp = to->starts[to->n];
 36:       MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,ctx->comm);
 37:       itmp = from->starts[from->n];
 38:       MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,ctx->comm);
 39:       MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,ctx->comm);

 41:       PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
 42:       PetscViewerASCIIPrintf(viewer,"  Maximum number sends %D\n",nsend_max);
 43:       PetscViewerASCIIPrintf(viewer,"  Maximum number receives %D\n",nrecv_max);
 44:       PetscViewerASCIIPrintf(viewer,"  Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
 45:       PetscViewerASCIIPrintf(viewer,"  Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
 46:       PetscViewerASCIIPrintf(viewer,"  Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));

 48:     } else {
 49:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
 50:       if (to->n) {
 51:         for (i=0; i<to->n; i++){
 52:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]   %D length = %D to whom %D\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
 53:         }
 54:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
 55:         for (i=0; i<to->starts[to->n]; i++){
 56:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%D \n",rank,to->indices[i]);
 57:         }
 58:       }

 60:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
 61:       if (from->n) {
 62:         for (i=0; i<from->n; i++){
 63:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %D\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
 64:         }

 66:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
 67:         for (i=0; i<from->starts[from->n]; i++){
 68:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%D \n",rank,from->indices[i]);
 69:         }
 70:       }
 71:       if (to->local.n) {
 72:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Indices for local part of scatter\n",rank);
 73:         for (i=0; i<to->local.n; i++){
 74:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]From %D to %D \n",rank,from->local.vslots[i],to->local.vslots[i]);
 75:         }
 76:       }

 78:       PetscViewerFlush(viewer);
 79:     }
 80:   } else {
 81:     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for this scatter",((PetscObject)viewer)->type_name);
 82:   }
 83:   return(0);
 84: }
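
/*
    An illustrative sketch (not part of the library; the numeric values are
    invented) of the CSR-like send-side layout that the viewer above prints,
    and of how packing walks it.  The names mirror the fields of
    VecScatter_MPI_General; the block is excluded from compilation.
*/
#if 0
static void SendLayoutSketch(const PetscScalar *xv)   /* xv assumed to have at least 10 entries */
{
  const PetscMPIInt procs[2]   = {1,3};         /* message i goes to rank procs[i]                    */
  const PetscInt    starts[3]  = {0,2,5};       /* message i covers indices[starts[i]..starts[i+1])   */
  const PetscInt    indices[5] = {4,7,0,2,9};   /* entries of x that get packed                       */
  PetscScalar       svalues[5];                 /* contiguous send buffer                             */
  PetscInt          i,j;

  for (i=0; i<2; i++) {
    for (j=starts[i]; j<starts[i+1]; j++) svalues[j] = xv[indices[j]];
    /* svalues+starts[i], of length starts[i+1]-starts[i], is what would be sent to rank procs[i] */
  }
}
#endif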

 86: /* -----------------------------------------------------------------------------------*/
 87: /*
  88:       The next routine determines which portion of the local scatter is an
  89:   exact copy of values into their current locations. We check this here and
  90:   then know that we can skip that portion of the scatter.
 91: */
 94: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter_Seq_General *gen_to,VecScatter_Seq_General *gen_from)
 95: {
 96:   PetscInt       n = gen_to->n,n_nonmatching = 0,i,*to_slots = gen_to->vslots,*from_slots = gen_from->vslots;
 98:   PetscInt       *nto_slots,*nfrom_slots,j = 0;
 99: 
101:   for (i=0; i<n; i++) {
102:     if (to_slots[i] != from_slots[i]) n_nonmatching++;
103:   }

105:   if (!n_nonmatching) {
106:     gen_to->nonmatching_computed = PETSC_TRUE;
107:     gen_to->n_nonmatching        = gen_from->n_nonmatching = 0;
108:     PetscLogInfo((0,"VecScatterLocalOptimize_Private:Reduced %D to 0\n", n));
109:   } else if (n_nonmatching == n) {
110:     gen_to->nonmatching_computed = PETSC_FALSE;
111:     PetscLogInfo((0,"VecScatterLocalOptimize_Private:All values non-matching\n"));
112:   } else {
113:     gen_to->nonmatching_computed= PETSC_TRUE;
114:     gen_to->n_nonmatching       = gen_from->n_nonmatching = n_nonmatching;
115:     PetscMalloc2(n_nonmatching,PetscInt,&nto_slots,n_nonmatching,PetscInt,&nfrom_slots);
116:     gen_to->slots_nonmatching   = nto_slots;
117:     gen_from->slots_nonmatching = nfrom_slots;
118:     for (i=0; i<n; i++) {
119:       if (to_slots[i] != from_slots[i]) {
120:         nto_slots[j]   = to_slots[i];
121:         nfrom_slots[j] = from_slots[i];
122:         j++;
123:       }
124:     }
125:     PetscLogInfo((0,"VecScatterLocalOptimize_Private:Reduced %D to %D\n",n,n_nonmatching));
126:   }
127:   return(0);
128: }
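
/*
    A small example (not part of the library; the slots are invented) of what
    the routine above computes: for

        to_slots   = {3, 7, 9, 20}      from_slots = {3, 7, 9, 11}

    the first three entries copy values onto themselves when x and y share the
    same array, so only the pair 20 -> 11 is kept and the in-place loop shrinks
    from four copies to one.
*/
#if 0
static void LocalOptimizeSketch(PetscScalar *xyv)        /* x and y share this array; at least 21 entries */
{
  const PetscInt nto_slots[1]   = {20};                  /* slots_nonmatching for the example above */
  const PetscInt nfrom_slots[1] = {11};
  PetscInt       i;

  for (i=0; i<1; i++) xyv[nfrom_slots[i]] = xyv[nto_slots[i]];   /* one copy instead of four */
}
#endif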

130: /* --------------------------------------------------------------------------------------*/
133: PetscErrorCode VecScatterCopy_PtoP(VecScatter in,VecScatter out)
134: {
135:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
136:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
137:   PetscErrorCode         ierr;
138:   PetscInt               ny;

141:   out->postrecvs = in->postrecvs;
142:   out->begin     = in->begin;
143:   out->end       = in->end;
144:   out->copy      = in->copy;
145:   out->destroy   = in->destroy;
146:   out->view      = in->view;

148:   /* allocate entire send scatter context */
149:   PetscNew(VecScatter_MPI_General,&out_to);
150:   PetscNew(VecScatter_MPI_General,&out_from);

152:   ny                = in_to->starts[in_to->n];
153:   out_to->n         = in_to->n;
154:   out_to->type      = in_to->type;
155:   out_to->sendfirst = in_to->sendfirst;
156:   PetscMalloc7(ny,PetscScalar,&out_to->values,out_to->n,MPI_Request,&out_to->requests,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,
157:                       out_to->n,PetscMPIInt,&out_to->procs,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,
158:                       &out_to->rstatus);
159:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
160:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
161:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));

163:   out->todata      = (void*)out_to;
164:   out_to->local.n  = in_to->local.n;
165:   out_to->local.nonmatching_computed = PETSC_FALSE;
166:   out_to->local.n_nonmatching        = 0;
167:   out_to->local.slots_nonmatching    = 0;
168:   if (in_to->local.n) {
169:     PetscMalloc2(in_to->local.n,PetscInt,&out_to->local.vslots,in_from->local.n,PetscInt,&out_from->local.vslots);
170:     PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
171:     PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
172:   } else {
173:     out_to->local.vslots   = 0;
174:     out_from->local.vslots = 0;
175:   }

177:   /* allocate entire receive context */
178:   out_from->type      = in_from->type;
179:   ny                  = in_from->starts[in_from->n];
180:   out_from->n         = in_from->n;
181:   out_from->sendfirst = in_from->sendfirst;

183:   PetscMalloc5(ny,PetscScalar,&out_from->values,out_from->n,MPI_Request,&out_from->requests,ny,PetscInt,&out_from->indices,
184:                       out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);

186:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
187:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
188:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
189:   out->fromdata       = (void*)out_from;
190:   out_from->local.n   = in_from->local.n;
191:   out_from->local.nonmatching_computed = PETSC_FALSE;
192:   out_from->local.n_nonmatching        = 0;
193:   out_from->local.slots_nonmatching    = 0;
194:   return(0);
195: }

197: /* -------------------------------------------------------------------------------------*/
200: PetscErrorCode VecScatterDestroy_PtoP(VecScatter ctx)
201: {
202:   VecScatter_MPI_General *gen_to   = (VecScatter_MPI_General*)ctx->todata;
203:   VecScatter_MPI_General *gen_from = (VecScatter_MPI_General*)ctx->fromdata;
204:   PetscErrorCode         ierr;

207:   CHKMEMQ;
208:   if (gen_to->local.vslots)             {PetscFree2(gen_to->local.vslots,gen_from->local.vslots);}
209:   if (gen_to->local.slots_nonmatching) {PetscFree2(gen_to->local.slots_nonmatching,gen_from->local.slots_nonmatching);}
210:   PetscFree7(gen_to->values,gen_to->requests,gen_to->indices,gen_to->starts,gen_to->procs,gen_to->sstatus,gen_to->rstatus);
211:   PetscFree5(gen_from->values,gen_from->requests,gen_from->indices,gen_from->starts,gen_from->procs);
212:   PetscFree(gen_from);
213:   PetscFree(gen_to);
214:   PetscHeaderDestroy(ctx);
215:   return(0);
216: }

218: /* --------------------------------------------------------------------------------------*/
219: /*
 220:      Even though the next routines are written for parallel
 221:   vectors, either xin or yin (but not both) may be a sequential
 222:   vector, one per processor.
 223:
 224:      gen_from indices indicate where arriving values are stashed;
 225:      gen_to   indices indicate where departing values came from.
 226:      The naming can be VERY confusing.

228: */
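/*
    In condensed form (a sketch, not the actual implementation), a forward
    scatter with INSERT_VALUES does

        svalues[k]               = xv[gen_to->indices[k]]      for k = 0 .. gen_to->starts[gen_to->n]-1
        ... svalues is sent; the matching piece arrives as rvalues ...
        yv[gen_from->indices[k]] = rvalues[k]                   for k = 0 .. gen_from->starts[gen_from->n]-1

    and SCATTER_REVERSE simply swaps the roles of ctx->todata and ctx->fromdata,
    as the routines below do.
*/
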
231: PetscErrorCode VecScatterBegin_PtoP(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
232: {
233:   VecScatter_MPI_General *gen_to,*gen_from;
234:   MPI_Comm               comm = ctx->comm;
235:   PetscScalar            *xv,*yv,*val,*rvalues,*svalues;
236:   MPI_Request            *rwaits,*swaits;
237:   PetscInt               i,j,*indices,*rstarts,*sstarts;
238:   PetscMPIInt            tag = ctx->tag,*rprocs,*sprocs;
239:   PetscErrorCode         ierr;
240:   PetscInt               nrecvs,nsends,iend;

243:   CHKMEMQ;
244:   VecGetArray(xin,&xv);
245:   if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
246:   if (mode & SCATTER_REVERSE){
247:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
248:     gen_from = (VecScatter_MPI_General*)ctx->todata;
249:   } else {
250:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
251:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
252:   }
253:   rvalues  = gen_from->values;
254:   svalues  = gen_to->values;
255:   nrecvs   = gen_from->n;
256:   nsends   = gen_to->n;
257:   rwaits   = gen_from->requests;
258:   swaits   = gen_to->requests;
259:   indices  = gen_to->indices;
260:   rstarts  = gen_from->starts;
261:   sstarts  = gen_to->starts;
262:   rprocs   = gen_from->procs;
263:   sprocs   = gen_to->procs;

265:   if (!(mode & SCATTER_LOCAL)) {

267:     if (gen_to->sendfirst) {
268:       /* do sends:  */
269:       for (i=0; i<nsends; i++) {
270:         val  = svalues + sstarts[i];
271:         iend = sstarts[i+1]-sstarts[i];
272:         /* pack the message */
273:         for (j=0; j<iend; j++) {
274:           val[j] = xv[*indices++];
275:         }
276:         MPI_Isend(val,iend,MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
277:       }
278:     }
279: 
280:     /* post receives:   */
281:     for (i=0; i<nrecvs; i++) {
282:       MPI_Irecv(rvalues+rstarts[i],rstarts[i+1]-rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
283:     }

285:     if (!gen_to->sendfirst) {
286:       /* do sends:  */
287:       for (i=0; i<nsends; i++) {
288:         val  = svalues + sstarts[i];
289:         iend = sstarts[i+1]-sstarts[i];
290:         /* pack the message */
291:         for (j=0; j<iend; j++) {
292:           val[j] = xv[*indices++];
293:         }
294:         MPI_Isend(val,iend,MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
295:       }
296:     }
297:   }

299:   /* take care of local scatters */
300:   if (gen_to->local.n && addv == INSERT_VALUES) {
301:     if (yv == xv && !gen_to->local.nonmatching_computed) {
302:       VecScatterLocalOptimize_Private(&gen_to->local,&gen_from->local);
303:     }
304:     if (gen_to->local.is_copy) {
305:       PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
306:     } else if (yv != xv || !gen_to->local.nonmatching_computed) {
307:       PetscInt *tslots = gen_to->local.vslots,*fslots = gen_from->local.vslots;
308:       PetscInt n       = gen_to->local.n;
309:       for (i=0; i<n; i++) {yv[fslots[i]] = xv[tslots[i]];}
310:     } else {
311:       /* 
 312:           In this case the scatter would copy some values onto their current locations, so those entries can be skipped
313:       */
314:       PetscInt *tslots = gen_to->local.slots_nonmatching,*fslots = gen_from->local.slots_nonmatching;
315:       PetscInt n       = gen_to->local.n_nonmatching;
316:       for (i=0; i<n; i++) {yv[fslots[i]] = xv[tslots[i]];}
317:     }
318:   } else if (gen_to->local.n) {
319:     PetscInt *tslots = gen_to->local.vslots,*fslots = gen_from->local.vslots;
320:     PetscInt n = gen_to->local.n;
321:     if (addv == ADD_VALUES) {
322:       for (i=0; i<n; i++) {yv[fslots[i]] += xv[tslots[i]];}
323: #if !defined(PETSC_USE_COMPLEX)
324:     } else if (addv == MAX_VALUES) {
325:       for (i=0; i<n; i++) {yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[tslots[i]]);}
326: #endif
327:     } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
328:   }

330:   VecRestoreArray(xin,&xv);
331:   if (xin != yin) {VecRestoreArray(yin,&yv);}
332:   CHKMEMQ;
333:   return(0);
334: }

336: /* --------------------------------------------------------------------------------------*/
339: PetscErrorCode VecScatterEnd_PtoP(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
340: {
341:   VecScatter_MPI_General *gen_to,*gen_from;
342:   PetscScalar            *rvalues,*yv,*val;
343:   PetscErrorCode         ierr;
344:   PetscInt               nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices;
345:   PetscMPIInt            imdex;
346:   MPI_Request            *rwaits,*swaits;
347:   MPI_Status             rstatus,*sstatus;

350:   CHKMEMQ;
351:   if (mode & SCATTER_LOCAL) return(0);
352:   VecGetArray(yin,&yv);

354:   if (mode & SCATTER_REVERSE){
355:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
356:     gen_from = (VecScatter_MPI_General*)ctx->todata;
357:     sstatus  = gen_from->sstatus;
358:   } else {
359:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
360:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
361:     sstatus  = gen_to->sstatus;
362:   }
363:   rvalues  = gen_from->values;
364:   nrecvs   = gen_from->n;
365:   nsends   = gen_to->n;
366:   rwaits   = gen_from->requests;
367:   swaits   = gen_to->requests;
368:   indices  = gen_from->indices;
369:   rstarts  = gen_from->starts;

371:   /*  wait on receives */
372:   count = nrecvs;
373:   while (count) {
374:     MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
375:     /* unpack receives into our local space */
376:     val      = rvalues + rstarts[imdex];
377:     n        = rstarts[imdex+1]-rstarts[imdex];
378:     lindices = indices + rstarts[imdex];
379:     if (addv == INSERT_VALUES) {
380:       for (i=0; i<n; i++) {
381:         yv[lindices[i]] = *val++;
382:       }
383:     } else if (addv == ADD_VALUES) {
384:       for (i=0; i<n; i++) {
385:         yv[lindices[i]] += *val++;
386:       }
387: #if !defined(PETSC_USE_COMPLEX)
388:     } else if (addv == MAX_VALUES) {
389:       for (i=0; i<n; i++) {
390:         yv[lindices[i]] = PetscMax(yv[lindices[i]],*val); val++;
391:       }
392: #endif
393:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
394:     count--;
395:   }

397:   /* wait on sends */
398:   if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
399:   VecRestoreArray(yin,&yv);
400:   CHKMEMQ;
401:   return(0);
402: }
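
/*
    A usage sketch (not part of the library): how the begin/end pair above is
    driven from user code, assuming a context ctx built earlier with
    VecScatterCreate() and the pre-3.0 calling sequence used by the routines in
    this file.
*/
#if 0
static PetscErrorCode ScatterUsageSketch(Vec x,Vec y,VecScatter ctx)
{
  /* forward scatter: selected entries of x are inserted into y */
  VecScatterBegin(x,y,INSERT_VALUES,SCATTER_FORWARD,ctx);
  /* unrelated local work may overlap the communication here */
  VecScatterEnd(x,y,INSERT_VALUES,SCATTER_FORWARD,ctx);

  /* reverse scatter: the same context moves data back, here accumulating into x */
  VecScatterBegin(y,x,ADD_VALUES,SCATTER_REVERSE,ctx);
  VecScatterEnd(y,x,ADD_VALUES,SCATTER_REVERSE,ctx);
  return(0);
}
#endif
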
403: /* ==========================================================================================*/
404: /*
 405:     Special scatters for fixed block sizes. These provide better performance
 406:     because the local copying and the packing and unpacking are unrolled
 407:     to the size of the block.

 409:     They also use MPI persistent sends and receives, which (at least in theory)
 410:     allow MPI to optimize repeated sends and receives of the same type.
411: */
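
/*
    An illustrative sketch (not part of the library) of the persistent-request
    pattern the block-sized scatters use: the message is described once with
    MPI_Send_init() and then restarted cheaply for every scatter with
    MPI_Start()/MPI_Startall().
*/
#if 0
static void PersistentSendSketch(PetscScalar *buf,PetscMPIInt len,PetscMPIInt dest,PetscMPIInt tag,MPI_Comm comm,PetscInt nscatters)
{
  MPI_Request req;
  PetscInt    i;

  MPI_Send_init(buf,len,MPIU_SCALAR,dest,tag,comm,&req);  /* describe the message once        */
  for (i=0; i<nscatters; i++) {
    /* ... pack buf for this scatter ... */
    MPI_Start(&req);                                      /* start this instance of the send  */
    MPI_Wait(&req,MPI_STATUS_IGNORE);                     /* complete it before repacking buf */
  }
  MPI_Request_free(&req);                                 /* release the persistent request   */
}
#endif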

413: /*
 414:     This is for use with the "ready-receiver" mode. In theory, on some
 415:     machines it could lead to better performance; in practice we have never
 416:     seen it do so. It is enabled with the -vecscatter_rr option.
417: */
420: PetscErrorCode VecScatterPostRecvs_PtoP_X(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
421: {
422:   PetscErrorCode         ierr;
423:   VecScatter_MPI_General *gen_from = (VecScatter_MPI_General*)ctx->fromdata;

426:   if (gen_from->n) {MPI_Startall_irecv(gen_from->starts[gen_from->n]*gen_from->bs,gen_from->n,gen_from->requests);}
427:   return(0);
428: }
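
/*
    A minimal two-process sketch (not part of the library) of why ready-receiver
    mode needs the postrecvs step above: a send started on an MPI_Rsend_init()
    request is correct only if the matching receive is already posted.  The
    barrier here stands in for whatever guarantees that ordering.
*/
#if 0
static void ReadyModeSketch(MPI_Comm comm)
{
  PetscMPIInt rank;
  PetscScalar buf[4];
  MPI_Request req;

  MPI_Comm_rank(comm,&rank);
  if (rank == 1) {                                    /* receiver posts its receive first      */
    MPI_Recv_init(buf,4,MPIU_SCALAR,0,0,comm,&req);
    MPI_Start(&req);
  }
  MPI_Barrier(comm);                                  /* the receive is now known to be posted */
  if (rank == 0) {                                    /* sender may now use ready mode         */
    MPI_Rsend_init(buf,4,MPIU_SCALAR,1,0,comm,&req);
    MPI_Start(&req);
  }
  if (rank < 2) {
    MPI_Wait(&req,MPI_STATUS_IGNORE);
    MPI_Request_free(&req);
  }
}
#endif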

430: /* --------------------------------------------------------------------------------------*/
431: /*
 432:     Special optimization to detect when the local part of the scatter is actually
 433:     a contiguous copy; if so, the scatter routines call PetscMemcpy() instead of
 434:     an indexed copy.
435: */
438: PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter_Seq_General *gen_to,VecScatter_Seq_General *gen_from,PetscInt bs)
439: {
440:   PetscInt       n = gen_to->n,i,*to_slots = gen_to->vslots,*from_slots = gen_from->vslots;
441:   PetscInt       to_start,from_start;

445:   to_start   = to_slots[0];
446:   from_start = from_slots[0];

448:   for (i=1; i<n; i++) {
449:     to_start   += bs;
450:     from_start += bs;
451:     if (to_slots[i]   != to_start)   return(0);
452:     if (from_slots[i] != from_start) return(0);
453:   }
454:   gen_to->is_copy       = PETSC_TRUE;
455:   gen_to->copy_start    = to_slots[0];
456:   gen_to->copy_length   = bs*sizeof(PetscScalar)*n;
457:   gen_from->is_copy     = PETSC_TRUE;
458:   gen_from->copy_start  = from_slots[0];
459:   gen_from->copy_length = bs*sizeof(PetscScalar)*n;

461:   PetscLogInfo((0,"VecScatterLocalOptimizeCopy_Private:Local scatter is a copy, optimizing for it\n"));

463:   return(0);
464: }
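
/*
    A small example (not part of the library; the slots are invented) of what
    the routine above detects: with bs = 2 and

        to_slots = {10, 12, 14}      from_slots = {40, 42, 44}

    the three-block local scatter is one contiguous copy, so the kernels below
    replace the indexed loop by a single PetscMemcpy().
*/
#if 0
static void CopyOptimizationSketch(PetscScalar *yv,const PetscScalar *xv)
{
  PetscMemcpy(yv+40,xv+10,3*2*sizeof(PetscScalar));   /* yv[40..45] = xv[10..15] */
}
#endif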

466: /* --------------------------------------------------------------------------------------*/

470: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
471: {
472:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
473:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
474:   PetscErrorCode         ierr;
475:   PetscInt               ny,bs = in_from->bs;

478:   out->postrecvs = in->postrecvs;
479:   out->begin     = in->begin;
480:   out->end       = in->end;
481:   out->copy      = in->copy;
482:   out->destroy   = in->destroy;
483:   out->view      = in->view;

485:   /* allocate entire send scatter context */
486:   PetscNew(VecScatter_MPI_General,&out_to);
487:   PetscNew(VecScatter_MPI_General,&out_from);

489:   ny                = in_to->starts[in_to->n];
490:   out_to->n         = in_to->n;
491:   out_to->type      = in_to->type;
492:   out_to->sendfirst = in_to->sendfirst;

494:   PetscMalloc7(bs*ny,PetscScalar,&out_to->values,out_to->n,MPI_Request,&out_to->requests,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,
495:                       out_to->n,PetscMPIInt,&out_to->procs,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,
496:                       &out_to->rstatus);
497:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
498:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
499:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
500: 
501:   out->todata       = (void*)out_to;
502:   out_to->local.n   = in_to->local.n;
503:   out_to->local.nonmatching_computed = PETSC_FALSE;
504:   out_to->local.n_nonmatching        = 0;
505:   out_to->local.slots_nonmatching    = 0;
506:   if (in_to->local.n) {
507:     PetscMalloc2(in_to->local.n,PetscInt,&out_to->local.vslots,in_from->local.n,PetscInt,&out_from->local.vslots);
508:     PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
509:     PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
510:   } else {
511:     out_to->local.vslots   = 0;
512:     out_from->local.vslots = 0;
513:   }

515:   /* allocate entire receive context */
516:   out_from->type      = in_from->type;
517:   ny                  = in_from->starts[in_from->n];
518:   out_from->n         = in_from->n;
519:   out_from->sendfirst = in_from->sendfirst;

521:   PetscMalloc5(ny*bs,PetscScalar,&out_from->values,out_from->n,MPI_Request,&out_from->requests,ny,PetscInt,&out_from->indices,
522:                       out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);
523:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
524:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
525:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
526:   out->fromdata       = (void*)out_from;
527:   out_from->local.n   = in_from->local.n;
528:   out_from->local.nonmatching_computed = PETSC_FALSE;
529:   out_from->local.n_nonmatching        = 0;
530:   out_from->local.slots_nonmatching    = 0;

532:   /* 
 533:       set up the persistent request arrays for use with MPI_Send_init() and MPI_Recv_init()
534:   */
535:   {
536:     PetscMPIInt tag;
537:     MPI_Comm    comm;
538:     PetscInt    *sstarts = out_to->starts,  *rstarts = out_from->starts;
539:     PetscMPIInt *sprocs  = out_to->procs,   *rprocs  = out_from->procs;
540:     PetscInt    i;
541:     PetscTruth  flg;
542:     MPI_Request *swaits  = out_to->requests,*rwaits  = out_from->requests;
543:     MPI_Request *rev_swaits,*rev_rwaits;
544:     PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;

546:     PetscMalloc2(in_to->n,MPI_Request,&out_to->rev_requests,in_from->n,MPI_Request,&out_from->rev_requests);

548:     rev_rwaits = out_to->rev_requests;
549:     rev_swaits = out_from->rev_requests;

551:     out_from->bs = out_to->bs = bs;
552:     tag     = out->tag;
553:     comm    = out->comm;

555:     /* Register the receives that you will use later (sends for scatter reverse) */
556:     for (i=0; i<out_from->n; i++) {
557:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
558:       MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
559:     }

561:     PetscOptionsHasName(PETSC_NULL,"-vecscatter_rr",&flg);
562:     if (flg) {
563:       out->postrecvs               = VecScatterPostRecvs_PtoP_X;
564:       out_to->use_readyreceiver    = PETSC_TRUE;
565:       out_from->use_readyreceiver  = PETSC_TRUE;
566:       for (i=0; i<out_to->n; i++) {
567:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
568:       }
569:       PetscLogInfo((0,"VecScatterCopy_PtoP_X:Using VecScatter ready receiver mode\n"));
570:     } else {
571:       out->postrecvs               = 0;
572:       out_to->use_readyreceiver    = PETSC_FALSE;
573:       out_from->use_readyreceiver  = PETSC_FALSE;
574:       flg                          = PETSC_FALSE;
575:       PetscOptionsHasName(PETSC_NULL,"-vecscatter_ssend",&flg);
576:       if (flg) {
577:         PetscLogInfo((0,"VecScatterCopy_PtoP_X:Using VecScatter Ssend mode\n"));
578:       }
579:       for (i=0; i<out_to->n; i++) {
580:         if (!flg) {
581:           MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
582:         } else {
583:           MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
584:         }
585:       }
586:     }
587:     /* Register receives for scatter reverse */
588:     for (i=0; i<out_to->n; i++) {
589:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
590:     }
591:   }

593:   return(0);
594: }

596: /* --------------------------------------------------------------------------------------*/

600: PetscErrorCode VecScatterBegin_PtoP_12(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
601: {
602:   VecScatter_MPI_General *gen_to,*gen_from;
603:   PetscScalar            *xv,*yv,*val,*svalues;
604:   MPI_Request            *rwaits,*swaits;
605:   PetscInt               *indices,*sstarts,iend,i,j,nrecvs,nsends,idx,len;
606:   PetscErrorCode         ierr;

609:   VecGetArray(xin,&xv);
610:   if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}

612:   if (mode & SCATTER_REVERSE) {
613:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
614:     gen_from = (VecScatter_MPI_General*)ctx->todata;
615:     rwaits   = gen_from->rev_requests;
616:     swaits   = gen_to->rev_requests;
617:   } else {
618:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
619:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
620:     rwaits   = gen_from->requests;
621:     swaits   = gen_to->requests;
622:   }
623:   svalues  = gen_to->values;
624:   nrecvs   = gen_from->n;
625:   nsends   = gen_to->n;
626:   indices  = gen_to->indices;
627:   sstarts  = gen_to->starts;

629:   if (!(mode & SCATTER_LOCAL)) {

631:     if (!gen_from->use_readyreceiver && !gen_to->sendfirst) {
632:       /* post receives since they were not posted in VecScatterPostRecvs()   */
633:       if (nrecvs) {MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);}
634:     }
635:     if (ctx->packtogether) {
 636:       /* this version packs all the messages together and then sends them; used when -vecscatter_packtogether is given */
637:       len  = 12*sstarts[nsends];
638:       val  = svalues;
639:       for (i=0; i<len; i += 12) {
640:         idx     = *indices++;
641:         val[0]  = xv[idx];
642:         val[1]  = xv[idx+1];
643:         val[2]  = xv[idx+2];
644:         val[3]  = xv[idx+3];
645:         val[4]  = xv[idx+4];
646:         val[5]  = xv[idx+5];
647:         val[6]  = xv[idx+6];
648:         val[7]  = xv[idx+7];
649:         val[8]  = xv[idx+8];
650:         val[9]  = xv[idx+9];
651:         val[10] = xv[idx+10];
652:         val[11] = xv[idx+11];
653:         val    += 12;
654:       }
655:       if (nsends) {MPI_Startall_isend(len,nsends,swaits);}
656:     } else {
657:       /* this version packs and sends one at a time */
658:       val  = svalues;
659:       for (i=0; i<nsends; i++) {
660:         iend = sstarts[i+1]-sstarts[i];

662:         for (j=0; j<iend; j++) {
663:           idx     = *indices++;
664:           val[0]  = xv[idx];
665:           val[1]  = xv[idx+1];
666:           val[2]  = xv[idx+2];
667:           val[3]  = xv[idx+3];
668:           val[4]  = xv[idx+4];
669:           val[5]  = xv[idx+5];
670:           val[6]  = xv[idx+6];
671:           val[7]  = xv[idx+7];
672:           val[8]  = xv[idx+8];
673:           val[9]  = xv[idx+9];
674:           val[10] = xv[idx+10];
675:           val[11] = xv[idx+11];
676:           val    += 12;
677:         }
678:         MPI_Start_isend(12*iend,swaits+i);
679:       }
680:     }

682:     if (!gen_from->use_readyreceiver && gen_to->sendfirst) {
683:       /* post receives since they were not posted in VecScatterPostRecvs()   */
684:       if (nrecvs) {MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);}
685:     }
686:   }

688:   /* take care of local scatters */
689:   if (gen_to->local.n) {
690:     PetscInt *tslots = gen_to->local.vslots,*fslots = gen_from->local.vslots;
691:     PetscInt n       = gen_to->local.n,il,ir;
692:     if (addv == INSERT_VALUES) {
693:       if (gen_to->local.is_copy) {
694:         PetscMemcpy(yv+gen_from->local.copy_start,xv+gen_to->local.copy_start,gen_to->local.copy_length);
695:       } else {
696:         for (i=0; i<n; i++) {
697:           il = fslots[i]; ir = tslots[i];
698:           yv[il]    = xv[ir];
699:           yv[il+1]  = xv[ir+1];
700:           yv[il+2]  = xv[ir+2];
701:           yv[il+3]  = xv[ir+3];
702:           yv[il+4]  = xv[ir+4];
703:           yv[il+5]  = xv[ir+5];
704:           yv[il+6]  = xv[ir+6];
705:           yv[il+7]  = xv[ir+7];
706:           yv[il+8]  = xv[ir+8];
707:           yv[il+9]  = xv[ir+9];
708:           yv[il+10] = xv[ir+10];
709:           yv[il+11] = xv[ir+11];
710:         }
711:       }
712:     }  else if (addv == ADD_VALUES) {
713:       for (i=0; i<n; i++) {
714:         il = fslots[i]; ir = tslots[i];
715:         yv[il]    += xv[ir];
716:         yv[il+1]  += xv[ir+1];
717:         yv[il+2]  += xv[ir+2];
718:         yv[il+3]  += xv[ir+3];
719:         yv[il+4]  += xv[ir+4];
720:         yv[il+5]  += xv[ir+5];
721:         yv[il+6]  += xv[ir+6];
722:         yv[il+7]  += xv[ir+7];
723:         yv[il+8]  += xv[ir+8];
724:         yv[il+9]  += xv[ir+9];
725:         yv[il+10] += xv[ir+10];
726:         yv[il+11] += xv[ir+11];
727:       }
728: #if !defined(PETSC_USE_COMPLEX)
729:     }  else if (addv == MAX_VALUES) {
730:       for (i=0; i<n; i++) {
731:         il = fslots[i]; ir = tslots[i];
732:         yv[il]    = PetscMax(yv[il],xv[ir]);
733:         yv[il+1]  = PetscMax(yv[il+1],xv[ir+1]);
734:         yv[il+2]  = PetscMax(yv[il+2],xv[ir+2]);
735:         yv[il+3]  = PetscMax(yv[il+3],xv[ir+3]);
736:         yv[il+4]  = PetscMax(yv[il+4],xv[ir+4]);
737:         yv[il+5]  = PetscMax(yv[il+5],xv[ir+5]);
738:         yv[il+6]  = PetscMax(yv[il+6],xv[ir+6]);
739:         yv[il+7]  = PetscMax(yv[il+7],xv[ir+7]);
740:         yv[il+8]  = PetscMax(yv[il+8],xv[ir+8]);
741:         yv[il+9]  = PetscMax(yv[il+9],xv[ir+9]);
742:         yv[il+10] = PetscMax(yv[il+10],xv[ir+10]);
743:         yv[il+11] = PetscMax(yv[il+11],xv[ir+11]);
744:       }
745: #endif
746:     } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
747:   }
748:   VecRestoreArray(xin,&xv);
749:   if (xin != yin) {VecRestoreArray(yin,&yv);}
750:   return(0);
751: }

753: /* --------------------------------------------------------------------------------------*/

757: PetscErrorCode VecScatterEnd_PtoP_12(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
758: {
759:   VecScatter_MPI_General *gen_to,*gen_from;
760:   PetscScalar            *rvalues,*yv,*val;
761:   PetscErrorCode         ierr;
762:   PetscMPIInt            imdex;
763:   PetscInt               nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
764:   MPI_Request            *rwaits,*swaits;
765:   MPI_Status             *rstatus,*sstatus;

768:   if (mode & SCATTER_LOCAL) return(0);
769:   VecGetArray(yin,&yv);

771:   if (mode & SCATTER_REVERSE) {
772:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
773:     gen_from = (VecScatter_MPI_General*)ctx->todata;
774:     rwaits   = gen_from->rev_requests;
775:     swaits   = gen_to->rev_requests;
776:     sstatus  = gen_from->sstatus;
777:     rstatus  = gen_from->rstatus;
778:   } else {
779:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
780:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
781:     rwaits   = gen_from->requests;
782:     swaits   = gen_to->requests;
783:     sstatus  = gen_to->sstatus;
784:     rstatus  = gen_to->rstatus;
785:   }
786:   rvalues  = gen_from->values;
787:   nrecvs   = gen_from->n;
788:   nsends   = gen_to->n;
789:   indices  = gen_from->indices;
790:   rstarts  = gen_from->starts;

792:   /*  wait on receives */
793:   count = nrecvs;
 794:   if (ctx->packtogether) { /* receive all messages, then unpack them all; used when -vecscatter_packtogether is given */
795:     if (nrecvs) {MPI_Waitall(nrecvs,rwaits,rstatus);}
796:     n        = rstarts[count];
797:     val      = rvalues;
798:     lindices = indices;
799:     if (addv == INSERT_VALUES) {
800:       for (i=0; i<n; i++) {
801:         idx        = lindices[i];
802:         yv[idx]    = val[0];
803:         yv[idx+1]  = val[1];
804:         yv[idx+2]  = val[2];
805:         yv[idx+3]  = val[3];
806:         yv[idx+4]  = val[4];
807:         yv[idx+5]  = val[5];
808:         yv[idx+6]  = val[6];
809:         yv[idx+7]  = val[7];
810:         yv[idx+8]  = val[8];
811:         yv[idx+9]  = val[9];
812:         yv[idx+10] = val[10];
813:         yv[idx+11] = val[11];
814:         val       += 12;
815:       }
816:     } else if (addv == ADD_VALUES) {
817:       for (i=0; i<n; i++) {
818:         idx         = lindices[i];
819:         yv[idx]    += val[0];
820:         yv[idx+1]  += val[1];
821:         yv[idx+2]  += val[2];
822:         yv[idx+3]  += val[3];
823:         yv[idx+4]  += val[4];
824:         yv[idx+5]  += val[5];
825:         yv[idx+6]  += val[6];
826:         yv[idx+7]  += val[7];
827:         yv[idx+8]  += val[8];
828:         yv[idx+9]  += val[9];
829:         yv[idx+10] += val[10];
830:         yv[idx+11] += val[11];
831:         val        += 12;
832:       }
833: #if !defined(PETSC_USE_COMPLEX)
834:     } else if (addv == MAX_VALUES) {
835:       for (i=0; i<n; i++) {
836:         idx        = lindices[i];
837:         yv[idx]    = PetscMax(yv[idx],val[0]);
838:         yv[idx+1]  = PetscMax(yv[idx+1],val[1]);
839:         yv[idx+2]  = PetscMax(yv[idx+2],val[2]);
840:         yv[idx+3]  = PetscMax(yv[idx+3],val[3]);
841:         yv[idx+4]  = PetscMax(yv[idx+4],val[4]);
842:         yv[idx+5]  = PetscMax(yv[idx+5],val[5]);
843:         yv[idx+6]  = PetscMax(yv[idx+6],val[6]);
844:         yv[idx+7]  = PetscMax(yv[idx+7],val[7]);
845:         yv[idx+8]  = PetscMax(yv[idx+8],val[8]);
846:         yv[idx+9]  = PetscMax(yv[idx+9],val[9]);
847:         yv[idx+10] = PetscMax(yv[idx+10],val[10]);
848:         yv[idx+11] = PetscMax(yv[idx+11],val[11]);
849:         val       += 12;
850:       }
851: #endif
852:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
853:   } else { /* unpack each message as it arrives, default version */
854:     while (count) {
855:       MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus[0]);
856:       /* unpack receives into our local space */
857:       val      = rvalues + 12*rstarts[imdex];
858:       lindices = indices + rstarts[imdex];
859:       n        = rstarts[imdex+1] - rstarts[imdex];
860:       if (addv == INSERT_VALUES) {
861:         for (i=0; i<n; i++) {
862:           idx        = lindices[i];
863:           yv[idx]    = val[0];
864:           yv[idx+1]  = val[1];
865:           yv[idx+2]  = val[2];
866:           yv[idx+3]  = val[3];
867:           yv[idx+4]  = val[4];
868:           yv[idx+5]  = val[5];
869:           yv[idx+6]  = val[6];
870:           yv[idx+7]  = val[7];
871:           yv[idx+8]  = val[8];
872:           yv[idx+9]  = val[9];
873:           yv[idx+10] = val[10];
874:           yv[idx+11] = val[11];
875:           val       += 12;
 876:         }
 877:       } else if (addv == ADD_VALUES) {
 878:         for (i=0; i<n; i++) {
 879:           idx        = lindices[i];
 880:           yv[idx]    += val[0];
 881:           yv[idx+1]  += val[1];
 882:           yv[idx+2]  += val[2];
 883:           yv[idx+3]  += val[3];
 884:           yv[idx+4]  += val[4];
 885:           yv[idx+5]  += val[5];
 886:           yv[idx+6]  += val[6];
 887:           yv[idx+7]  += val[7];
 888:           yv[idx+8]  += val[8];
 889:           yv[idx+9]  += val[9];
 890:           yv[idx+10] += val[10];
 891:           yv[idx+11] += val[11];
 892:           val        += 12;
 893:         }
894: #if !defined(PETSC_USE_COMPLEX)
 895:       } else if (addv == MAX_VALUES) {
 896:         for (i=0; i<n; i++) {
 897:           idx        = lindices[i];
 898:           yv[idx]    = PetscMax(yv[idx],val[0]);
 899:           yv[idx+1]  = PetscMax(yv[idx+1],val[1]);
 900:           yv[idx+2]  = PetscMax(yv[idx+2],val[2]);
 901:           yv[idx+3]  = PetscMax(yv[idx+3],val[3]);
 902:           yv[idx+4]  = PetscMax(yv[idx+4],val[4]);
 903:           yv[idx+5]  = PetscMax(yv[idx+5],val[5]);
 904:           yv[idx+6]  = PetscMax(yv[idx+6],val[6]);
 905:           yv[idx+7]  = PetscMax(yv[idx+7],val[7]);
 906:           yv[idx+8]  = PetscMax(yv[idx+8],val[8]);
 907:           yv[idx+9]  = PetscMax(yv[idx+9],val[9]);
 908:           yv[idx+10] = PetscMax(yv[idx+10],val[10]);
 909:           yv[idx+11] = PetscMax(yv[idx+11],val[11]);
 910:           val        += 12;
 911:         }
912: #endif
 913:       } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
 914:       count--;
915:     }
916:   }
917:   /* wait on sends */
918:   if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
919:   VecRestoreArray(yin,&yv);
920:   return(0);
921: }
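
/*
    The hand-unrolled _12 kernels above (and the _8, _7 and _6 variants that
    follow) are specializations of this generic block pack loop, shown here as a
    sketch that is not part of the library:
*/
#if 0
static void BlockPackSketch(PetscScalar *val,const PetscScalar *xv,const PetscInt *indices,PetscInt count,PetscInt bs)
{
  PetscInt i,k,idx;

  for (i=0; i<count; i++) {
    idx = indices[i];                            /* first entry of block i in x      */
    for (k=0; k<bs; k++) val[k] = xv[idx+k];     /* copy one block into the buffer   */
    val += bs;                                   /* packed send buffer is contiguous */
  }
}
#endif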

923: /* --------------------------------------------------------------------------------------*/

927: PetscErrorCode VecScatterBegin_PtoP_8(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
928: {
929:   VecScatter_MPI_General *gen_to,*gen_from;
930:   PetscScalar            *xv,*yv,*val,*svalues;
931:   MPI_Request            *rwaits,*swaits;
932:   PetscErrorCode         ierr;
933:   PetscInt               i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;

936:   VecGetArray(xin,&xv);
937:   if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
938:   if (mode & SCATTER_REVERSE) {
939:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
940:     gen_from = (VecScatter_MPI_General*)ctx->todata;
941:     rwaits   = gen_from->rev_requests;
942:     swaits   = gen_to->rev_requests;
943:   } else {
944:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
945:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
946:     rwaits   = gen_from->requests;
947:     swaits   = gen_to->requests;
948:   }
949:   svalues  = gen_to->values;
950:   nrecvs   = gen_from->n;
951:   nsends   = gen_to->n;
952:   indices  = gen_to->indices;
953:   sstarts  = gen_to->starts;

955:   if (!(mode & SCATTER_LOCAL)) {

957:     if (gen_to->sendfirst) {
958:       /* this version packs and sends one at a time */
959:       val  = svalues;
960:       for (i=0; i<nsends; i++) {
961:         iend = sstarts[i+1]-sstarts[i];

963:         for (j=0; j<iend; j++) {
964:           idx     = *indices++;
965:           val[0] = xv[idx];
966:           val[1] = xv[idx+1];
967:           val[2] = xv[idx+2];
968:           val[3] = xv[idx+3];
969:           val[4] = xv[idx+4];
970:           val[5] = xv[idx+5];
971:           val[6] = xv[idx+6];
972:           val[7] = xv[idx+7];
973:           val    += 8;
974:         }
975:         MPI_Start_isend(8*iend,swaits+i);
976:       }
977:     }

979:     if (!gen_from->use_readyreceiver) {
980:       /* post receives since they were not posted in VecScatterPostRecvs()   */
981:       if (nrecvs) {MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);}
982:     }

984:     if (!gen_to->sendfirst) {
985:       /* this version packs all the messages together and sends */
986:       /*
987:       len  = 5*sstarts[nsends];
988:       val  = svalues;
989:       for (i=0; i<len; i += 5) {
990:         idx     = *indices++;
991:         val[0] = xv[idx];
992:         val[1] = xv[idx+1];
993:         val[2] = xv[idx+2];
994:         val[3] = xv[idx+3];
995:         val[4] = xv[idx+4];
996:         val      += 5;
997:       }
998:       MPI_Startall_isend(len,nsends,swaits);
999:       */

1001:       /* this version packs and sends one at a time */
1002:       val  = svalues;
1003:       for (i=0; i<nsends; i++) {
1004:         iend = sstarts[i+1]-sstarts[i];

1006:         for (j=0; j<iend; j++) {
1007:           idx     = *indices++;
1008:           val[0] = xv[idx];
1009:           val[1] = xv[idx+1];
1010:           val[2] = xv[idx+2];
1011:           val[3] = xv[idx+3];
1012:           val[4] = xv[idx+4];
1013:           val[5] = xv[idx+5];
1014:           val[6] = xv[idx+6];
1015:           val[7] = xv[idx+7];
1016:           val    += 8;
1017:         }
1018:         MPI_Start_isend(8*iend,swaits+i);
1019:       }
1020:     }
1021:   }

1023:   /* take care of local scatters */
1024:   if (gen_to->local.n) {
1025:     PetscInt *tslots = gen_to->local.vslots,*fslots = gen_from->local.vslots;
1026:     PetscInt n       = gen_to->local.n,il,ir;
1027:     if (addv == INSERT_VALUES) {
1028:       if (gen_to->local.is_copy) {
1029:         PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1030:       } else {
1031:         for (i=0; i<n; i++) {
1032:           il = fslots[i]; ir = tslots[i];
1033:           yv[il]   = xv[ir];
1034:           yv[il+1] = xv[ir+1];
1035:           yv[il+2] = xv[ir+2];
1036:           yv[il+3] = xv[ir+3];
1037:           yv[il+4] = xv[ir+4];
1038:           yv[il+5] = xv[ir+5];
1039:           yv[il+6] = xv[ir+6];
1040:           yv[il+7] = xv[ir+7];
1041:         }
1042:       }
1043:     }  else if (addv == ADD_VALUES) {
1044:       for (i=0; i<n; i++) {
1045:         il = fslots[i]; ir = tslots[i];
1046:         yv[il]   += xv[ir];
1047:         yv[il+1] += xv[ir+1];
1048:         yv[il+2] += xv[ir+2];
1049:         yv[il+3] += xv[ir+3];
1050:         yv[il+4] += xv[ir+4];
1051:         yv[il+5] += xv[ir+5];
1052:         yv[il+6] += xv[ir+6];
1053:         yv[il+7] += xv[ir+7];
1054:       }
1055: #if !defined(PETSC_USE_COMPLEX)
1056:     }  else if (addv == MAX_VALUES) {
1057:       for (i=0; i<n; i++) {
1058:         il = fslots[i]; ir = tslots[i];
1059:         yv[il]   = PetscMax(yv[il],xv[ir]);
1060:         yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1061:         yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1062:         yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
1063:         yv[il+4] = PetscMax(yv[il+4],xv[ir+4]);
1064:         yv[il+5] = PetscMax(yv[il+5],xv[ir+5]);
1065:         yv[il+6] = PetscMax(yv[il+6],xv[ir+6]);
1066:         yv[il+7] = PetscMax(yv[il+7],xv[ir+7]);
1067:       }
1068: #endif
1069:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1070:   }
1071:   VecRestoreArray(xin,&xv);
1072:   if (xin != yin) {VecRestoreArray(yin,&yv);}
1073:   return(0);
1074: }

1076: /* --------------------------------------------------------------------------------------*/

1080: PetscErrorCode VecScatterEnd_PtoP_8(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1081: {
1082:   VecScatter_MPI_General *gen_to,*gen_from;
1083:   PetscScalar            *rvalues,*yv,*val;
1084:   PetscErrorCode         ierr;
1085:   PetscInt               nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
1086:   PetscMPIInt            imdex;
1087:   MPI_Request            *rwaits,*swaits;
1088:   MPI_Status             rstatus,*sstatus;

1091:   if (mode & SCATTER_LOCAL) return(0);
1092:   VecGetArray(yin,&yv);

1094:   if (mode & SCATTER_REVERSE) {
1095:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
1096:     gen_from = (VecScatter_MPI_General*)ctx->todata;
1097:     rwaits   = gen_from->rev_requests;
1098:     swaits   = gen_to->rev_requests;
1099:     sstatus  = gen_from->sstatus;
1100:   } else {
1101:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
1102:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1103:     rwaits   = gen_from->requests;
1104:     swaits   = gen_to->requests;
1105:     sstatus  = gen_to->sstatus;
1106:   }
1107:   rvalues  = gen_from->values;
1108:   nrecvs   = gen_from->n;
1109:   nsends   = gen_to->n;
1110:   indices  = gen_from->indices;
1111:   rstarts  = gen_from->starts;

1113:   /*  wait on receives */
1114:   count = nrecvs;
1115:   while (count) {
1116:     MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
1117:     /* unpack receives into our local space */
1118:     val      = rvalues + 8*rstarts[imdex];
1119:     lindices = indices + rstarts[imdex];
1120:     n        = rstarts[imdex+1] - rstarts[imdex];
1121:     if (addv == INSERT_VALUES) {
1122:       for (i=0; i<n; i++) {
1123:         idx       = lindices[i];
1124:         yv[idx]   = val[0];
1125:         yv[idx+1] = val[1];
1126:         yv[idx+2] = val[2];
1127:         yv[idx+3] = val[3];
1128:         yv[idx+4] = val[4];
1129:         yv[idx+5] = val[5];
1130:         yv[idx+6] = val[6];
1131:         yv[idx+7] = val[7];
1132:         val      += 8;
1133:       }
1134:     } else if (addv == ADD_VALUES) {
1135:       for (i=0; i<n; i++) {
1136:         idx       = lindices[i];
1137:         yv[idx]   += val[0];
1138:         yv[idx+1] += val[1];
1139:         yv[idx+2] += val[2];
1140:         yv[idx+3] += val[3];
1141:         yv[idx+4] += val[4];
1142:         yv[idx+5] += val[5];
1143:         yv[idx+6] += val[6];
1144:         yv[idx+7] += val[7];
1145:         val       += 8;
1146:       }
1147: #if !defined(PETSC_USE_COMPLEX)
1148:     } else if (addv == MAX_VALUES) {
1149:       for (i=0; i<n; i++) {
1150:         idx       = lindices[i];
1151:         yv[idx]   = PetscMax(yv[idx],val[0]);
1152:         yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1153:         yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1154:         yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1155:         yv[idx+4] = PetscMax(yv[idx+4],val[4]);
1156:         yv[idx+5] = PetscMax(yv[idx+5],val[5]);
1157:         yv[idx+6] = PetscMax(yv[idx+6],val[6]);
1158:         yv[idx+7] = PetscMax(yv[idx+7],val[7]);
1159:         val       += 8;
1160:       }
1161: #endif
1162:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1163:     count--;
1164:   }
1165:   /* wait on sends */
1166:   if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
1167:   VecRestoreArray(yin,&yv);
1168:   return(0);
1169: }
1170: /* --------------------------------------------------------------------------------------*/

1174: PetscErrorCode VecScatterBegin_PtoP_7(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1175: {
1176:   VecScatter_MPI_General *gen_to,*gen_from;
1177:   PetscScalar            *xv,*yv,*val,*svalues;
1178:   MPI_Request            *rwaits,*swaits;
1179:   PetscErrorCode         ierr;
1180:   PetscInt               i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;

1183:   VecGetArray(xin,&xv);
1184:   if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
1185:   if (mode & SCATTER_REVERSE) {
1186:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
1187:     gen_from = (VecScatter_MPI_General*)ctx->todata;
1188:     rwaits   = gen_from->rev_requests;
1189:     swaits   = gen_to->rev_requests;
1190:   } else {
1191:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
1192:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1193:     rwaits   = gen_from->requests;
1194:     swaits   = gen_to->requests;
1195:   }
1196:   svalues  = gen_to->values;
1197:   nrecvs   = gen_from->n;
1198:   nsends   = gen_to->n;
1199:   indices  = gen_to->indices;
1200:   sstarts  = gen_to->starts;

1202:   if (!(mode & SCATTER_LOCAL)) {

1204:     if (gen_to->sendfirst) {
1205:       /* this version packs and sends one at a time */
1206:       val  = svalues;
1207:       for (i=0; i<nsends; i++) {
1208:         iend = sstarts[i+1]-sstarts[i];

1210:         for (j=0; j<iend; j++) {
1211:           idx     = *indices++;
1212:           val[0] = xv[idx];
1213:           val[1] = xv[idx+1];
1214:           val[2] = xv[idx+2];
1215:           val[3] = xv[idx+3];
1216:           val[4] = xv[idx+4];
1217:           val[5] = xv[idx+5];
1218:           val[6] = xv[idx+6];
1219:           val    += 7;
1220:         }
1221:         MPI_Start_isend(7*iend,swaits+i);
1222:       }
1223:     }

1225:     if (!gen_from->use_readyreceiver) {
1226:       /* post receives since they were not posted in VecScatterPostRecvs()   */
1227:       if (nrecvs) {MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);}
1228:     }

1230:     if (!gen_to->sendfirst) {
1231:       /* this version packs all the messages together and sends */
1232:       /*
1233:       len  = 5*sstarts[nsends];
1234:       val  = svalues;
1235:       for (i=0; i<len; i += 5) {
1236:         idx     = *indices++;
1237:         val[0] = xv[idx];
1238:         val[1] = xv[idx+1];
1239:         val[2] = xv[idx+2];
1240:         val[3] = xv[idx+3];
1241:         val[4] = xv[idx+4];
1242:         val      += 5;
1243:       }
1244:       MPI_Startall_isend(len,nsends,swaits);
1245:       */

1247:       /* this version packs and sends one at a time */
1248:       val  = svalues;
1249:       for (i=0; i<nsends; i++) {
1250:         iend = sstarts[i+1]-sstarts[i];

1252:         for (j=0; j<iend; j++) {
1253:           idx     = *indices++;
1254:           val[0] = xv[idx];
1255:           val[1] = xv[idx+1];
1256:           val[2] = xv[idx+2];
1257:           val[3] = xv[idx+3];
1258:           val[4] = xv[idx+4];
1259:           val[5] = xv[idx+5];
1260:           val[6] = xv[idx+6];
1261:           val    += 7;
1262:         }
1263:         MPI_Start_isend(7*iend,swaits+i);
1264:       }
1265:     }
1266:   }

1268:   /* take care of local scatters */
1269:   if (gen_to->local.n) {
1270:     PetscInt *tslots = gen_to->local.vslots,*fslots = gen_from->local.vslots;
1271:     PetscInt n       = gen_to->local.n,il,ir;
1272:     if (addv == INSERT_VALUES) {
1273:       if (gen_to->local.is_copy) {
1274:         PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1275:       } else {
1276:         for (i=0; i<n; i++) {
1277:           il = fslots[i]; ir = tslots[i];
1278:           yv[il]   = xv[ir];
1279:           yv[il+1] = xv[ir+1];
1280:           yv[il+2] = xv[ir+2];
1281:           yv[il+3] = xv[ir+3];
1282:           yv[il+4] = xv[ir+4];
1283:           yv[il+5] = xv[ir+5];
1284:           yv[il+6] = xv[ir+6];
1285:         }
1286:       }
1287:     }  else if (addv == ADD_VALUES) {
1288:       for (i=0; i<n; i++) {
1289:         il = fslots[i]; ir = tslots[i];
1290:         yv[il]   += xv[ir];
1291:         yv[il+1] += xv[ir+1];
1292:         yv[il+2] += xv[ir+2];
1293:         yv[il+3] += xv[ir+3];
1294:         yv[il+4] += xv[ir+4];
1295:         yv[il+5] += xv[ir+5];
1296:         yv[il+6] += xv[ir+6];
1297:       }
1298: #if !defined(PETSC_USE_COMPLEX)
1299:     }  else if (addv == MAX_VALUES) {
1300:       for (i=0; i<n; i++) {
1301:         il = fslots[i]; ir = tslots[i];
1302:         yv[il]   = PetscMax(yv[il],xv[ir]);
1303:         yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1304:         yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1305:         yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
1306:         yv[il+4] = PetscMax(yv[il+4],xv[ir+4]);
1307:         yv[il+5] = PetscMax(yv[il+5],xv[ir+5]);
1308:         yv[il+6] = PetscMax(yv[il+6],xv[ir+6]);
1309:       }
1310: #endif
1311:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1312:   }
1313:   VecRestoreArray(xin,&xv);
1314:   if (xin != yin) {VecRestoreArray(yin,&yv);}
1315:   return(0);
1316: }

1318: /* --------------------------------------------------------------------------------------*/

1322: PetscErrorCode VecScatterEnd_PtoP_7(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1323: {
1324:   VecScatter_MPI_General *gen_to,*gen_from;
1325:   PetscScalar            *rvalues,*yv,*val;
1326:   PetscErrorCode         ierr;
1327:   PetscInt               nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
1328:   PetscMPIInt            imdex;
1329:   MPI_Request            *rwaits,*swaits;
1330:   MPI_Status             rstatus,*sstatus;

1333:   if (mode & SCATTER_LOCAL) return(0);
1334:   VecGetArray(yin,&yv);

1336:   if (mode & SCATTER_REVERSE) {
1337:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
1338:     gen_from = (VecScatter_MPI_General*)ctx->todata;
1339:     rwaits   = gen_from->rev_requests;
1340:     swaits   = gen_to->rev_requests;
1341:     sstatus  = gen_from->sstatus;
1342:   } else {
1343:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
1344:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1345:     rwaits   = gen_from->requests;
1346:     swaits   = gen_to->requests;
1347:     sstatus  = gen_to->sstatus;
1348:   }
1349:   rvalues  = gen_from->values;
1350:   nrecvs   = gen_from->n;
1351:   nsends   = gen_to->n;
1352:   indices  = gen_from->indices;
1353:   rstarts  = gen_from->starts;

1355:   /*  wait on receives */
1356:   count = nrecvs;
1357:   while (count) {
1358:     MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
1359:     /* unpack receives into our local space */
1360:     val      = rvalues + 7*rstarts[imdex];
1361:     lindices = indices + rstarts[imdex];
1362:     n        = rstarts[imdex+1] - rstarts[imdex];
1363:     if (addv == INSERT_VALUES) {
1364:       for (i=0; i<n; i++) {
1365:         idx       = lindices[i];
1366:         yv[idx]   = val[0];
1367:         yv[idx+1] = val[1];
1368:         yv[idx+2] = val[2];
1369:         yv[idx+3] = val[3];
1370:         yv[idx+4] = val[4];
1371:         yv[idx+5] = val[5];
1372:         yv[idx+6] = val[6];
1373:         val      += 7;
1374:       }
1375:     } else if (addv == ADD_VALUES) {
1376:       for (i=0; i<n; i++) {
1377:         idx       = lindices[i];
1378:         yv[idx]   += val[0];
1379:         yv[idx+1] += val[1];
1380:         yv[idx+2] += val[2];
1381:         yv[idx+3] += val[3];
1382:         yv[idx+4] += val[4];
1383:         yv[idx+5] += val[5];
1384:         yv[idx+6] += val[6];
1385:         val       += 7;
1386:       }
1387: #if !defined(PETSC_USE_COMPLEX)
1388:     } else if (addv == MAX_VALUES) {
1389:       for (i=0; i<n; i++) {
1390:         idx       = lindices[i];
1391:         yv[idx]   = PetscMax(yv[idx],val[0]);
1392:         yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1393:         yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1394:         yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1395:         yv[idx+4] = PetscMax(yv[idx+4],val[4]);
1396:         yv[idx+5] = PetscMax(yv[idx+5],val[5]);
1397:         yv[idx+6] = PetscMax(yv[idx+6],val[6]);
1398:         val       += 7;
1399:       }
1400: #endif
1401:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1402:     count--;
1403:   }
1404:   /* wait on sends */
1405:   if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
1406:   VecRestoreArray(yin,&yv);
1407:   return(0);
1408: }

1410: /* --------------------------------------------------------------------------------------*/

1414: PetscErrorCode VecScatterBegin_PtoP_6(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1415: {
1416:   VecScatter_MPI_General *gen_to,*gen_from;
1417:   PetscScalar            *xv,*yv,*val,*svalues;
1418:   MPI_Request            *rwaits,*swaits;
1419:   PetscErrorCode         ierr;
1420:   PetscInt               i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;

1423:   VecGetArray(xin,&xv);
1424:   if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
1425:   if (mode & SCATTER_REVERSE) {
1426:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
1427:     gen_from = (VecScatter_MPI_General*)ctx->todata;
1428:     rwaits   = gen_from->rev_requests;
1429:     swaits   = gen_to->rev_requests;
1430:   } else {
1431:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
1432:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1433:     rwaits   = gen_from->requests;
1434:     swaits   = gen_to->requests;
1435:   }
1436:   svalues  = gen_to->values;
1437:   nrecvs   = gen_from->n;
1438:   nsends   = gen_to->n;
1439:   indices  = gen_to->indices;
1440:   sstarts  = gen_to->starts;

1442:   if (!(mode & SCATTER_LOCAL)) {

1444:     if (gen_to->sendfirst) {
1445:       /* this version packs and sends one at a time */
1446:       val  = svalues;
1447:       for (i=0; i<nsends; i++) {
1448:         iend = sstarts[i+1]-sstarts[i];

1450:         for (j=0; j<iend; j++) {
1451:           idx     = *indices++;
1452:           val[0] = xv[idx];
1453:           val[1] = xv[idx+1];
1454:           val[2] = xv[idx+2];
1455:           val[3] = xv[idx+3];
1456:           val[4] = xv[idx+4];
1457:           val[5] = xv[idx+5];
1458:           val    += 6;
1459:         }
1460:         MPI_Start_isend(6*iend,swaits+i);
1461:       }
1462:     }

1464:     if (!gen_from->use_readyreceiver) {
1465:       /* post receives since they were not posted in VecScatterPostRecvs()   */
1466:       if (nrecvs) {MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);}
1467:     }

1469:     if (!gen_to->sendfirst) {
1470:       /* this version packs all the messages together and sends */
1471:       /*
1472:       len  = 6*sstarts[nsends];
1473:       val  = svalues;
1474:       for (i=0; i<len; i += 6) {
1475:         idx     = *indices++;
1476:         val[0] = xv[idx];
1477:         val[1] = xv[idx+1];
1478:         val[2] = xv[idx+2];
1479:         val[3] = xv[idx+3];
1480:         val[4] = xv[idx+4];
1481:         val[5] = xv[idx+5];
1482:         val    += 6;
1483:       }
1484:       MPI_Startall_isend(len,nsends,swaits);
1485:       */

1486:       /* this version packs and sends one at a time */
1487:       val  = svalues;
1488:       for (i=0; i<nsends; i++) {
1489:         iend = sstarts[i+1]-sstarts[i];

1491:         for (j=0; j<iend; j++) {
1492:           idx     = *indices++;
1493:           val[0] = xv[idx];
1494:           val[1] = xv[idx+1];
1495:           val[2] = xv[idx+2];
1496:           val[3] = xv[idx+3];
1497:           val[4] = xv[idx+4];
1498:           val[5] = xv[idx+5];
1499:           val    += 6;
1500:         }
1501:         MPI_Start_isend(6*iend,swaits+i);
1502:       }
1503:     }
1504:   }

1506:   /* take care of local scatters */
1507:   if (gen_to->local.n) {
1508:     PetscInt *tslots = gen_to->local.vslots,*fslots = gen_from->local.vslots;
1509:     PetscInt n       = gen_to->local.n,il,ir;
1510:     if (addv == INSERT_VALUES) {
1511:       if (gen_to->local.is_copy) {
1512:         PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1513:       } else {
1514:         for (i=0; i<n; i++) {
1515:           il = fslots[i]; ir = tslots[i];
1516:           yv[il]   = xv[ir];
1517:           yv[il+1] = xv[ir+1];
1518:           yv[il+2] = xv[ir+2];
1519:           yv[il+3] = xv[ir+3];
1520:           yv[il+4] = xv[ir+4];
1521:           yv[il+5] = xv[ir+5];
1522:         }
1523:       }
1524:     }  else if (addv == ADD_VALUES) {
1525:       for (i=0; i<n; i++) {
1526:         il = fslots[i]; ir = tslots[i];
1527:         yv[il]   += xv[ir];
1528:         yv[il+1] += xv[ir+1];
1529:         yv[il+2] += xv[ir+2];
1530:         yv[il+3] += xv[ir+3];
1531:         yv[il+4] += xv[ir+4];
1532:         yv[il+5] += xv[ir+5];
1533:       }
1534: #if !defined(PETSC_USE_COMPLEX)
1535:     }  else if (addv == MAX_VALUES) {
1536:       for (i=0; i<n; i++) {
1537:         il = fslots[i]; ir = tslots[i];
1538:         yv[il]   = PetscMax(yv[il],xv[ir]);
1539:         yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1540:         yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1541:         yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
1542:         yv[il+4] = PetscMax(yv[il+4],xv[ir+4]);
1543:         yv[il+5] = PetscMax(yv[il+5],xv[ir+5]);
1544:       }
1545: #endif
1546:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1547:   }
1548:   VecRestoreArray(xin,&xv);
1549:   if (xin != yin) {VecRestoreArray(yin,&yv);}
1550:   return(0);
1551: }
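/*
   Editor's sketch (not part of vpscat.c): the send side above packs one
   message at a time and then starts the matching persistent request.
   MPI_Start_isend/MPI_Startall_irecv are PETSc-internal macros; the helper
   below shows the equivalent step with standard MPI calls and plain C types.
   The name pack_and_start and its argument list are assumptions.
*/
#include <mpi.h>

static void pack_and_start(int bs,int start,int end,const int *indices,
                           const double *x,double *sbuf,MPI_Request *req)
{
  int j,k;
  double *val = sbuf + bs*start;           /* this message's slice of the send buffer */
  for (j=start; j<end; j++) {
    int idx = indices[j];                  /* first entry of the source block */
    for (k=0; k<bs; k++) *val++ = x[idx+k];
  }
  MPI_Start(req);                          /* begin the persistent send created with MPI_Send_init() */
}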

1553: /* --------------------------------------------------------------------------------------*/

1557: PetscErrorCode VecScatterEnd_PtoP_6(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1558: {
1559:   VecScatter_MPI_General *gen_to,*gen_from;
1560:   PetscScalar            *rvalues,*yv,*val;
1561:   PetscErrorCode         ierr;
1562:   PetscInt               nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
1563:   PetscMPIInt            imdex;
1564:   MPI_Request            *rwaits,*swaits;
1565:   MPI_Status             rstatus,*sstatus;

1568:   if (mode & SCATTER_LOCAL) return(0);
1569:   VecGetArray(yin,&yv);

1571:   if (mode & SCATTER_REVERSE) {
1572:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
1573:     gen_from = (VecScatter_MPI_General*)ctx->todata;
1574:     rwaits   = gen_from->rev_requests;
1575:     swaits   = gen_to->rev_requests;
1576:     sstatus  = gen_from->sstatus;
1577:   } else {
1578:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
1579:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1580:     rwaits   = gen_from->requests;
1581:     swaits   = gen_to->requests;
1582:     sstatus  = gen_to->sstatus;
1583:   }
1584:   rvalues  = gen_from->values;
1585:   nrecvs   = gen_from->n;
1586:   nsends   = gen_to->n;
1587:   indices  = gen_from->indices;
1588:   rstarts  = gen_from->starts;

1590:   /*  wait on receives */
1591:   count = nrecvs;
1592:   while (count) {
1593:     MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
1594:     /* unpack receives into our local space */
1595:     val      = rvalues + 6*rstarts[imdex];
1596:     lindices = indices + rstarts[imdex];
1597:     n        = rstarts[imdex+1] - rstarts[imdex];
1598:     if (addv == INSERT_VALUES) {
1599:       for (i=0; i<n; i++) {
1600:         idx       = lindices[i];
1601:         yv[idx]   = val[0];
1602:         yv[idx+1] = val[1];
1603:         yv[idx+2] = val[2];
1604:         yv[idx+3] = val[3];
1605:         yv[idx+4] = val[4];
1606:         yv[idx+5] = val[5];
1607:         val      += 6;
1608:       }
1609:     } else if (addv == ADD_VALUES) {
1610:       for (i=0; i<n; i++) {
1611:         idx       = lindices[i];
1612:         yv[idx]   += val[0];
1613:         yv[idx+1] += val[1];
1614:         yv[idx+2] += val[2];
1615:         yv[idx+3] += val[3];
1616:         yv[idx+4] += val[4];
1617:         yv[idx+5] += val[5];
1618:         val       += 6;
1619:       }
1620: #if !defined(PETSC_USE_COMPLEX)
1621:     } else if (addv == MAX_VALUES) {
1622:       for (i=0; i<n; i++) {
1623:         idx       = lindices[i];
1624:         yv[idx]   = PetscMax(yv[idx],val[0]);
1625:         yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1626:         yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1627:         yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1628:         yv[idx+4] = PetscMax(yv[idx+4],val[4]);
1629:         yv[idx+5] = PetscMax(yv[idx+5],val[5]);
1630:         val       += 6;
1631:       }
1632: #endif
1633:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1634:     count--;
1635:   }
1636:   /* wait on sends */
1637:   if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
1638:   VecRestoreArray(yin,&yv);
1639:   return(0);
1640: }

1642: /* --------------------------------------------------------------------------------------*/

1646: PetscErrorCode VecScatterBegin_PtoP_5(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1647: {
1648:   VecScatter_MPI_General *gen_to,*gen_from;
1649:   PetscScalar            *xv,*yv,*val,*svalues;
1650:   MPI_Request            *rwaits,*swaits;
1651:   PetscErrorCode         ierr;
1652:   PetscInt               i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;

1655:   VecGetArray(xin,&xv);
1656:   if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
1657:   if (mode & SCATTER_REVERSE) {
1658:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
1659:     gen_from = (VecScatter_MPI_General*)ctx->todata;
1660:     rwaits   = gen_from->rev_requests;
1661:     swaits   = gen_to->rev_requests;
1662:   } else {
1663:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
1664:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1665:     rwaits   = gen_from->requests;
1666:     swaits   = gen_to->requests;
1667:   }
1668:   svalues  = gen_to->values;
1669:   nrecvs   = gen_from->n;
1670:   nsends   = gen_to->n;
1671:   indices  = gen_to->indices;
1672:   sstarts  = gen_to->starts;

1674:   if (!(mode & SCATTER_LOCAL)) {

1676:     if (gen_to->sendfirst) {
1677:       /* this version packs and sends one at a time */
1678:       val  = svalues;
1679:       for (i=0; i<nsends; i++) {
1680:         iend = sstarts[i+1]-sstarts[i];

1682:         for (j=0; j<iend; j++) {
1683:           idx     = *indices++;
1684:           val[0] = xv[idx];
1685:           val[1] = xv[idx+1];
1686:           val[2] = xv[idx+2];
1687:           val[3] = xv[idx+3];
1688:           val[4] = xv[idx+4];
1689:           val    += 5;
1690:         }
1691:         MPI_Start_isend(5*iend,swaits+i);
1692:       }
1693:     }

1695:     if (!gen_from->use_readyreceiver) {
1696:       /* post receives since they were not posted in VecScatterPostRecvs()   */
1697:       if (nrecvs) {MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);}
1698:     }

1700:     if (!gen_to->sendfirst) {
1701:       /* this version packs all the messages together and sends */
1702:       /*
1703:       len  = 5*sstarts[nsends];
1704:       val  = svalues;
1705:       for (i=0; i<len; i += 5) {
1706:         idx     = *indices++;
1707:         val[0] = xv[idx];
1708:         val[1] = xv[idx+1];
1709:         val[2] = xv[idx+2];
1710:         val[3] = xv[idx+3];
1711:         val[4] = xv[idx+4];
1712:         val      += 5;
1713:       }
1714:       MPI_Startall_isend(len,nsends,swaits);
1715:       */

1717:       /* this version packs and sends one at a time */
1718:       val  = svalues;
1719:       for (i=0; i<nsends; i++) {
1720:         iend = sstarts[i+1]-sstarts[i];

1722:         for (j=0; j<iend; j++) {
1723:           idx     = *indices++;
1724:           val[0] = xv[idx];
1725:           val[1] = xv[idx+1];
1726:           val[2] = xv[idx+2];
1727:           val[3] = xv[idx+3];
1728:           val[4] = xv[idx+4];
1729:           val    += 5;
1730:         }
1731:         MPI_Start_isend(5*iend,swaits+i);
1732:       }
1733:     }
1734:   }

1736:   /* take care of local scatters */
1737:   if (gen_to->local.n) {
1738:     PetscInt *tslots = gen_to->local.vslots,*fslots = gen_from->local.vslots;
1739:     PetscInt n       = gen_to->local.n,il,ir;
1740:     if (addv == INSERT_VALUES) {
1741:       if (gen_to->local.is_copy) {
1742:         PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1743:       } else {
1744:         for (i=0; i<n; i++) {
1745:           il = fslots[i]; ir = tslots[i];
1746:           yv[il]   = xv[ir];
1747:           yv[il+1] = xv[ir+1];
1748:           yv[il+2] = xv[ir+2];
1749:           yv[il+3] = xv[ir+3];
1750:           yv[il+4] = xv[ir+4];
1751:         }
1752:       }
1753:     }  else if (addv == ADD_VALUES) {
1754:       for (i=0; i<n; i++) {
1755:         il = fslots[i]; ir = tslots[i];
1756:         yv[il]   += xv[ir];
1757:         yv[il+1] += xv[ir+1];
1758:         yv[il+2] += xv[ir+2];
1759:         yv[il+3] += xv[ir+3];
1760:         yv[il+4] += xv[ir+4];
1761:       }
1762: #if !defined(PETSC_USE_COMPLEX)
1763:     }  else if (addv == MAX_VALUES) {
1764:       for (i=0; i<n; i++) {
1765:         il = fslots[i]; ir = tslots[i];
1766:         yv[il]   = PetscMax(yv[il],xv[ir]);
1767:         yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1768:         yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1769:         yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
1770:         yv[il+4] = PetscMax(yv[il+4],xv[ir+4]);
1771:       }
1772: #endif
1773:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1774:   }
1775:   VecRestoreArray(xin,&xv);
1776:   if (xin != yin) {VecRestoreArray(yin,&yv);}
1777:   return(0);
1778: }

1780: /* --------------------------------------------------------------------------------------*/

1784: PetscErrorCode VecScatterEnd_PtoP_5(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1785: {
1786:   VecScatter_MPI_General *gen_to,*gen_from;
1787:   PetscScalar            *rvalues,*yv,*val;
1788:   PetscErrorCode         ierr;
1789:   PetscInt               nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
1790:   PetscMPIInt            imdex;
1791:   MPI_Request            *rwaits,*swaits;
1792:   MPI_Status             rstatus,*sstatus;

1795:   if (mode & SCATTER_LOCAL) return(0);
1796:   VecGetArray(yin,&yv);

1798:   if (mode & SCATTER_REVERSE) {
1799:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
1800:     gen_from = (VecScatter_MPI_General*)ctx->todata;
1801:     rwaits   = gen_from->rev_requests;
1802:     swaits   = gen_to->rev_requests;
1803:     sstatus  = gen_from->sstatus;
1804:   } else {
1805:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
1806:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1807:     rwaits   = gen_from->requests;
1808:     swaits   = gen_to->requests;
1809:     sstatus  = gen_to->sstatus;
1810:   }
1811:   rvalues  = gen_from->values;
1812:   nrecvs   = gen_from->n;
1813:   nsends   = gen_to->n;
1814:   indices  = gen_from->indices;
1815:   rstarts  = gen_from->starts;

1817:   /*  wait on receives */
1818:   count = nrecvs;
1819:   while (count) {
1820:     MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
1821:     /* unpack receives into our local space */
1822:     val      = rvalues + 5*rstarts[imdex];
1823:     lindices = indices + rstarts[imdex];
1824:     n        = rstarts[imdex+1] - rstarts[imdex];
1825:     if (addv == INSERT_VALUES) {
1826:       for (i=0; i<n; i++) {
1827:         idx       = lindices[i];
1828:         yv[idx]   = val[0];
1829:         yv[idx+1] = val[1];
1830:         yv[idx+2] = val[2];
1831:         yv[idx+3] = val[3];
1832:         yv[idx+4] = val[4];
1833:         val      += 5;
1834:       }
1835:     } else if (addv == ADD_VALUES) {
1836:       for (i=0; i<n; i++) {
1837:         idx       = lindices[i];
1838:         yv[idx]   += val[0];
1839:         yv[idx+1] += val[1];
1840:         yv[idx+2] += val[2];
1841:         yv[idx+3] += val[3];
1842:         yv[idx+4] += val[4];
1843:         val       += 5;
1844:       }
1845: #if !defined(PETSC_USE_COMPLEX)
1846:     } else if (addv == MAX_VALUES) {
1847:       for (i=0; i<n; i++) {
1848:         idx       = lindices[i];
1849:         yv[idx]   = PetscMax(yv[idx],val[0]);
1850:         yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1851:         yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1852:         yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1853:         yv[idx+4] = PetscMax(yv[idx+4],val[4]);
1854:         val       += 5;
1855:       }
1856: #endif
1857:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1858:     count--;
1859:   }
1860:   /* wait on sends */
1861:   if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
1862:   VecRestoreArray(yin,&yv);
1863:   return(0);
1864: }

1866: /* --------------------------------------------------------------------------------------*/

1870: PetscErrorCode VecScatterBegin_PtoP_4(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1871: {
1872:   VecScatter_MPI_General *gen_to,*gen_from;
1873:   PetscScalar            *xv,*yv,*val,*svalues;
1874:   MPI_Request            *rwaits,*swaits;
1875:   PetscInt               *indices,*sstarts,iend,i,j,nrecvs,nsends,idx,len;
1876:   PetscErrorCode         ierr;

1879:   VecGetArray(xin,&xv);
1880:   if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}

1882:   if (mode & SCATTER_REVERSE) {
1883:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
1884:     gen_from = (VecScatter_MPI_General*)ctx->todata;
1885:     rwaits   = gen_from->rev_requests;
1886:     swaits   = gen_to->rev_requests;
1887:   } else {
1888:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
1889:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1890:     rwaits   = gen_from->requests;
1891:     swaits   = gen_to->requests;
1892:   }
1893:   svalues  = gen_to->values;
1894:   nrecvs   = gen_from->n;
1895:   nsends   = gen_to->n;
1896:   indices  = gen_to->indices;
1897:   sstarts  = gen_to->starts;

1899:   if (!(mode & SCATTER_LOCAL)) {

1901:     if (!gen_from->use_readyreceiver && !gen_to->sendfirst) {
1902:       /* post receives since they were not posted in VecScatterPostRecvs()   */
1903:       if (nrecvs) {MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);}
1904:     }

1906:     if (ctx->packtogether) {
1907:       /* this version packs all the messages together and sends, when -vecscatter_packtogether used */
1908:       len  = 4*sstarts[nsends];
1909:       val  = svalues;
1910:       for (i=0; i<len; i += 4) {
1911:         idx    = *indices++;
1912:         val[0] = xv[idx];
1913:         val[1] = xv[idx+1];
1914:         val[2] = xv[idx+2];
1915:         val[3] = xv[idx+3];
1916:         val    += 4;
1917:       }
1918:       if (nsends) {MPI_Startall_isend(len,nsends,swaits);}
1919:     } else {
1920:       /* this version packs and sends one at a time, default */
1921:       val  = svalues;
1922:       for (i=0; i<nsends; i++) {
1923:         iend = sstarts[i+1]-sstarts[i];

1925:         for (j=0; j<iend; j++) {
1926:           idx     = *indices++;
1927:           val[0] = xv[idx];
1928:           val[1] = xv[idx+1];
1929:           val[2] = xv[idx+2];
1930:           val[3] = xv[idx+3];
1931:           val    += 4;
1932:         }
1933:         MPI_Start_isend(4*iend,swaits+i);
1934:       }
1935:     }

1937:     if (!gen_from->use_readyreceiver && gen_to->sendfirst) {
1938:       /* post receives since they were not posted in VecScatterPostRecvs()   */
1939:       if (nrecvs) {MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);}
1940:     }
1941:   }

1943:   /* take care of local scatters */
1944:   if (gen_to->local.n) {
1945:     PetscInt *tslots = gen_to->local.vslots,*fslots = gen_from->local.vslots;
1946:     PetscInt n       = gen_to->local.n,il,ir;
1947:     if (addv == INSERT_VALUES) {
1948:       if (gen_to->local.is_copy) {
1949:         PetscMemcpy(yv+gen_from->local.copy_start,xv+gen_to->local.copy_start,gen_to->local.copy_length);
1950:       } else {
1951:         for (i=0; i<n; i++) {
1952:           il = fslots[i]; ir = tslots[i];
1953:           yv[il]   = xv[ir];
1954:           yv[il+1] = xv[ir+1];
1955:           yv[il+2] = xv[ir+2];
1956:           yv[il+3] = xv[ir+3];
1957:         }
1958:       }
1959:     }  else if (addv == ADD_VALUES) {
1960:       for (i=0; i<n; i++) {
1961:         il = fslots[i]; ir = tslots[i];
1962:         yv[il]   += xv[ir];
1963:         yv[il+1] += xv[ir+1];
1964:         yv[il+2] += xv[ir+2];
1965:         yv[il+3] += xv[ir+3];
1966:       }
1967: #if !defined(PETSC_USE_COMPLEX)
1968:     }  else if (addv == MAX_VALUES) {
1969:       for (i=0; i<n; i++) {
1970:         il = fslots[i]; ir = tslots[i];
1971:         yv[il]   = PetscMax(yv[il],xv[ir]);
1972:         yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1973:         yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1974:         yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
1975:       }
1976: #endif
1977:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1978:   }
1979:   VecRestoreArray(xin,&xv);
1980:   if (xin != yin) {VecRestoreArray(yin,&yv);}
1981:   return(0);
1982: }
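/*
   Editor's sketch (not part of vpscat.c): the block-size-4 routines above
   check ctx->packtogether (-vecscatter_packtogether).  The helper below shows,
   with standard MPI on already-created persistent requests, the difference
   between starting all sends at once and starting each send as soon as its
   message is packed.  The name start_sends is an assumption.
*/
#include <mpi.h>

static void start_sends(int nsends,MPI_Request *swaits,int packtogether)
{
  int i;
  if (packtogether) {
    if (nsends) MPI_Startall(nsends,swaits);       /* all messages packed first, one start */
  } else {
    for (i=0; i<nsends; i++) MPI_Start(swaits+i);  /* packing of message i happens just before this */
  }
}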

1984: /* --------------------------------------------------------------------------------------*/

1988: PetscErrorCode VecScatterEnd_PtoP_4(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1989: {
1990:   VecScatter_MPI_General *gen_to,*gen_from;
1991:   PetscScalar            *rvalues,*yv,*val;
1992:   PetscErrorCode         ierr;
1993:   PetscInt               nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
1994:   PetscMPIInt            imdex;
1995:   MPI_Request            *rwaits,*swaits;
1996:   MPI_Status             *rstatus,*sstatus;

1999:   if (mode & SCATTER_LOCAL) return(0);
2000:   VecGetArray(yin,&yv);

2002:   if (mode & SCATTER_REVERSE) {
2003:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
2004:     gen_from = (VecScatter_MPI_General*)ctx->todata;
2005:     rwaits   = gen_from->rev_requests;
2006:     swaits   = gen_to->rev_requests;
2007:     sstatus  = gen_from->sstatus;
2008:     rstatus  = gen_from->rstatus;
2009:   } else {
2010:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
2011:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
2012:     rwaits   = gen_from->requests;
2013:     swaits   = gen_to->requests;
2014:     sstatus  = gen_to->sstatus;
2015:     rstatus  = gen_to->rstatus;
2016:   }
2017:   rvalues  = gen_from->values;
2018:   nrecvs   = gen_from->n;
2019:   nsends   = gen_to->n;
2020:   indices  = gen_from->indices;
2021:   rstarts  = gen_from->starts;

2023:   /*  wait on receives */
2024:   count = nrecvs;
2025:   if (ctx->packtogether) { /* receive all messages, then unpack all, when -vecscatter_packtogether used */
2026:     if (nrecvs) {MPI_Waitall(nrecvs,rwaits,rstatus);}
2027:     n        = rstarts[count];
2028:     val      = rvalues;
2029:     lindices = indices;
2030:     if (addv == INSERT_VALUES) {
2031:       for (i=0; i<n; i++) {
2032:         idx       = lindices[i];
2033:         yv[idx]   = val[0];
2034:         yv[idx+1] = val[1];
2035:         yv[idx+2] = val[2];
2036:         yv[idx+3] = val[3];
2037:         val      += 4;
2038:       }
2039:     } else if (addv == ADD_VALUES) {
2040:       for (i=0; i<n; i++) {
2041:         idx       = lindices[i];
2042:         yv[idx]   += val[0];
2043:         yv[idx+1] += val[1];
2044:         yv[idx+2] += val[2];
2045:         yv[idx+3] += val[3];
2046:         val       += 4;
2047:       }
2048: #if !defined(PETSC_USE_COMPLEX)
2049:     } else if (addv == MAX_VALUES) {
2050:       for (i=0; i<n; i++) {
2051:         idx       = lindices[i];
2052:         yv[idx]   = PetscMax(yv[idx],val[0]);
2053:         yv[idx+1] = PetscMax(yv[idx+1],val[1]);
2054:         yv[idx+2] = PetscMax(yv[idx+2],val[2]);
2055:         yv[idx+3] = PetscMax(yv[idx+3],val[3]);
2056:         val       += 4;
2057:       }
2058: #endif
2059:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
2060:   } else { /* unpack each message as it arrives, default version */
2061:     while (count) {
2062:       MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus[0]);
2063:       /* unpack receives into our local space */
2064:       val      = rvalues + 4*rstarts[imdex];
2065:       lindices = indices + rstarts[imdex];
2066:       n        = rstarts[imdex+1] - rstarts[imdex];
2067:       if (addv == INSERT_VALUES) {
2068:         for (i=0; i<n; i++) {
2069:           idx       = lindices[i];
2070:           yv[idx]   = val[0];
2071:           yv[idx+1] = val[1];
2072:           yv[idx+2] = val[2];
2073:           yv[idx+3] = val[3];
2074:           val      += 4;
2075:         }
2076:       } else if (addv == ADD_VALUES) {
2077:         for (i=0; i<n; i++) {
2078:           idx       = lindices[i];
2079:           yv[idx]   += val[0];
2080:           yv[idx+1] += val[1];
2081:           yv[idx+2] += val[2];
2082:           yv[idx+3] += val[3];
2083:           val       += 4;
2084:         }
2085: #if !defined(PETSC_USE_COMPLEX)
2086:       } else if (addv == MAX_VALUES) {
2087:         for (i=0; i<n; i++) {
2088:           idx       = lindices[i];
2089:           yv[idx]   = PetscMax(yv[idx],val[0]);
2090:           yv[idx+1] = PetscMax(yv[idx+1],val[1]);
2091:           yv[idx+2] = PetscMax(yv[idx+2],val[2]);
2092:           yv[idx+3] = PetscMax(yv[idx+3],val[3]);
2093:           val       += 4;
2094:         }
2095: #endif
2096:       }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
2097:       count--;
2098:     }
2099:   }

2101:   /* wait on sends */
2102:   if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
2103:   VecRestoreArray(yin,&yv);
2104:   return(0);
2105: }
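/*
   Editor's sketch (not part of vpscat.c): VecScatterEnd_PtoP_4 above completes
   receives in one of two ways.  The helper below shows both strategies with
   standard MPI; unpack(i) stands for the per-message unpack loop, and the name
   wait_and_unpack is an assumption.
*/
#include <mpi.h>

static void wait_and_unpack(int nrecvs,MPI_Request *rwaits,MPI_Status *rstatus,
                            int packtogether,void (*unpack)(int msg))
{
  int i,imdex;
  if (packtogether) {                        /* wait for everything, then unpack once */
    if (nrecvs) MPI_Waitall(nrecvs,rwaits,rstatus);
    for (i=0; i<nrecvs; i++) unpack(i);
  } else {                                   /* default: unpack each message as it arrives */
    for (i=0; i<nrecvs; i++) {
      MPI_Waitany(nrecvs,rwaits,&imdex,rstatus);
      unpack(imdex);
    }
  }
}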

2107: /* --------------------------------------------------------------------------------------*/

2111: PetscErrorCode VecScatterBegin_PtoP_3(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
2112: {
2113:   VecScatter_MPI_General *gen_to,*gen_from;
2114:   PetscScalar            *xv,*yv,*val,*svalues;
2115:   MPI_Request            *rwaits,*swaits;
2116:   PetscErrorCode         ierr;
2117:   PetscInt               i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;

2120:   VecGetArray(xin,&xv);
2121:   if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}

2123:   if (mode & SCATTER_REVERSE) {
2124:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
2125:     gen_from = (VecScatter_MPI_General*)ctx->todata;
2126:     rwaits   = gen_from->rev_requests;
2127:     swaits   = gen_to->rev_requests;
2128:   } else {
2129:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
2130:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
2131:     rwaits   = gen_from->requests;
2132:     swaits   = gen_to->requests;
2133:   }
2134:   svalues  = gen_to->values;
2135:   nrecvs   = gen_from->n;
2136:   nsends   = gen_to->n;
2137:   indices  = gen_to->indices;
2138:   sstarts  = gen_to->starts;

2140:   if (!(mode & SCATTER_LOCAL)) {

2142:     if (gen_to->sendfirst) {
2143:       /* this version packs and sends one at a time */
2144:       val  = svalues;
2145:       for (i=0; i<nsends; i++) {
2146:         iend = sstarts[i+1]-sstarts[i];

2148:         for (j=0; j<iend; j++) {
2149:           idx     = *indices++;
2150:           val[0] = xv[idx];
2151:           val[1] = xv[idx+1];
2152:           val[2] = xv[idx+2];
2153:           val    += 3;
2154:         }
2155:         MPI_Start_isend(3*iend,swaits+i);
2156:       }
2157:     }

2159:     if (!gen_from->use_readyreceiver) {
2160:       /* post receives since they were not posted in VecScatterPostRecvs()   */
2161:       if (nrecvs) {MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);}
2162:     }

2164:     if (!gen_to->sendfirst) {
2165:       /* this version packs all the messages together and sends */
2166:       /*
2167:       len  = 3*sstarts[nsends];
2168:       val  = svalues;
2169:       for (i=0; i<len; i += 3) {
2170:         idx     = *indices++;
2171:         val[0] = xv[idx];
2172:         val[1] = xv[idx+1];
2173:         val[2] = xv[idx+2];
2174:         val      += 3;
2175:       }
2176:       MPI_Startall_isend(len,nsends,swaits);
2177:       */

2179:       /* this version packs and sends one at a time */
2180:       val  = svalues;
2181:       for (i=0; i<nsends; i++) {
2182:         iend = sstarts[i+1]-sstarts[i];

2184:         for (j=0; j<iend; j++) {
2185:           idx     = *indices++;
2186:           val[0] = xv[idx];
2187:           val[1] = xv[idx+1];
2188:           val[2] = xv[idx+2];
2189:           val    += 3;
2190:         }
2191:         MPI_Start_isend(3*iend,swaits+i);
2192:       }
2193:     }
2194:   }

2196:   /* take care of local scatters */
2197:   if (gen_to->local.n) {
2198:     PetscInt *tslots = gen_to->local.vslots,*fslots = gen_from->local.vslots;
2199:     PetscInt n       = gen_to->local.n,il,ir;
2200:     if (addv == INSERT_VALUES) {
2201:       if (gen_to->local.is_copy) {
2202:         PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
2203:       } else {
2204:         for (i=0; i<n; i++) {
2205:           il = fslots[i]; ir = tslots[i];
2206:           yv[il]   = xv[ir];
2207:           yv[il+1] = xv[ir+1];
2208:           yv[il+2] = xv[ir+2];
2209:         }
2210:       }
2211:     }  else if (addv == ADD_VALUES) {
2212:       for (i=0; i<n; i++) {
2213:         il = fslots[i]; ir = tslots[i];
2214:         yv[il]   += xv[ir];
2215:         yv[il+1] += xv[ir+1];
2216:         yv[il+2] += xv[ir+2];
2217:       }
2218: #if !defined(PETSC_USE_COMPLEX)
2219:     }  else if (addv == MAX_VALUES) {
2220:       for (i=0; i<n; i++) {
2221:         il = fslots[i]; ir = tslots[i];
2222:         yv[il]   = PetscMax(yv[il],xv[ir]);
2223:         yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
2224:         yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
2225:       }
2226: #endif
2227:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
2228:   }
2229:   VecRestoreArray(xin,&xv);
2230:   if (xin != yin) {VecRestoreArray(yin,&yv);}
2231:   return(0);
2232: }

2234: /* --------------------------------------------------------------------------------------*/

2238: PetscErrorCode VecScatterEnd_PtoP_3(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
2239: {
2240:   VecScatter_MPI_General *gen_to,*gen_from;
2241:   PetscScalar            *rvalues,*yv,*val;
2242:   PetscErrorCode         ierr;
2243:   PetscInt               nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
2244:   PetscMPIInt            imdex;
2245:   MPI_Request            *rwaits,*swaits;
2246:   MPI_Status             rstatus,*sstatus;

2249:   if (mode & SCATTER_LOCAL) return(0);
2250:   VecGetArray(yin,&yv);

2252:   if (mode & SCATTER_REVERSE) {
2253:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
2254:     gen_from = (VecScatter_MPI_General*)ctx->todata;
2255:     rwaits   = gen_from->rev_requests;
2256:     swaits   = gen_to->rev_requests;
2257:     sstatus  = gen_from->sstatus;
2258:   } else {
2259:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
2260:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
2261:     rwaits   = gen_from->requests;
2262:     swaits   = gen_to->requests;
2263:     sstatus  = gen_to->sstatus;
2264:   }
2265:   rvalues  = gen_from->values;
2266:   nrecvs   = gen_from->n;
2267:   nsends   = gen_to->n;
2268:   indices  = gen_from->indices;
2269:   rstarts  = gen_from->starts;

2271:   /*  wait on receives */
2272:   count = nrecvs;
2273:   while (count) {
2274:     MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
2275:     /* unpack receives into our local space */
2276:     val      = rvalues + 3*rstarts[imdex];
2277:     lindices = indices + rstarts[imdex];
2278:     n        = rstarts[imdex+1] - rstarts[imdex];
2279:     if (addv == INSERT_VALUES) {
2280:       for (i=0; i<n; i++) {
2281:         idx       = lindices[i];
2282:         yv[idx]   = val[0];
2283:         yv[idx+1] = val[1];
2284:         yv[idx+2] = val[2];
2285:         val      += 3;
2286:       }
2287:     } else if (addv == ADD_VALUES) {
2288:       for (i=0; i<n; i++) {
2289:         idx       = lindices[i];
2290:         yv[idx]   += val[0];
2291:         yv[idx+1] += val[1];
2292:         yv[idx+2] += val[2];
2293:         val       += 3;
2294:       }
2295: #if !defined(PETSC_USE_COMPLEX)
2296:     } else if (addv == MAX_VALUES) {
2297:       for (i=0; i<n; i++) {
2298:         idx       = lindices[i];
2299:         yv[idx]   = PetscMax(yv[idx],val[0]);
2300:         yv[idx+1] = PetscMax(yv[idx+1],val[1]);
2301:         yv[idx+2] = PetscMax(yv[idx+2],val[2]);
2302:         val       += 3;
2303:       }
2304: #endif
2305:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
2306:     count--;
2307:   }
2308:   /* wait on sends */
2309:   if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
2310:   VecRestoreArray(yin,&yv);
2311:   return(0);
2312: }

2314: /* --------------------------------------------------------------------------------------*/

2318: PetscErrorCode VecScatterBegin_PtoP_2(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
2319: {
2320:   VecScatter_MPI_General *gen_to,*gen_from;
2321:   PetscScalar            *xv,*yv,*val,*svalues;
2322:   MPI_Request            *rwaits,*swaits;
2323:   PetscErrorCode         ierr;
2324:   PetscInt               i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;

2327:   VecGetArray(xin,&xv);
2328:   if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
2329:   if (mode & SCATTER_REVERSE) {
2330:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
2331:     gen_from = (VecScatter_MPI_General*)ctx->todata;
2332:     rwaits   = gen_from->rev_requests;
2333:     swaits   = gen_to->rev_requests;
2334:   } else {
2335:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
2336:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
2337:     rwaits   = gen_from->requests;
2338:     swaits   = gen_to->requests;
2339:   }
2340:   svalues  = gen_to->values;
2341:   nrecvs   = gen_from->n;
2342:   nsends   = gen_to->n;
2343:   indices  = gen_to->indices;
2344:   sstarts  = gen_to->starts;

2346:   if (!(mode & SCATTER_LOCAL)) {

2348:     if (gen_to->sendfirst) {
2349:       /* this version packs and sends one at a time */
2350:       val  = svalues;
2351:       for (i=0; i<nsends; i++) {
2352:         iend = sstarts[i+1]-sstarts[i];

2354:         for (j=0; j<iend; j++) {
2355:           idx     = *indices++;
2356:           val[0] = xv[idx];
2357:           val[1] = xv[idx+1];
2358:           val    += 2;
2359:         }
2360:         MPI_Start_isend(2*iend,swaits+i);
2361:       }
2362:     }

2364:     if (!gen_from->use_readyreceiver) {
2365:       /* post receives since they were not posted in VecScatterPostRecvs()   */
2366:       if (nrecvs) {MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);}
2367:     }

2369:     if (!gen_to->sendfirst) {
2370:       /* this version packs all the messages together and sends */
2371:       /*
2372:       len  = 2*sstarts[nsends];
2373:       val  = svalues;
2374:       for (i=0; i<len; i += 2) {
2375:         idx     = *indices++;
2376:         val[0] = xv[idx];
2377:         val[1] = xv[idx+1];
2378:         val      += 2;
2379:       }
2380:       MPI_Startall_isend(len,nsends,swaits);
2381:       */

2383:       /* this version packs and sends one at a time */
2384:       val  = svalues;
2385:       for (i=0; i<nsends; i++) {
2386:         iend = sstarts[i+1]-sstarts[i];

2388:         for (j=0; j<iend; j++) {
2389:           idx     = *indices++;
2390:           val[0] = xv[idx];
2391:           val[1] = xv[idx+1];
2392:           val    += 2;
2393:         }
2394:         MPI_Start_isend(2*iend,swaits+i);
2395:       }
2396:     }
2397:   }

2399:   /* take care of local scatters */
2400:   if (gen_to->local.n) {
2401:     PetscInt *tslots = gen_to->local.vslots,*fslots = gen_from->local.vslots;
2402:     PetscInt n       = gen_to->local.n,il,ir;
2403:     if (addv == INSERT_VALUES) {
2404:       if (gen_to->local.is_copy) {
2405:         PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
2406:       } else {
2407:         for (i=0; i<n; i++) {
2408:           il = fslots[i]; ir = tslots[i];
2409:           yv[il]   = xv[ir];
2410:           yv[il+1] = xv[ir+1];
2411:         }
2412:       }
2413:     }  else if (addv == ADD_VALUES) {
2414:       for (i=0; i<n; i++) {
2415:         il = fslots[i]; ir = tslots[i];
2416:         yv[il]   += xv[ir];
2417:         yv[il+1] += xv[ir+1];
2418:       }
2419: #if !defined(PETSC_USE_COMPLEX)
2420:     }  else if (addv == MAX_VALUES) {
2421:       for (i=0; i<n; i++) {
2422:         il = fslots[i]; ir = tslots[i];
2423:         yv[il]   = PetscMax(yv[il],xv[ir]);
2424:         yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
2425:       }
2426: #endif
2427:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
2428:   }
2429:   VecRestoreArray(xin,&xv);
2430:   if (xin != yin) {VecRestoreArray(yin,&yv);}
2431:   return(0);
2432: }

2434: /* --------------------------------------------------------------------------------------*/

2438: PetscErrorCode VecScatterEnd_PtoP_2(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
2439: {
2440:   VecScatter_MPI_General *gen_to,*gen_from;
2441:   PetscScalar            *rvalues,*yv,*val;
2442:   PetscErrorCode         ierr;
2443:   PetscInt               nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
2444:   PetscMPIInt            imdex;
2445:   MPI_Request            *rwaits,*swaits;
2446:   MPI_Status             rstatus,*sstatus;

2449:   if (mode & SCATTER_LOCAL) return(0);
2450:   VecGetArray(yin,&yv);

2452:   if (mode & SCATTER_REVERSE) {
2453:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
2454:     gen_from = (VecScatter_MPI_General*)ctx->todata;
2455:     rwaits   = gen_from->rev_requests;
2456:     swaits   = gen_to->rev_requests;
2457:     sstatus  = gen_from->sstatus;
2458:   } else {
2459:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
2460:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
2461:     rwaits   = gen_from->requests;
2462:     swaits   = gen_to->requests;
2463:     sstatus  = gen_to->sstatus;
2464:   }
2465:   rvalues  = gen_from->values;
2466:   nrecvs   = gen_from->n;
2467:   nsends   = gen_to->n;
2468:   indices  = gen_from->indices;
2469:   rstarts  = gen_from->starts;

2471:   /*  wait on receives */
2472:   count = nrecvs;
2473:   while (count) {
2474:     MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
2475:     /* unpack receives into our local space */
2476:     val      = rvalues + 2*rstarts[imdex];
2477:     lindices = indices + rstarts[imdex];
2478:     n        = rstarts[imdex+1] - rstarts[imdex];
2479:     if (addv == INSERT_VALUES) {
2480:       for (i=0; i<n; i++) {
2481:         idx       = lindices[i];
2482:         yv[idx]   = val[0];
2483:         yv[idx+1] = val[1];
2484:         val      += 2;
2485:       }
2486:     } else if (addv == ADD_VALUES) {
2487:       for (i=0; i<n; i++) {
2488:         idx       = lindices[i];
2489:         yv[idx]   += val[0];
2490:         yv[idx+1] += val[1];
2491:         val       += 2;
2492:       }
2493: #if !defined(PETSC_USE_COMPLEX)
2494:     } else if (addv == MAX_VALUES) {
2495:       for (i=0; i<n; i++) {
2496:         idx       = lindices[i];
2497:         yv[idx]   = PetscMax(yv[idx],val[0]);
2498:         yv[idx+1] = PetscMax(yv[idx+1],val[1]);
2499:         val       += 2;
2500:       }
2501: #endif
2502:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
2503:     count--;
2504:   }
2505:   /* wait on sends */
2506:   if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
2507:   VecRestoreArray(yin,&yv);
2508:   return(0);
2509: }

2511: /* ---------------------------------------------------------------------------------*/

2515: PetscErrorCode VecScatterDestroy_PtoP_X(VecScatter ctx)
2516: {
2517:   VecScatter_MPI_General *gen_to   = (VecScatter_MPI_General*)ctx->todata;
2518:   VecScatter_MPI_General *gen_from = (VecScatter_MPI_General*)ctx->fromdata;
2519:   PetscErrorCode         ierr;
2520:   PetscInt               i;

2523:   if (gen_to->use_readyreceiver) {
2524:     /*
2525:        Since the receives have already been posted (ready receiver mode) we must
2526:        cancel them before freeing the requests
2527:     */
2528:     for (i=0; i<gen_from->n; i++) {
2529:       MPI_Cancel(gen_from->requests+i);
2530:     }
2531:   }

2533:   if (gen_to->local.vslots)              {PetscFree2(gen_to->local.vslots,gen_from->local.vslots);}
2534:   if (gen_to->local.slots_nonmatching)  {PetscFree2(gen_to->local.slots_nonmatching,gen_from->local.slots_nonmatching);}

2536:   /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
2537:   /* 
2538:      IBM's PE version of MPI has a bug where freeing these requests corrupts later
2539:      message passing.
2540:   */
2541: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
2542:   for (i=0; i<gen_to->n; i++) {
2543:     MPI_Request_free(gen_to->requests + i);
2544:     MPI_Request_free(gen_to->rev_requests + i);
2545:   }

2547:   /*
2548:       MPICH could not properly cancel requests, so with ready receiver mode we
2549:     cannot free the requests. This may be fixed now; if not, wrap the following
2550:     code in an if (!gen_to->use_readyreceiver) { } block.
2551:   */
2552:   for (i=0; i<gen_from->n; i++) {
2553:     MPI_Request_free(gen_from->requests + i);
2554:     MPI_Request_free(gen_from->rev_requests + i);
2555:   }
2556: #endif
2557: 
2558:   PetscFree7(gen_to->values,gen_to->requests,gen_to->indices,gen_to->starts,gen_to->procs,gen_to->sstatus,gen_to->rstatus);
2559:   PetscFree2(gen_to->rev_requests,gen_from->rev_requests);
2560:   PetscFree5(gen_from->values,gen_from->requests,gen_from->indices,gen_from->starts,gen_from->procs);
2561:   PetscFree(gen_to);
2562:   PetscFree(gen_from);
2563:   PetscHeaderDestroy(ctx);
2564:   return(0);
2565: }
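/*
   Editor's sketch (not part of vpscat.c): the destroy routine above releases
   persistent requests.  The fragment below shows the full lifecycle of one
   such request with standard MPI, which is the pattern being torn down; the
   buffer, peer and tag are placeholders.
*/
#include <mpi.h>

static void persistent_send_lifecycle(double *buf,int n,int dest,int tag,MPI_Comm comm)
{
  MPI_Request req;
  MPI_Send_init(buf,n,MPI_DOUBLE,dest,tag,comm,&req);  /* created once, e.g. during scatter creation */
  MPI_Start(&req);                                     /* restarted for every scatter */
  MPI_Wait(&req,MPI_STATUS_IGNORE);                    /* completed in the scatter end routine */
  MPI_Request_free(&req);                              /* released in the destroy routine */
}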

2567: /* ==========================================================================================*/

2569: /*              create parallel to sequential scatter context                           */
2570: /*
2571:    bs indicates how many elements there are in each block. Normally this would be 1.
2572: */
2575: PetscErrorCode VecScatterCreate_PtoS(PetscInt nx,PetscInt *inidx,PetscInt ny,PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2576: {
2577:   VecScatter_MPI_General *from,*to;
2578:   PetscErrorCode         ierr;
2579:   PetscMPIInt            size,rank,imdex,tag,n;
2580:   PetscInt               *source,*lens,*owners;
2581:   PetscInt               *lowner,*start,lengthy;
2582:   PetscInt               *nprocs,i,j,idx,nsends,nrecvs;
2583:   PetscInt               *owner,*starts,count,slen;
2584:   PetscInt               *rvalues,*svalues,base,nmax,*values,*indx,nprocslocal,lastidx;
2585:   MPI_Comm               comm;
2586:   MPI_Request            *send_waits,*recv_waits;
2587:   MPI_Status             recv_status,*send_status;
2588:   PetscMap               map;
2589: #if defined(PETSC_DEBUG)
2590:   PetscTruth             found = PETSC_FALSE;
2591: #endif
2592: 
2594:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2595:   PetscObjectGetComm((PetscObject)xin,&comm);
2596:   MPI_Comm_rank(comm,&rank);
2597:   MPI_Comm_size(comm,&size);
2598:   VecGetPetscMap(xin,&map);
2599:   PetscMapGetGlobalRange(map,&owners);
2600:   VecGetSize(yin,&lengthy);

2602:   /*  first count number of contributors to each processor */
2603:   PetscMalloc2(2*size,PetscInt,&nprocs,nx,PetscInt,&owner);
2604:   PetscMemzero(nprocs,2*size*sizeof(PetscInt));
2605:   j       = 0;
2606:   lastidx = -1;
2607:   for (i=0; i<nx; i++) {
2608:     /* if indices are NOT locally sorted, need to start search at the beginning */
2609:     if (lastidx > (idx = inidx[i])) j = 0;
2610:     lastidx = idx;
2611:     for (; j<size; j++) {
2612:       if (idx >= owners[j] && idx < owners[j+1]) {
2613:         nprocs[2*j]++;
2614:         nprocs[2*j+1] = 1;
2615:         owner[i] = j;
2616: #if defined(PETSC_DEBUG)
2617:         found = PETSC_TRUE;
2618: #endif
2619:         break;
2620:       }
2621:     }
2622: #if defined(PETSC_DEBUG)
2623:     if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2624:     found = PETSC_FALSE;
2625: #endif
2626:   }
2627:   nprocslocal    = nprocs[2*rank];
2628:   nprocs[2*rank] = nprocs[2*rank+1] = 0;
2629:   nsends         = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

2631:   /* inform other processors of the number of messages and the maximum length */
2632:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

2634:   /* post receives:   */
2635:   PetscMalloc4(nrecvs*nmax,PetscInt,&rvalues,nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source,nrecvs,MPI_Request,&recv_waits);
2636:   for (i=0; i<nrecvs; i++) {
2637:     MPI_Irecv((rvalues+nmax*i),nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2638:   }

2640:   /* do sends:
2641:      starts[i] gives the starting index in svalues for the entries going to
2642:      the ith processor
2643:   */
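  /* Illustrative example (not in the original source): with size = 3 and, after the
     self-contributions have been zeroed, nprocs = {2,1, 0,0, 3,1} (two entries go to
     rank 0, none to rank 1, three to rank 2), the loop below produces
     starts = {0, 2, 2}; the entries for rank 0 then occupy svalues[0..1] and those
     for rank 2 occupy svalues[2..4]. */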
2644:   PetscMalloc3(nx,PetscInt,&svalues,nsends,MPI_Request,&send_waits,size+1,PetscInt,&starts);
2645:   starts[0]  = 0;
2646:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2647:   for (i=0; i<nx; i++) {
2648:     if (owner[i] != rank) {
2649:       svalues[starts[owner[i]]++] = inidx[i];
2650:     }
2651:   }

2653:   starts[0] = 0;
2654:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2655:   count = 0;
2656:   for (i=0; i<size; i++) {
2657:     if (nprocs[2*i+1]) {
2658:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
2659:     }
2660:   }

2662:   /*  wait on receives */
2663:   count  = nrecvs;
2664:   slen   = 0;
2665:   while (count) {
2666:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2667:     /* unpack receives into our local space */
2668:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2669:     source[imdex]  = recv_status.MPI_SOURCE;
2670:     lens[imdex]    = n;
2671:     slen          += n;
2672:     count--;
2673:   }
2674: 
2675:   /* allocate entire send scatter context */
2676:   PetscNew(VecScatter_MPI_General,&to);
2677:   PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&to->sendfirst);
2678:   to->n = nrecvs;
2679:   PetscMalloc7(bs*slen,PetscScalar,&to->values,nrecvs,MPI_Request,&to->requests,slen,PetscInt,&to->indices,nrecvs+1,PetscInt,&to->starts,
2680:                        nrecvs,PetscMPIInt,&to->procs,PetscMax(to->n,nsends),MPI_Status,&to->sstatus,PetscMax(to->n,nsends),MPI_Status,
2681:                       &to->rstatus);
2682:   ctx->todata   = (void*)to;
2683:   to->starts[0] = 0;

2685:   if (nrecvs) {
2686:     PetscMalloc(nrecvs*sizeof(PetscInt),&indx);
2687:     for (i=0; i<nrecvs; i++) indx[i] = i;
2688:     PetscSortIntWithPermutation(nrecvs,source,indx);

2690:     /* move the data into the send scatter */
2691:     base = owners[rank];
2692:     for (i=0; i<nrecvs; i++) {
2693:       to->starts[i+1] = to->starts[i] + lens[indx[i]];
2694:       to->procs[i]    = source[indx[i]];
2695:       values = rvalues + indx[i]*nmax;
2696:       for (j=0; j<lens[indx[i]]; j++) {
2697:         to->indices[to->starts[i] + j] = values[j] - base;
2698:       }
2699:     }
2700:     PetscFree(indx);
2701:   }
2702:   PetscFree4(rvalues,lens,source,recv_waits);
2703: 
2704:   /* allocate entire receive scatter context */
2705:   PetscNew(VecScatter_MPI_General,&from);
2706:   PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&from->sendfirst);
2707:   from->n        = nsends;

2709:   PetscMalloc5(ny*bs,PetscScalar,&from->values,nsends,MPI_Request,&from->requests,ny,PetscInt,&from->indices,
2710:                       nsends+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
2711:   ctx->fromdata  = (void*)from;

2713:   /* move data into receive scatter */
2714:   PetscMalloc2(size,PetscInt,&lowner,nsends+1,PetscInt,&start);
2715:   count = 0; from->starts[0] = start[0] = 0;
2716:   for (i=0; i<size; i++) {
2717:     if (nprocs[2*i+1]) {
2718:       lowner[i]            = count;
2719:       from->procs[count++] = i;
2720:       from->starts[count]  = start[count] = start[count-1] + nprocs[2*i];
2721:     }
2722:   }
2723:   for (i=0; i<nx; i++) {
2724:     if (owner[i] != rank) {
2725:       from->indices[start[lowner[owner[i]]]++] = inidy[i];
2726:       if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2727:     }
2728:   }
2729:   PetscFree2(lowner,start);
2730:   PetscFree2(nprocs,owner);
2731: 
2732:   /* wait on sends */
2733:   if (nsends) {
2734:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
2735:     MPI_Waitall(nsends,send_waits,send_status);
2736:     PetscFree(send_status);
2737:   }
2738:   PetscFree3(svalues,send_waits,starts);

2740:   if (nprocslocal) {
2741:     PetscInt nt = from->local.n = to->local.n = nprocslocal;
2742:     /* we have a scatter to ourselves */
2743:     PetscMalloc2(nt,PetscInt,&to->local.vslots,nt,PetscInt,&from->local.vslots);
2744:     nt   = 0;
2745:     for (i=0; i<nx; i++) {
2746:       idx = inidx[i];
2747:       if (idx >= owners[rank] && idx < owners[rank+1]) {
2748:         to->local.vslots[nt]     = idx - owners[rank];
2749:         from->local.vslots[nt++] = inidy[i];
2750:         if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2751:       }
2752:     }
2753:   } else {
2754:     from->local.n     = 0;
2755:     from->local.vslots = 0;
2756:     to->local.n       = 0;
2757:     to->local.vslots   = 0;
2758:   }
2759:   from->local.nonmatching_computed = PETSC_FALSE;
2760:   from->local.n_nonmatching        = 0;
2761:   from->local.slots_nonmatching    = 0;
2762:   to->local.nonmatching_computed   = PETSC_FALSE;
2763:   to->local.n_nonmatching          = 0;
2764:   to->local.slots_nonmatching      = 0;

2766:   to->type   = VEC_SCATTER_MPI_GENERAL;
2767:   from->type = VEC_SCATTER_MPI_GENERAL;

2769:   from->bs = bs;
2770:   to->bs   = bs;
2771:   if (bs > 1) {
2772:     PetscTruth  flg,flgs = PETSC_FALSE;
2773:     PetscInt    *sstarts = to->starts,  *rstarts = from->starts;
2774:     PetscMPIInt *sprocs  = to->procs,   *rprocs  = from->procs;
2775:     MPI_Request *swaits  = to->requests,*rwaits  = from->requests;
2776:     MPI_Request *rev_swaits,*rev_rwaits;
2777:     PetscScalar *Ssvalues = to->values, *Srvalues = from->values;

2779:     tag      = ctx->tag;
2780:     comm     = ctx->comm;

2782:     /* allocate additional wait variables for the "reverse" scatter */
2783:     PetscMalloc2(nrecvs,MPI_Request,&rev_rwaits,nsends,MPI_Request,&rev_swaits);
2784:     to->rev_requests   = rev_rwaits;
2785:     from->rev_requests = rev_swaits;

2787:     /* Register the persistent receives for the forward scatter (and the matching sends used by scatter reverse) */
2788:     PetscOptionsHasName(PETSC_NULL,"-vecscatter_ssend",&flgs);
2789:     if (flgs) {
2790:       PetscLogInfo((0,"VecScatterCreate_PtoS:Using VecScatter Ssend mode\n"));
2791:     }
2792:     for (i=0; i<from->n; i++) {
2793:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2794:       if (!flgs) {
2795:         MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
2796:       } else {
2797:         MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
2798:       }
2799:     }

2801:     PetscOptionsHasName(PETSC_NULL,"-vecscatter_rr",&flg);
2802:     if (flg) {
2803:       ctx->postrecvs           = VecScatterPostRecvs_PtoP_X;
2804:       to->use_readyreceiver    = PETSC_TRUE;
2805:       from->use_readyreceiver  = PETSC_TRUE;
2806:       for (i=0; i<to->n; i++) {
2807:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2808:       }
2809:       PetscLogInfo((0,"VecScatterCreate_PtoS:Using VecScatter ready receiver mode\n"));
2810:     } else {
2811:       ctx->postrecvs           = 0;
2812:       to->use_readyreceiver    = PETSC_FALSE;
2813:       from->use_readyreceiver  = PETSC_FALSE;
2814:       for (i=0; i<to->n; i++) {
2815:         if (!flgs) {
2816:           MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2817:         } else {
2818:           MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2819:         }
2820:       }
2821:     }
2822:     /* Register receives for scatter reverse */
2823:     for (i=0; i<to->n; i++) {
2824:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
2825:     }

2827:     PetscLogInfo((0,"VecScatterCreate_PtoS:Using blocksize %D scatter\n",bs));
2828:     ctx->destroy   = VecScatterDestroy_PtoP_X;
2829:     ctx->copy      = VecScatterCopy_PtoP_X;
2830:     switch (bs) {
2831:     case 12:
2832:       ctx->begin     = VecScatterBegin_PtoP_12;
2833:       ctx->end       = VecScatterEnd_PtoP_12;
2834:       break;
2835:     case 8:
2836:       ctx->begin     = VecScatterBegin_PtoP_8;
2837:       ctx->end       = VecScatterEnd_PtoP_8;
2838:       break;
2839:     case 7:
2840:       ctx->begin     = VecScatterBegin_PtoP_7;
2841:       ctx->end       = VecScatterEnd_PtoP_7;
2842:       break;
2843:     case 6:
2844:       ctx->begin     = VecScatterBegin_PtoP_6;
2845:       ctx->end       = VecScatterEnd_PtoP_6;
2846:       break;
2847:     case 5:
2848:       ctx->begin     = VecScatterBegin_PtoP_5;
2849:       ctx->end       = VecScatterEnd_PtoP_5;
2850:       break;
2851:     case 4:
2852:       ctx->begin     = VecScatterBegin_PtoP_4;
2853:       ctx->end       = VecScatterEnd_PtoP_4;
2854:       break;
2855:     case 3:
2856:       ctx->begin     = VecScatterBegin_PtoP_3;
2857:       ctx->end       = VecScatterEnd_PtoP_3;
2858:       break;
2859:     case 2:
2860:       ctx->begin     = VecScatterBegin_PtoP_2;
2861:       ctx->end       = VecScatterEnd_PtoP_2;
2862:       break;
2863:     default:
2864:       SETERRQ(PETSC_ERR_SUP,"Blocksize not supported");
2865:     }
2866:   } else {
2867:     PetscLogInfo((0,"VecScatterCreate_PtoS:Using nonblocked scatter\n"));
2868:     ctx->postrecvs = 0;
2869:     ctx->destroy   = VecScatterDestroy_PtoP;
2870:     ctx->begin     = VecScatterBegin_PtoP;
2871:     ctx->end       = VecScatterEnd_PtoP;
2872:     ctx->copy      = VecScatterCopy_PtoP;
2873:   }
2874:   ctx->view      = VecScatterView_MPI;

2876:   /* Check if the local scatter is actually a copy; important special case */
2877:   if (nprocslocal) {
2878:     VecScatterLocalOptimizeCopy_Private(&to->local,&from->local,bs);
2879:   }
2880:   return(0);
2881: }
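/*
   Editor's sketch (not part of vpscat.c): both create routines locate the
   owner of each global index by scanning the ownership ranges, resuming from
   the previous hit and restarting only when the index list goes backwards
   (the lastidx check).  A minimal standalone version, with int in place of
   PetscInt and the hypothetical name find_owner:
*/
static int find_owner(int idx,int size,const int *owners,int *jstart)
{
  int j;
  if (idx < owners[*jstart]) *jstart = 0;         /* mirrors the lastidx > idx restart */
  for (j=*jstart; j<size; j++) {
    if (idx >= owners[j] && idx < owners[j+1]) { *jstart = j; return j; }
  }
  return -1;                                      /* index out of range */
}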

2883: /* ------------------------------------------------------------------------------------*/
2884: /*
2885:          Scatter from local Seq vectors to a parallel vector. 
2886: */
2889: PetscErrorCode VecScatterCreate_StoP(PetscInt nx,PetscInt *inidx,PetscInt ny,PetscInt *inidy,Vec yin,PetscInt bs,VecScatter ctx)
2890: {
2891:   VecScatter_MPI_General *from,*to;
2892:   PetscInt               *source,nprocslocal,*lens,*owners = yin->map->range;
2893:   PetscMPIInt            rank = yin->stash.rank,size = yin->stash.size,tag,imdex,n;
2894:   PetscErrorCode         ierr;
2895:   PetscInt               *lowner,*start;
2896:   PetscInt               *nprocs,i,j,idx,nsends,nrecvs;
2897:   PetscInt               *owner,*starts,count,slen;
2898:   PetscInt               *rvalues,*svalues,base,nmax,*values,lastidx;
2899:   MPI_Comm               comm = yin->comm;
2900:   MPI_Request            *send_waits,*recv_waits;
2901:   MPI_Status             recv_status,*send_status;
2902: #if defined(PETSC_DEBUG)
2903:   PetscTruth             found = PETSC_FALSE;
2904: #endif

2907:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2908:   PetscMalloc5(2*size,PetscInt,&nprocs,nx,PetscInt,&owner,size,PetscInt,&lowner,size,PetscInt,&start,size+1,PetscInt,&starts);

2910:   /*  count number of contributors to each processor */
2911:   PetscMemzero(nprocs,2*size*sizeof(PetscInt));
2912:   j       = 0;
2913:   lastidx = -1;
2914:   for (i=0; i<nx; i++) {
2915:     /* if indices are NOT locally sorted, need to start search at the beginning */
2916:     if (lastidx > (idx = inidy[i])) j = 0;
2917:     lastidx = idx;
2918:     for (; j<size; j++) {
2919:       if (idx >= owners[j] && idx < owners[j+1]) {
2920:         nprocs[2*j]++;
2921:         nprocs[2*j+1] = 1;
2922:         owner[i] = j;
2923: #if defined(PETSC_DEBUG)
2924:         found = PETSC_TRUE;
2925: #endif
2926:         break;
2927:       }
2928:     }
2929: #if defined(PETSC_DEBUG)
2930:     if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2931:     found = PETSC_FALSE;
2932: #endif
2933:   }
2934:   nprocslocal    = nprocs[2*rank];
2935:   nprocs[2*rank] = nprocs[2*rank+1] = 0;
2936:   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

2938:   /* inform other processors of the number of messages and the maximum length */
2939:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

2941:   /* post receives:   */
2942:   PetscMalloc6(nrecvs*nmax,PetscInt,&rvalues,nrecvs,MPI_Request,&recv_waits,nx,PetscInt,&svalues,nsends,MPI_Request,&send_waits,nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);

2944:   for (i=0; i<nrecvs; i++) {
2945:     MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2946:   }

2948:   /* do sends:
2949:      starts[i] gives the starting index in svalues for the entries going to
2950:      the ith processor
2951:   */

2953:   starts[0]  = 0;
2954:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2955:   for (i=0; i<nx; i++) {
2956:     if (owner[i] != rank) {
2957:       svalues[starts[owner[i]]++] = inidy[i];
2958:     }
2959:   }

2961:   /* reset starts because its entries were advanced while filling svalues above */
2962:   starts[0]  = 0;
2963:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2964:   count = 0;
2965:   for (i=0; i<size; i++) {
2966:     if (nprocs[2*i+1]) {
2967:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count);
2968:       count++;
2969:     }
2970:   }

2972:   /* allocate entire send scatter context */
2973:   PetscNew(VecScatter_MPI_General,&to);
2974:   ctx->todata   = (void*)to;
2975:   to->sendfirst = PETSC_FALSE;
2976:   PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&to->sendfirst);
2977:   to->n         = nsends;
2978: 
2979:   PetscMalloc7(ny,PetscScalar,&to->values,nsends,MPI_Request,&to->requests,ny,PetscInt,&to->indices,nsends+1,PetscInt,&to->starts,
2980:                       nsends,PetscMPIInt,&to->procs,PetscMax(to->n,nrecvs),MPI_Status,&to->sstatus,PetscMax(to->n,nrecvs),MPI_Status,
2981:                       &to->rstatus);

2983:   /* move data into send scatter context */
2984:   count         = 0;
2985:   to->starts[0] = start[0] = 0;
2986:   for (i=0; i<size; i++) {
2987:     if (nprocs[2*i+1]) {
2988:       lowner[i]          = count;
2989:       to->procs[count++] = i;
2990:       to->starts[count]  = start[count] = start[count-1] + nprocs[2*i];
2991:     }
2992:   }
2993:   for (i=0; i<nx; i++) {
2994:     if (owner[i] != rank) {
2995:       to->indices[start[lowner[owner[i]]]++] = inidx[i];
2996:     }
2997:   }
2998:   PetscFree5(nprocs,owner,lowner,start,starts);
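
  /*
     The send side now has a CSR-like layout: to->procs[] lists the destination ranks,
     to->starts[] brackets each destination's slice of to->indices[], and to->indices[]
     holds the x-indices to pack for that destination.  For example (made-up numbers),
     to->n = 2, to->procs = {1,3}, to->starts = {0,2,5} means entries 0..1 of to->indices
     are packed for rank 1 and entries 2..4 for rank 3.
  */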

3000:   /*  wait on receives */
3001:   count = nrecvs;
3002:   slen  = 0;
3003:   while (count) {
3004:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3005:     /* unpack receives into our local space */
3006:     MPI_Get_count(&recv_status,MPIU_INT,&n);
3007:     source[imdex]  = recv_status.MPI_SOURCE;
3008:     lens[imdex]    = n;
3009:     slen          += n;
3010:     count--;
3011:   }
3012: 
3013:   /* allocate entire receive scatter context */
3014:   PetscNew(VecScatter_MPI_General,&from);
3015:   PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&from->sendfirst);
3016:   from->n        = nrecvs;

3018:   PetscMalloc5(slen,PetscScalar,&from->values,nrecvs,MPI_Request,&from->requests,slen,PetscInt,&from->indices,
3019:                       nrecvs+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
3020:   ctx->fromdata  = (void*)from;

 3022:   /* move the data into the receive scatter context */
3023:   base            = owners[rank];
3024:   from->starts[0] = 0;
3025:   for (i=0; i<nrecvs; i++) {
3026:     from->starts[i+1] = from->starts[i] + lens[i];
3027:     from->procs[i]    = source[i];
3028:     values            = rvalues + i*nmax;
3029:     for (j=0; j<lens[i]; j++) {
3030:       from->indices[from->starts[i] + j] = values[j] - base;
3031:     }
3032:   }
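
  /*
     Each received index is stored relative to this rank's ownership range (global index
     minus base), i.e. as an offset into the locally owned block of y; from->starts[] and
     from->procs[] play the same CSR-style role as on the send side.
  */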
3033: 
3034:   /* wait on sends */
3035:   if (nsends) {
3036:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
3037:     MPI_Waitall(nsends,send_waits,send_status);
3038:     PetscFree(send_status);
3039:   }
3040:   PetscFree6(rvalues,recv_waits,svalues,send_waits,lens,source);

3042:   if (nprocslocal) {
3043:     /* we have a scatter to ourselves */
3044:     PetscInt nt = from->local.n = to->local.n = nprocslocal;
3045:     PetscMalloc2(nt,PetscInt,&to->local.vslots,nt,PetscInt,&from->local.vslots);
3046:     nt   = 0;
3047:     for (i=0; i<ny; i++) {
3048:       idx = inidy[i];
3049:       if (idx >= owners[rank] && idx < owners[rank+1]) {
3050:         from->local.vslots[nt] = idx - owners[rank];
3051:         to->local.vslots[nt++] = inidx[i];
3052:       }
3053:     }
3054:   } else {
 3055:     from->local.n      = 0;
 3056:     from->local.vslots = 0;
 3057:     to->local.n        = 0;
 3058:     to->local.vslots   = 0;

3060:   }
3061:   from->local.nonmatching_computed = PETSC_FALSE;
3062:   from->local.n_nonmatching        = 0;
3063:   from->local.slots_nonmatching    = 0;
3064:   to->local.nonmatching_computed   = PETSC_FALSE;
3065:   to->local.n_nonmatching          = 0;
3066:   to->local.slots_nonmatching      = 0;

3068:   to->type   = VEC_SCATTER_MPI_GENERAL;
3069:   from->type = VEC_SCATTER_MPI_GENERAL;

3071:   if (bs > 1) {
3072:     PetscLogInfo((0,"VecScatterCreate_StoP:Using blocksize %D scatter\n",bs));
3073:     ctx->copy        = VecScatterCopy_PtoP_X;
3074:     switch (bs) {
3075:     case 12:
3076:       ctx->begin     = VecScatterBegin_PtoP_12;
3077:       ctx->end       = VecScatterEnd_PtoP_12;
3078:       break;
3079:     case 8:
3080:       ctx->begin     = VecScatterBegin_PtoP_8;
3081:       ctx->end       = VecScatterEnd_PtoP_8;
3082:       break;
3083:     case 7:
3084:       ctx->begin     = VecScatterBegin_PtoP_7;
3085:       ctx->end       = VecScatterEnd_PtoP_7;
3086:       break;
3087:     case 6:
3088:       ctx->begin     = VecScatterBegin_PtoP_6;
3089:       ctx->end       = VecScatterEnd_PtoP_6;
3090:       break;
3091:     case 5:
3092:       ctx->begin     = VecScatterBegin_PtoP_5;
3093:       ctx->end       = VecScatterEnd_PtoP_5;
3094:       break;
3095:     case 4:
3096:       ctx->begin     = VecScatterBegin_PtoP_4;
3097:       ctx->end       = VecScatterEnd_PtoP_4;
3098:       break;
3099:     case 3:
3100:       ctx->begin     = VecScatterBegin_PtoP_3;
3101:       ctx->end       = VecScatterEnd_PtoP_3;
3102:       break;
3103:     case 2:
3104:       ctx->begin     = VecScatterBegin_PtoP_2;
3105:       ctx->end       = VecScatterEnd_PtoP_2;
3106:       break;
3107:     default:
 3108:       SETERRQ1(PETSC_ERR_SUP,"Blocksize %D not supported",bs);
3109:     }
3110:   } else {
3111:     PetscLogInfo((0,"VecScatterCreate_StoP:Using nonblocked scatter\n"));
3112:     ctx->begin     = VecScatterBegin_PtoP;
3113:     ctx->end       = VecScatterEnd_PtoP;
3114:     ctx->copy      = VecScatterCopy_PtoP;
3115:   }
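
  /*
     With bs > 1 each stored index stands for a block of bs consecutive vector entries,
     so the blocked begin/end routines pack and unpack whole blocks per index; the
     non-blocked fallback moves one scalar per index.
  */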
3116:   ctx->destroy   = VecScatterDestroy_PtoP;
3117:   ctx->postrecvs = 0;
3118:   ctx->view      = VecScatterView_MPI;

3120:   to->bs   = bs;
3121:   from->bs = bs;

3123:   /* Check if the local scatter is actually a copy; important special case */
3124:   if (nprocslocal) {
3125:     VecScatterLocalOptimizeCopy_Private(&to->local,&from->local,bs);
3126:   }
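
  /*
     VecScatterLocalOptimizeCopy_Private() checks whether both local slot lists form
     contiguous runs separated by a constant shift; when they do, the local piece of the
     scatter can later be performed as a single memory copy instead of an indexed loop.
  */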
3127:   return(0);
3128: }
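
/*
   Illustrative sketch only, never compiled (the ILLUSTRATION_ONLY guard is not defined
   anywhere): an MPI_Allreduce()-based reduction with the same observable result as the
   PetscMaxSum() calls above, spelling out what nmax and nrecvs mean.  This is not the
   routine PETSc actually uses -- the real one communicates far less -- and the helper
   name MaxSumSketch exists only for this illustration.
*/
#if defined(ILLUSTRATION_ONLY)
static PetscErrorCode MaxSumSketch(MPI_Comm comm,PetscInt *nprocs,PetscInt *nmax,PetscInt *nrecvs)
{
  PetscMPIInt size,rank;
  PetscInt    *maxes,*sums;

  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);
  PetscMalloc(2*size*sizeof(PetscInt),&maxes);
  PetscMalloc(2*size*sizeof(PetscInt),&sums);
  /* elementwise max over all ranks: slot 2*j becomes the longest single message headed to rank j */
  MPI_Allreduce(nprocs,maxes,2*size,MPIU_INT,MPI_MAX,comm);
  /* elementwise sum over all ranks: slot 2*j+1 becomes the number of ranks sending to rank j */
  MPI_Allreduce(nprocs,sums,2*size,MPIU_INT,MPI_SUM,comm);
  *nmax   = maxes[2*rank];
  *nrecvs = sums[2*rank+1];
  PetscFree(maxes);
  PetscFree(sums);
  return(0);
}
#endif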

3130: /* ---------------------------------------------------------------------------------*/
3133: PetscErrorCode VecScatterCreate_PtoP(PetscInt nx,PetscInt *inidx,PetscInt ny,PetscInt *inidy,Vec xin,Vec yin,VecScatter ctx)
3134: {
3136:   PetscMPIInt    size,rank,tag,imdex,n;
3137:   PetscInt       *lens,*owners = xin->map->range;
3138:   PetscInt       *nprocs,i,j,idx,nsends,nrecvs,*local_inidx,*local_inidy;
3139:   PetscInt       *owner,*starts,count,slen;
3140:   PetscInt       *rvalues,*svalues,base,nmax,*values,lastidx;
3141:   MPI_Comm       comm;
3142:   MPI_Request    *send_waits,*recv_waits;
3143:   MPI_Status     recv_status,*send_status;
 3144:   PetscTruth     duplicate = PETSC_FALSE;
 3145: #if defined(PETSC_DEBUG)
 3146:   PetscTruth     found = PETSC_FALSE;
 3147: #endif
#if defined(do_it_slow)
  PetscInt       start,last;   /* used only by the compiled-out duplicate-removal code below */
#endif

3150:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
3151:   PetscObjectGetComm((PetscObject)xin,&comm);
3152:   MPI_Comm_size(comm,&size);
3153:   MPI_Comm_rank(comm,&rank);
3154:   if (size == 1) {
3155:     VecScatterCreate_StoP(nx,inidx,ny,inidy,yin,1,ctx);
3156:     return(0);
3157:   }

 3159:   /*
 3160:      Each processor ships its (inidx[j],inidy[j]) pairs to the processor that owns
 3161:      inidx[j] in xin; everyone then calls VecScatterCreate_StoP() on the redistributed pairs.
 3162:   */
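
  /*
     For example (made-up numbers): if this rank contributes the pair (inidx,inidy) = (7,3)
     and global index 7 of xin is owned by rank 1, the pair is shipped to rank 1.  After the
     exchange every rank holds only from-indices it owns, which is exactly the situation
     VecScatterCreate_StoP() expects.
  */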
3163:   /*  first count number of contributors to each processor */
3164:   PetscMalloc3(2*size,PetscInt,&nprocs,nx,PetscInt,&owner,(size+1),PetscInt,&starts);
3165:   PetscMemzero(nprocs,2*size*sizeof(PetscInt));
3166:   lastidx = -1;
3167:   j       = 0;
3168:   for (i=0; i<nx; i++) {
3169:     /* if indices are NOT locally sorted, need to start search at the beginning */
3170:     if (lastidx > (idx = inidx[i])) j = 0;
3171:     lastidx = idx;
3172:     for (; j<size; j++) {
3173:       if (idx >= owners[j] && idx < owners[j+1]) {
3174:         nprocs[2*j]++;
3175:         nprocs[2*j+1] = 1;
3176:         owner[i] = j;
3177: #if defined(PETSC_DEBUG)
3178:         found = PETSC_TRUE;
3179: #endif
3180:         break;
3181:       }
3182:     }
3183: #if defined(PETSC_DEBUG)
3184:     if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
3185:     found = PETSC_FALSE;
3186: #endif
3187:   }
3188:   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

 3190:   /* let each processor know how many messages it will receive and the maximum message length */
3191:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

3193:   /* post receives:   */
3194:   PetscMalloc6(2*nrecvs*nmax,PetscInt,&rvalues,2*nx,PetscInt,&svalues,2*nrecvs,PetscInt,&lens,nrecvs,MPI_Request,&recv_waits,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);

3196:   for (i=0; i<nrecvs; i++) {
3197:     MPI_Irecv(rvalues+2*nmax*i,2*nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3198:   }

 3200:   /* do sends:
 3201:        starts[i] gives the starting index in svalues for the pairs destined
 3202:        for the ith processor
 3203:   */
 3204:   starts[0] = 0;
3205:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
3206:   for (i=0; i<nx; i++) {
3207:     svalues[2*starts[owner[i]]]       = inidx[i];
3208:     svalues[1 + 2*starts[owner[i]]++] = inidy[i];
3209:   }

3211:   starts[0] = 0;
3212:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
3213:   count = 0;
3214:   for (i=0; i<size; i++) {
3215:     if (nprocs[2*i+1]) {
3216:       MPI_Isend(svalues+2*starts[i],2*nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count);
3217:       count++;
3218:     }
3219:   }
3220:   PetscFree3(nprocs,owner,starts);

3222:   /*  wait on receives */
3223:   count = nrecvs;
3224:   slen  = 0;
3225:   while (count) {
3226:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3227:     /* unpack receives into our local space */
3228:     MPI_Get_count(&recv_status,MPIU_INT,&n);
3229:     lens[imdex]  =  n/2;
3230:     slen         += n/2;
3231:     count--;
3232:   }
3233: 
3234:   PetscMalloc2(slen,PetscInt,&local_inidx,slen,PetscInt,&local_inidy);
3235:   base  = owners[rank];
3236:   count = 0;
3237:   for (i=0; i<nrecvs; i++) {
3238:     values = rvalues + 2*i*nmax;
3239:     for (j=0; j<lens[i]; j++) {
3240:       local_inidx[count]   = values[2*j] - base;
3241:       local_inidy[count++] = values[2*j+1];
3242:     }
3243:   }

3245:   /* wait on sends */
3246:   if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
3247:   PetscFree6(rvalues,svalues,lens,recv_waits,send_waits,send_status);

 3249:   /* should sort local_inidx/local_inidy here and remove duplicate (from,to) pairs;
 3250:      the straightforward implementation below is normally compiled out (do_it_slow)
 3251:   */

3253: #if defined(do_it_slow)
3254:   /* sort on the from index */
3255:   PetscSortIntWithArray(slen,local_inidx,local_inidy);
3256:   start = 0;
3257:   while (start < slen) {
3258:     count = start+1;
3259:     last  = local_inidx[start];
3260:     while (count < slen && last == local_inidx[count]) count++;
3261:     if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
3262:       /* sort on to index */
3263:       PetscSortInt(count-start,local_inidy+start);
3264:     }
3265:     /* remove duplicates; not most efficient way, but probably good enough */
3266:     i = start;
3267:     while (i < count-1) {
3268:       if (local_inidy[i] != local_inidy[i+1]) {
3269:         i++;
3270:       } else { /* found a duplicate */
3271:         duplicate = PETSC_TRUE;
3272:         for (j=i; j<slen-1; j++) {
3273:           local_inidx[j] = local_inidx[j+1];
3274:           local_inidy[j] = local_inidy[j+1];
3275:         }
3276:         slen--;
3277:         count--;
3278:       }
3279:     }
3280:     start = count;
3281:   }
3282: #endif
3283:   if (duplicate) {
 3284:     PetscLogInfo((0,"VecScatterCreate_PtoP:Duplicate from-to indices passed to VecScatterCreate(); they are ignored\n"));
3285:   }
3286:   VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,yin,1,ctx);
3287:   PetscFree2(local_inidx,local_inidy);
3288:   return(0);
3289: }
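
/*
   Illustrative sketch only, never compiled (the ILLUSTRATION_ONLY guard is not defined
   anywhere): a tiny driver showing how the creation code above is normally reached from
   user code -- a general parallel-to-parallel scatter built from two index sets.  The
   calling sequences are those of this generation of PETSc, written from memory, so treat
   exact argument orders as approximate; the index choices are arbitrary.
*/
#if defined(ILLUSTRATION_ONLY)
#include "petscvec.h"

int main(int argc,char **argv)
{
  Vec         x,y;
  IS          ix,iy;
  VecScatter  scat;
  PetscInt    i,N,idx[2],idy[2];
  PetscMPIInt rank,size;

  PetscInitialize(&argc,&argv,(char*)0,(char*)0);
  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  MPI_Comm_size(PETSC_COMM_WORLD,&size);

  /* two entries per rank in both vectors; x is left with its initial zero entries */
  VecCreateMPI(PETSC_COMM_WORLD,2,PETSC_DECIDE,&x);
  VecCreateMPI(PETSC_COMM_WORLD,2,PETSC_DECIDE,&y);

  N = 2*size;
  for (i=0; i<2; i++) {
    idx[i] = (2*rank + 2 + i) % N;   /* read the next rank's slice of x (wraps around) */
    idy[i] = 2*rank + i;             /* write into this rank's own slice of y */
  }
  ISCreateGeneral(PETSC_COMM_WORLD,2,idx,&ix);
  ISCreateGeneral(PETSC_COMM_WORLD,2,idy,&iy);

  /* build the scatter (eventually reaching the general parallel creation path above) and run it once */
  VecScatterCreate(x,ix,y,iy,&scat);
  VecScatterBegin(x,y,INSERT_VALUES,SCATTER_FORWARD,scat);
  VecScatterEnd(x,y,INSERT_VALUES,SCATTER_FORWARD,scat);

  VecScatterDestroy(scat);
  ISDestroy(ix); ISDestroy(iy);
  VecDestroy(x); VecDestroy(y);
  PetscFinalize();
  return 0;
}
#endif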