Actual source code: vpscat.c
1: #define PETSCVEC_DLL
3: /*
4: Defines parallel vector scatters.
5: */
7: #include "src/vec/is/isimpl.h"
8: #include "vecimpl.h"
9: #include "src/vec/impls/dvecimpl.h"
10: #include "src/vec/impls/mpi/pvecimpl.h"
11: #include "petscsys.h"
15: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
16: {
17: VecScatter_MPI_General *to=(VecScatter_MPI_General*)ctx->todata;
18: VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
19: PetscErrorCode ierr;
20: PetscInt i;
21: PetscMPIInt rank;
22: PetscViewerFormat format;
23: PetscTruth iascii;
26: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
27: if (iascii) {
28: MPI_Comm_rank(ctx->comm,&rank);
29: PetscViewerGetFormat(viewer,&format);
30: if (format == PETSC_VIEWER_ASCII_INFO) {
31: PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
33: MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,ctx->comm);
34: MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,ctx->comm);
35: itmp = to->starts[to->n];
36: MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,ctx->comm);
37: itmp = from->starts[from->n];
38: MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,ctx->comm);
39: MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,ctx->comm);
41: PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
42: PetscViewerASCIIPrintf(viewer," Maximum number sends %D\n",nsend_max);
43: PetscViewerASCIIPrintf(viewer," Maximum number receives %D\n",nrecv_max);
44: PetscViewerASCIIPrintf(viewer," Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
45: PetscViewerASCIIPrintf(viewer," Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
46: PetscViewerASCIIPrintf(viewer," Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));
48: } else {
49: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
50: if (to->n) {
51: for (i=0; i<to->n; i++){
52: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length = %D to whom %D\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
53: }
54: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
55: for (i=0; i<to->starts[to->n]; i++){
56: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%D \n",rank,to->indices[i]);
57: }
58: }
60: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
61: if (from->n) {
62: for (i=0; i<from->n; i++){
63: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %D\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
64: }
66: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
67: for (i=0; i<from->starts[from->n]; i++){
68: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%D \n",rank,from->indices[i]);
69: }
70: }
71: if (to->local.n) {
72: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Indices for local part of scatter\n",rank);
73: for (i=0; i<to->local.n; i++){
74: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]From %D to %D \n",rank,from->local.vslots[i],to->local.vslots[i]);
75: }
76: }
78: PetscViewerFlush(viewer);
79: }
80: } else {
81: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for this scatter",((PetscObject)viewer)->type_name);
82: }
83: return(0);
84: }
86: /* -----------------------------------------------------------------------------------*/
87: /*
88: The next routine determines which entries of the local part of the scatter
89: copy values into their current (unchanged) locations. We detect those entries here
90: so that portion of the scatter can be skipped later.
91: */
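/*
      Illustrative example (made-up indices, not from any particular run): with
         to_slots   = {3, 7, 9, 12}
         from_slots = {3, 7, 8, 12}
      only entry 2 differs, so slots_nonmatching becomes {9} and {8} and the three
      matching entries are skipped during the local scatter.
*/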
94: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter_Seq_General *gen_to,VecScatter_Seq_General *gen_from)
95: {
96: PetscInt n = gen_to->n,n_nonmatching = 0,i,*to_slots = gen_to->vslots,*from_slots = gen_from->vslots;
98: PetscInt *nto_slots,*nfrom_slots,j = 0;
99:
101: for (i=0; i<n; i++) {
102: if (to_slots[i] != from_slots[i]) n_nonmatching++;
103: }
105: if (!n_nonmatching) {
106: gen_to->nonmatching_computed = PETSC_TRUE;
107: gen_to->n_nonmatching = gen_from->n_nonmatching = 0;
108: PetscLogInfo((0,"VecScatterLocalOptimize_Private:Reduced %D to 0\n", n));
109: } else if (n_nonmatching == n) {
110: gen_to->nonmatching_computed = PETSC_FALSE;
111: PetscLogInfo((0,"VecScatterLocalOptimize_Private:All values non-matching\n"));
112: } else {
113: gen_to->nonmatching_computed= PETSC_TRUE;
114: gen_to->n_nonmatching = gen_from->n_nonmatching = n_nonmatching;
115: PetscMalloc2(n_nonmatching,PetscInt,&nto_slots,n_nonmatching,PetscInt,&nfrom_slots);
116: gen_to->slots_nonmatching = nto_slots;
117: gen_from->slots_nonmatching = nfrom_slots;
118: for (i=0; i<n; i++) {
119: if (to_slots[i] != from_slots[i]) {
120: nto_slots[j] = to_slots[i];
121: nfrom_slots[j] = from_slots[i];
122: j++;
123: }
124: }
125: PetscLogInfo((0,"VecScatterLocalOptimize_Private:Reduced %D to %D\n",n,n_nonmatching));
126: }
127: return(0);
128: }
130: /* --------------------------------------------------------------------------------------*/
133: PetscErrorCode VecScatterCopy_PtoP(VecScatter in,VecScatter out)
134: {
135: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
136: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
137: PetscErrorCode ierr;
138: PetscInt ny;
141: out->postrecvs = in->postrecvs;
142: out->begin = in->begin;
143: out->end = in->end;
144: out->copy = in->copy;
145: out->destroy = in->destroy;
146: out->view = in->view;
148: /* allocate entire send scatter context */
149: PetscNew(VecScatter_MPI_General,&out_to);
150: PetscNew(VecScatter_MPI_General,&out_from);
152: ny = in_to->starts[in_to->n];
153: out_to->n = in_to->n;
154: out_to->type = in_to->type;
155: out_to->sendfirst = in_to->sendfirst;
156: PetscMalloc7(ny,PetscScalar,&out_to->values,out_to->n,MPI_Request,&out_to->requests,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,
157: out_to->n,PetscMPIInt,&out_to->procs,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,
158: &out_to->rstatus);
159: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
160: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
161: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
163: out->todata = (void*)out_to;
164: out_to->local.n = in_to->local.n;
165: out_to->local.nonmatching_computed = PETSC_FALSE;
166: out_to->local.n_nonmatching = 0;
167: out_to->local.slots_nonmatching = 0;
168: if (in_to->local.n) {
169: PetscMalloc2(in_to->local.n,PetscInt,&out_to->local.vslots,in_from->local.n,PetscInt,&out_from->local.vslots);
170: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
171: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
172: } else {
173: out_to->local.vslots = 0;
174: out_from->local.vslots = 0;
175: }
177: /* allocate entire receive context */
178: out_from->type = in_from->type;
179: ny = in_from->starts[in_from->n];
180: out_from->n = in_from->n;
181: out_from->sendfirst = in_from->sendfirst;
183: PetscMalloc5(ny,PetscScalar,&out_from->values,out_from->n,MPI_Request,&out_from->requests,ny,PetscInt,&out_from->indices,
184: out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);
186: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
187: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
188: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
189: out->fromdata = (void*)out_from;
190: out_from->local.n = in_from->local.n;
191: out_from->local.nonmatching_computed = PETSC_FALSE;
192: out_from->local.n_nonmatching = 0;
193: out_from->local.slots_nonmatching = 0;
194: return(0);
195: }
197: /* -------------------------------------------------------------------------------------*/
200: PetscErrorCode VecScatterDestroy_PtoP(VecScatter ctx)
201: {
202: VecScatter_MPI_General *gen_to = (VecScatter_MPI_General*)ctx->todata;
203: VecScatter_MPI_General *gen_from = (VecScatter_MPI_General*)ctx->fromdata;
204: PetscErrorCode ierr;
207: CHKMEMQ;
208: if (gen_to->local.vslots) {PetscFree2(gen_to->local.vslots,gen_from->local.vslots);}
209: if (gen_to->local.slots_nonmatching) {PetscFree2(gen_to->local.slots_nonmatching,gen_from->local.slots_nonmatching);}
210: PetscFree7(gen_to->values,gen_to->requests,gen_to->indices,gen_to->starts,gen_to->procs,gen_to->sstatus,gen_to->rstatus);
211: PetscFree5(gen_from->values,gen_from->requests,gen_from->indices,gen_from->starts,gen_from->procs);
212: PetscFree(gen_from);
213: PetscFree(gen_to);
214: PetscHeaderDestroy(ctx);
215: return(0);
216: }
218: /* --------------------------------------------------------------------------------------*/
219: /*
220: Even though the next routines are written for parallel
221: vectors, either xin or yin (but not both) may be a Seq
222: vector, one per processor.
223:
224: gen_from indices indicate where arriving data is unpacked;
225: gen_to indices indicate where departing data is gathered from.
226: The naming can be VERY confusing.
228: */
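/*
      A minimal usage sketch of the calling sequence these routines implement
      (assumed caller pattern, not part of this file):

         VecScatterBegin(x,y,INSERT_VALUES,SCATTER_FORWARD,ctx);
         ... unrelated local work may overlap the communication here ...
         VecScatterEnd(x,y,INSERT_VALUES,SCATTER_FORWARD,ctx);

      With SCATTER_REVERSE the roles of ctx->todata and ctx->fromdata are swapped
      below, so the same context can move the data back again.
*/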
231: PetscErrorCode VecScatterBegin_PtoP(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
232: {
233: VecScatter_MPI_General *gen_to,*gen_from;
234: MPI_Comm comm = ctx->comm;
235: PetscScalar *xv,*yv,*val,*rvalues,*svalues;
236: MPI_Request *rwaits,*swaits;
237: PetscInt i,j,*indices,*rstarts,*sstarts;
238: PetscMPIInt tag = ctx->tag,*rprocs,*sprocs;
239: PetscErrorCode ierr;
240: PetscInt nrecvs,nsends,iend;
243: CHKMEMQ;
244: VecGetArray(xin,&xv);
245: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
246: if (mode & SCATTER_REVERSE){
247: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
248: gen_from = (VecScatter_MPI_General*)ctx->todata;
249: } else {
250: gen_to = (VecScatter_MPI_General*)ctx->todata;
251: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
252: }
253: rvalues = gen_from->values;
254: svalues = gen_to->values;
255: nrecvs = gen_from->n;
256: nsends = gen_to->n;
257: rwaits = gen_from->requests;
258: swaits = gen_to->requests;
259: indices = gen_to->indices;
260: rstarts = gen_from->starts;
261: sstarts = gen_to->starts;
262: rprocs = gen_from->procs;
263: sprocs = gen_to->procs;
265: if (!(mode & SCATTER_LOCAL)) {
267: if (gen_to->sendfirst) {
268: /* do sends: */
269: for (i=0; i<nsends; i++) {
270: val = svalues + sstarts[i];
271: iend = sstarts[i+1]-sstarts[i];
272: /* pack the message */
273: for (j=0; j<iend; j++) {
274: val[j] = xv[*indices++];
275: }
276: MPI_Isend(val,iend,MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
277: }
278: }
279:
280: /* post receives: */
281: for (i=0; i<nrecvs; i++) {
282: MPI_Irecv(rvalues+rstarts[i],rstarts[i+1]-rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
283: }
285: if (!gen_to->sendfirst) {
286: /* do sends: */
287: for (i=0; i<nsends; i++) {
288: val = svalues + sstarts[i];
289: iend = sstarts[i+1]-sstarts[i];
290: /* pack the message */
291: for (j=0; j<iend; j++) {
292: val[j] = xv[*indices++];
293: }
294: MPI_Isend(val,iend,MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
295: }
296: }
297: }
299: /* take care of local scatters */
300: if (gen_to->local.n && addv == INSERT_VALUES) {
301: if (yv == xv && !gen_to->local.nonmatching_computed) {
302: VecScatterLocalOptimize_Private(&gen_to->local,&gen_from->local);
303: }
304: if (gen_to->local.is_copy) {
305: PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
306: } else if (yv != xv || !gen_to->local.nonmatching_computed) {
307: PetscInt *tslots = gen_to->local.vslots,*fslots = gen_from->local.vslots;
308: PetscInt n = gen_to->local.n;
309: for (i=0; i<n; i++) {yv[fslots[i]] = xv[tslots[i]];}
310: } else {
311: /*
312: In this case some of the values would simply be copied onto themselves, so those entries are skipped
313: */
314: PetscInt *tslots = gen_to->local.slots_nonmatching,*fslots = gen_from->local.slots_nonmatching;
315: PetscInt n = gen_to->local.n_nonmatching;
316: for (i=0; i<n; i++) {yv[fslots[i]] = xv[tslots[i]];}
317: }
318: } else if (gen_to->local.n) {
319: PetscInt *tslots = gen_to->local.vslots,*fslots = gen_from->local.vslots;
320: PetscInt n = gen_to->local.n;
321: if (addv == ADD_VALUES) {
322: for (i=0; i<n; i++) {yv[fslots[i]] += xv[tslots[i]];}
323: #if !defined(PETSC_USE_COMPLEX)
324: } else if (addv == MAX_VALUES) {
325: for (i=0; i<n; i++) {yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[tslots[i]]);}
326: #endif
327: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
328: }
330: VecRestoreArray(xin,&xv);
331: if (xin != yin) {VecRestoreArray(yin,&yv);}
332: CHKMEMQ;
333: return(0);
334: }
336: /* --------------------------------------------------------------------------------------*/
339: PetscErrorCode VecScatterEnd_PtoP(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
340: {
341: VecScatter_MPI_General *gen_to,*gen_from;
342: PetscScalar *rvalues,*yv,*val;
343: PetscErrorCode ierr;
344: PetscInt nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices;
345: PetscMPIInt imdex;
346: MPI_Request *rwaits,*swaits;
347: MPI_Status rstatus,*sstatus;
350: CHKMEMQ;
351: if (mode & SCATTER_LOCAL) return(0);
352: VecGetArray(yin,&yv);
354: if (mode & SCATTER_REVERSE){
355: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
356: gen_from = (VecScatter_MPI_General*)ctx->todata;
357: sstatus = gen_from->sstatus;
358: } else {
359: gen_to = (VecScatter_MPI_General*)ctx->todata;
360: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
361: sstatus = gen_to->sstatus;
362: }
363: rvalues = gen_from->values;
364: nrecvs = gen_from->n;
365: nsends = gen_to->n;
366: rwaits = gen_from->requests;
367: swaits = gen_to->requests;
368: indices = gen_from->indices;
369: rstarts = gen_from->starts;
371: /* wait on receives */
372: count = nrecvs;
373: while (count) {
374: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
375: /* unpack receives into our local space */
376: val = rvalues + rstarts[imdex];
377: n = rstarts[imdex+1]-rstarts[imdex];
378: lindices = indices + rstarts[imdex];
379: if (addv == INSERT_VALUES) {
380: for (i=0; i<n; i++) {
381: yv[lindices[i]] = *val++;
382: }
383: } else if (addv == ADD_VALUES) {
384: for (i=0; i<n; i++) {
385: yv[lindices[i]] += *val++;
386: }
387: #if !defined(PETSC_USE_COMPLEX)
388: } else if (addv == MAX_VALUES) {
389: for (i=0; i<n; i++) {
390: yv[lindices[i]] = PetscMax(yv[lindices[i]],*val); val++;
391: }
392: #endif
393: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
394: count--;
395: }
397: /* wait on sends */
398: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
399: VecRestoreArray(yin,&yv);
400: CHKMEMQ;
401: return(0);
402: }
403: /* ==========================================================================================*/
404: /*
405: Special scatters for fixed block sizes. These provide better performance
406: because the local copying, packing, and unpacking are done with loops
407: unrolled to the size of the block.
409: They also use MPI persistent sends and receives, which (at least in theory)
410: allow MPI to optimize repeated sends and receives of the same type.
411: */
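/*
      The persistent-request lifecycle used below is the standard MPI pattern:

         MPI_Send_init(buf,count,type,dest,tag,comm,&req);    set up once
         MPI_Start(&req);  (or MPI_Startall(n,reqs))          start each scatter
         MPI_Wait(&req,&status);                              complete each scatter
         MPI_Request_free(&req);                              when the context is destroyed
*/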
413: /*
414: This is for use with the "ready-receiver" mode. In theory, on some
415: machines it could lead to better performance; in practice we have never
416: seen it do so. Enabled with the -vecscatter_rr flag.
417: */
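/*
      Note: a ready send (MPI_Rsend_init/MPI_Start) is correct only if the matching
      receive has already been posted, which is why this routine starts all of the
      persistent receives before any of the sends are begun.
*/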
420: PetscErrorCode VecScatterPostRecvs_PtoP_X(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
421: {
422: PetscErrorCode ierr;
423: VecScatter_MPI_General *gen_from = (VecScatter_MPI_General*)ctx->fromdata;
426: if (gen_from->n) {MPI_Startall_irecv(gen_from->starts[gen_from->n]*gen_from->bs,gen_from->n,gen_from->requests);}
427: return(0);
428: }
430: /* --------------------------------------------------------------------------------------*/
431: /*
432: Special optimization to detect whether the local part of the scatter is actually
433: a contiguous copy. If so, the scatter routines call PetscMemcpy() instead of the indexed loop.
434:
435: */
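/*
      Illustrative example (made-up indices): with bs = 2, to_slots = {10,12,14} and
      from_slots = {40,42,44} both advance by exactly bs, so copy_start/copy_length
      are set and a single PetscMemcpy() of 3*bs scalars replaces the indexed loop.
*/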
438: PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter_Seq_General *gen_to,VecScatter_Seq_General *gen_from,PetscInt bs)
439: {
440: PetscInt n = gen_to->n,i,*to_slots = gen_to->vslots,*from_slots = gen_from->vslots;
441: PetscInt to_start,from_start;
445: to_start = to_slots[0];
446: from_start = from_slots[0];
448: for (i=1; i<n; i++) {
449: to_start += bs;
450: from_start += bs;
451: if (to_slots[i] != to_start) return(0);
452: if (from_slots[i] != from_start) return(0);
453: }
454: gen_to->is_copy = PETSC_TRUE;
455: gen_to->copy_start = to_slots[0];
456: gen_to->copy_length = bs*sizeof(PetscScalar)*n;
457: gen_from->is_copy = PETSC_TRUE;
458: gen_from->copy_start = from_slots[0];
459: gen_from->copy_length = bs*sizeof(PetscScalar)*n;
461: PetscLogInfo((0,"VecScatterLocalOptimizeCopy_Private:Local scatter is a copy, optimizing for it\n"));
463: return(0);
464: }
466: /* --------------------------------------------------------------------------------------*/
470: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
471: {
472: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
473: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
474: PetscErrorCode ierr;
475: PetscInt ny,bs = in_from->bs;
478: out->postrecvs = in->postrecvs;
479: out->begin = in->begin;
480: out->end = in->end;
481: out->copy = in->copy;
482: out->destroy = in->destroy;
483: out->view = in->view;
485: /* allocate entire send scatter context */
486: PetscNew(VecScatter_MPI_General,&out_to);
487: PetscNew(VecScatter_MPI_General,&out_from);
489: ny = in_to->starts[in_to->n];
490: out_to->n = in_to->n;
491: out_to->type = in_to->type;
492: out_to->sendfirst = in_to->sendfirst;
494: PetscMalloc7(bs*ny,PetscScalar,&out_to->values,out_to->n,MPI_Request,&out_to->requests,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,
495: out_to->n,PetscMPIInt,&out_to->procs,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,
496: &out_to->rstatus);
497: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
498: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
499: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
500:
501: out->todata = (void*)out_to;
502: out_to->local.n = in_to->local.n;
503: out_to->local.nonmatching_computed = PETSC_FALSE;
504: out_to->local.n_nonmatching = 0;
505: out_to->local.slots_nonmatching = 0;
506: if (in_to->local.n) {
507: PetscMalloc2(in_to->local.n,PetscInt,&out_to->local.vslots,in_from->local.n,PetscInt,&out_from->local.vslots);
508: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
509: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
510: } else {
511: out_to->local.vslots = 0;
512: out_from->local.vslots = 0;
513: }
515: /* allocate entire receive context */
516: out_from->type = in_from->type;
517: ny = in_from->starts[in_from->n];
518: out_from->n = in_from->n;
519: out_from->sendfirst = in_from->sendfirst;
521: PetscMalloc5(ny*bs,PetscScalar,&out_from->values,out_from->n,MPI_Request,&out_from->requests,ny,PetscInt,&out_from->indices,
522: out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);
523: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
524: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
525: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
526: out->fromdata = (void*)out_from;
527: out_from->local.n = in_from->local.n;
528: out_from->local.nonmatching_computed = PETSC_FALSE;
529: out_from->local.n_nonmatching = 0;
530: out_from->local.slots_nonmatching = 0;
532: /*
533: set up the persistent request arrays created below with MPI_Send_init()/MPI_Recv_init()
534: */
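/*
      Note: the forward receives (rwaits) and the reverse sends (rev_swaits) share the
      receive buffer Srvalues, while the forward sends (swaits) and the reverse receives
      (rev_rwaits) share the send buffer Ssvalues; see the MPI_*_init() calls below.
*/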
535: {
536: PetscMPIInt tag;
537: MPI_Comm comm;
538: PetscInt *sstarts = out_to->starts, *rstarts = out_from->starts;
539: PetscMPIInt *sprocs = out_to->procs, *rprocs = out_from->procs;
540: PetscInt i;
541: PetscTruth flg;
542: MPI_Request *swaits = out_to->requests,*rwaits = out_from->requests;
543: MPI_Request *rev_swaits,*rev_rwaits;
544: PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
546: PetscMalloc2(in_to->n,MPI_Request,&out_to->rev_requests,in_from->n,MPI_Request,&out_from->rev_requests);
548: rev_rwaits = out_to->rev_requests;
549: rev_swaits = out_from->rev_requests;
551: out_from->bs = out_to->bs = bs;
552: tag = out->tag;
553: comm = out->comm;
555: /* Register the receives that you will use later (sends for scatter reverse) */
556: for (i=0; i<out_from->n; i++) {
557: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
558: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
559: }
561: PetscOptionsHasName(PETSC_NULL,"-vecscatter_rr",&flg);
562: if (flg) {
563: out->postrecvs = VecScatterPostRecvs_PtoP_X;
564: out_to->use_readyreceiver = PETSC_TRUE;
565: out_from->use_readyreceiver = PETSC_TRUE;
566: for (i=0; i<out_to->n; i++) {
567: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
568: }
569: PetscLogInfo((0,"VecScatterCopy_PtoP_X:Using VecScatter ready receiver mode\n"));
570: } else {
571: out->postrecvs = 0;
572: out_to->use_readyreceiver = PETSC_FALSE;
573: out_from->use_readyreceiver = PETSC_FALSE;
574: flg = PETSC_FALSE;
575: PetscOptionsHasName(PETSC_NULL,"-vecscatter_ssend",&flg);
576: if (flg) {
577: PetscLogInfo((0,"VecScatterCopy_PtoP_X:Using VecScatter Ssend mode\n"));
578: }
579: for (i=0; i<out_to->n; i++) {
580: if (!flg) {
581: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
582: } else {
583: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
584: }
585: }
586: }
587: /* Register receives for scatter reverse */
588: for (i=0; i<out_to->n; i++) {
589: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
590: }
591: }
593: return(0);
594: }
596: /* --------------------------------------------------------------------------------------*/
600: PetscErrorCode VecScatterBegin_PtoP_12(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
601: {
602: VecScatter_MPI_General *gen_to,*gen_from;
603: PetscScalar *xv,*yv,*val,*svalues;
604: MPI_Request *rwaits,*swaits;
605: PetscInt *indices,*sstarts,iend,i,j,nrecvs,nsends,idx,len;
606: PetscErrorCode ierr;
609: VecGetArray(xin,&xv);
610: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
612: if (mode & SCATTER_REVERSE) {
613: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
614: gen_from = (VecScatter_MPI_General*)ctx->todata;
615: rwaits = gen_from->rev_requests;
616: swaits = gen_to->rev_requests;
617: } else {
618: gen_to = (VecScatter_MPI_General*)ctx->todata;
619: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
620: rwaits = gen_from->requests;
621: swaits = gen_to->requests;
622: }
623: svalues = gen_to->values;
624: nrecvs = gen_from->n;
625: nsends = gen_to->n;
626: indices = gen_to->indices;
627: sstarts = gen_to->starts;
629: if (!(mode & SCATTER_LOCAL)) {
631: if (!gen_from->use_readyreceiver && !gen_to->sendfirst) {
632: /* post receives since they were not posted in VecScatterPostRecvs() */
633: if (nrecvs) {MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);}
634: }
635: if (ctx->packtogether) {
636: /* this version packs all the messages together and sends them at once; used when -vecscatter_packtogether is specified */
637: len = 12*sstarts[nsends];
638: val = svalues;
639: for (i=0; i<len; i += 12) {
640: idx = *indices++;
641: val[0] = xv[idx];
642: val[1] = xv[idx+1];
643: val[2] = xv[idx+2];
644: val[3] = xv[idx+3];
645: val[4] = xv[idx+4];
646: val[5] = xv[idx+5];
647: val[6] = xv[idx+6];
648: val[7] = xv[idx+7];
649: val[8] = xv[idx+8];
650: val[9] = xv[idx+9];
651: val[10] = xv[idx+10];
652: val[11] = xv[idx+11];
653: val += 12;
654: }
655: if (nsends) {MPI_Startall_isend(len,nsends,swaits);}
656: } else {
657: /* this version packs and sends one at a time */
658: val = svalues;
659: for (i=0; i<nsends; i++) {
660: iend = sstarts[i+1]-sstarts[i];
662: for (j=0; j<iend; j++) {
663: idx = *indices++;
664: val[0] = xv[idx];
665: val[1] = xv[idx+1];
666: val[2] = xv[idx+2];
667: val[3] = xv[idx+3];
668: val[4] = xv[idx+4];
669: val[5] = xv[idx+5];
670: val[6] = xv[idx+6];
671: val[7] = xv[idx+7];
672: val[8] = xv[idx+8];
673: val[9] = xv[idx+9];
674: val[10] = xv[idx+10];
675: val[11] = xv[idx+11];
676: val += 12;
677: }
678: MPI_Start_isend(12*iend,swaits+i);
679: }
680: }
682: if (!gen_from->use_readyreceiver && gen_to->sendfirst) {
683: /* post receives since they were not posted in VecScatterPostRecvs() */
684: if (nrecvs) {MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);}
685: }
686: }
688: /* take care of local scatters */
689: if (gen_to->local.n) {
690: PetscInt *tslots = gen_to->local.vslots,*fslots = gen_from->local.vslots;
691: PetscInt n = gen_to->local.n,il,ir;
692: if (addv == INSERT_VALUES) {
693: if (gen_to->local.is_copy) {
694: PetscMemcpy(yv+gen_from->local.copy_start,xv+gen_to->local.copy_start,gen_to->local.copy_length);
695: } else {
696: for (i=0; i<n; i++) {
697: il = fslots[i]; ir = tslots[i];
698: yv[il] = xv[ir];
699: yv[il+1] = xv[ir+1];
700: yv[il+2] = xv[ir+2];
701: yv[il+3] = xv[ir+3];
702: yv[il+4] = xv[ir+4];
703: yv[il+5] = xv[ir+5];
704: yv[il+6] = xv[ir+6];
705: yv[il+7] = xv[ir+7];
706: yv[il+8] = xv[ir+8];
707: yv[il+9] = xv[ir+9];
708: yv[il+10] = xv[ir+10];
709: yv[il+11] = xv[ir+11];
710: }
711: }
712: } else if (addv == ADD_VALUES) {
713: for (i=0; i<n; i++) {
714: il = fslots[i]; ir = tslots[i];
715: yv[il] += xv[ir];
716: yv[il+1] += xv[ir+1];
717: yv[il+2] += xv[ir+2];
718: yv[il+3] += xv[ir+3];
719: yv[il+4] += xv[ir+4];
720: yv[il+5] += xv[ir+5];
721: yv[il+6] += xv[ir+6];
722: yv[il+7] += xv[ir+7];
723: yv[il+8] += xv[ir+8];
724: yv[il+9] += xv[ir+9];
725: yv[il+10] += xv[ir+10];
726: yv[il+11] += xv[ir+11];
727: }
728: #if !defined(PETSC_USE_COMPLEX)
729: } else if (addv == MAX_VALUES) {
730: for (i=0; i<n; i++) {
731: il = fslots[i]; ir = tslots[i];
732: yv[il] = PetscMax(yv[il],xv[ir]);
733: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
734: yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
735: yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
736: yv[il+4] = PetscMax(yv[il+4],xv[ir+4]);
737: yv[il+5] = PetscMax(yv[il+5],xv[ir+5]);
738: yv[il+6] = PetscMax(yv[il+6],xv[ir+6]);
739: yv[il+7] = PetscMax(yv[il+7],xv[ir+7]);
740: yv[il+8] = PetscMax(yv[il+8],xv[ir+8]);
741: yv[il+9] = PetscMax(yv[il+9],xv[ir+9]);
742: yv[il+10] = PetscMax(yv[il+10],xv[ir+10]);
743: yv[il+11] = PetscMax(yv[il+11],xv[ir+11]);
744: }
745: #endif
746: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
747: }
748: VecRestoreArray(xin,&xv);
749: if (xin != yin) {VecRestoreArray(yin,&yv);}
750: return(0);
751: }
753: /* --------------------------------------------------------------------------------------*/
757: PetscErrorCode VecScatterEnd_PtoP_12(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
758: {
759: VecScatter_MPI_General *gen_to,*gen_from;
760: PetscScalar *rvalues,*yv,*val;
761: PetscErrorCode ierr;
762: PetscMPIInt imdex;
763: PetscInt nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
764: MPI_Request *rwaits,*swaits;
765: MPI_Status *rstatus,*sstatus;
768: if (mode & SCATTER_LOCAL) return(0);
769: VecGetArray(yin,&yv);
771: if (mode & SCATTER_REVERSE) {
772: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
773: gen_from = (VecScatter_MPI_General*)ctx->todata;
774: rwaits = gen_from->rev_requests;
775: swaits = gen_to->rev_requests;
776: sstatus = gen_from->sstatus;
777: rstatus = gen_from->rstatus;
778: } else {
779: gen_to = (VecScatter_MPI_General*)ctx->todata;
780: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
781: rwaits = gen_from->requests;
782: swaits = gen_to->requests;
783: sstatus = gen_to->sstatus;
784: rstatus = gen_to->rstatus;
785: }
786: rvalues = gen_from->values;
787: nrecvs = gen_from->n;
788: nsends = gen_to->n;
789: indices = gen_from->indices;
790: rstarts = gen_from->starts;
792: /* wait on receives */
793: count = nrecvs;
794: if (ctx->packtogether) { /* receive all the messages, then unpack them all; used when -vecscatter_packtogether is specified */
795: if (nrecvs) {MPI_Waitall(nrecvs,rwaits,rstatus);}
796: n = rstarts[count];
797: val = rvalues;
798: lindices = indices;
799: if (addv == INSERT_VALUES) {
800: for (i=0; i<n; i++) {
801: idx = lindices[i];
802: yv[idx] = val[0];
803: yv[idx+1] = val[1];
804: yv[idx+2] = val[2];
805: yv[idx+3] = val[3];
806: yv[idx+4] = val[4];
807: yv[idx+5] = val[5];
808: yv[idx+6] = val[6];
809: yv[idx+7] = val[7];
810: yv[idx+8] = val[8];
811: yv[idx+9] = val[9];
812: yv[idx+10] = val[10];
813: yv[idx+11] = val[11];
814: val += 12;
815: }
816: } else if (addv == ADD_VALUES) {
817: for (i=0; i<n; i++) {
818: idx = lindices[i];
819: yv[idx] += val[0];
820: yv[idx+1] += val[1];
821: yv[idx+2] += val[2];
822: yv[idx+3] += val[3];
823: yv[idx+4] += val[4];
824: yv[idx+5] += val[5];
825: yv[idx+6] += val[6];
826: yv[idx+7] += val[7];
827: yv[idx+8] += val[8];
828: yv[idx+9] += val[9];
829: yv[idx+10] += val[10];
830: yv[idx+11] += val[11];
831: val += 12;
832: }
833: #if !defined(PETSC_USE_COMPLEX)
834: } else if (addv == MAX_VALUES) {
835: for (i=0; i<n; i++) {
836: idx = lindices[i];
837: yv[idx] = PetscMax(yv[idx],val[0]);
838: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
839: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
840: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
841: yv[idx+4] = PetscMax(yv[idx+4],val[4]);
842: yv[idx+5] = PetscMax(yv[idx+5],val[5]);
843: yv[idx+6] = PetscMax(yv[idx+6],val[6]);
844: yv[idx+7] = PetscMax(yv[idx+7],val[7]);
845: yv[idx+8] = PetscMax(yv[idx+8],val[8]);
846: yv[idx+9] = PetscMax(yv[idx+9],val[9]);
847: yv[idx+10] = PetscMax(yv[idx+10],val[10]);
848: yv[idx+11] = PetscMax(yv[idx+11],val[11]);
849: val += 12;
850: }
851: #endif
852: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
853: } else { /* unpack each message as it arrives, default version */
854: while (count) {
855: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus[0]);
856: /* unpack receives into our local space */
857: val = rvalues + 12*rstarts[imdex];
858: lindices = indices + rstarts[imdex];
859: n = rstarts[imdex+1] - rstarts[imdex];
860: if (addv == INSERT_VALUES) {
861: for (i=0; i<n; i++) {
862: idx = lindices[i];
863: yv[idx] = val[0];
864: yv[idx+1] = val[1];
865: yv[idx+2] = val[2];
866: yv[idx+3] = val[3];
867: yv[idx+4] = val[4];
868: yv[idx+5] = val[5];
869: yv[idx+6] = val[6];
870: yv[idx+7] = val[7];
871: yv[idx+8] = val[8];
872: yv[idx+9] = val[9];
873: yv[idx+10] = val[10];
874: yv[idx+11] = val[11];
875: val += 12;
876: }
877: } else if (addv == ADD_VALUES) {
878: for (i=0; i<n; i++) {
879: idx = lindices[i];
880: yv[idx] += val[0];
881: yv[idx+1] += val[1];
882: yv[idx+2] += val[2];
883: yv[idx+3] += val[3];
884: yv[idx+4] += val[4];
885: yv[idx+5] += val[5];
886: yv[idx+6] += val[6];
887: yv[idx+7] += val[7];
888: yv[idx+8] += val[8];
889: yv[idx+9] += val[9];
890: yv[idx+10] += val[10];
891: yv[idx+11] += val[11];
892: val += 12;
893: }
894: #if !defined(PETSC_USE_COMPLEX)
895: } else if (addv == MAX_VALUES) {
896: for (i=0; i<n; i++) {
897: idx = lindices[i];
898: yv[idx] = PetscMax(yv[idx],val[0]);
899: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
900: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
901: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
902: yv[idx+4] = PetscMax(yv[idx+4],val[4]);
903: yv[idx+5] = PetscMax(yv[idx+5],val[5]);
904: yv[idx+6] = PetscMax(yv[idx+6],val[6]);
905: yv[idx+7] = PetscMax(yv[idx+7],val[7]);
906: yv[idx+8] = PetscMax(yv[idx+8],val[8]);
907: yv[idx+9] = PetscMax(yv[idx+9],val[9]);
908: yv[idx+10] = PetscMax(yv[idx+10],val[10]);
909: yv[idx+11] = PetscMax(yv[idx+11],val[11]);
910: val += 12;
911: }
912: #endif
913: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
914: count--;
915: }
916: }
917: /* wait on sends */
918: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
919: VecRestoreArray(yin,&yv);
920: return(0);
921: }
923: /* --------------------------------------------------------------------------------------*/
927: PetscErrorCode VecScatterBegin_PtoP_8(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
928: {
929: VecScatter_MPI_General *gen_to,*gen_from;
930: PetscScalar *xv,*yv,*val,*svalues;
931: MPI_Request *rwaits,*swaits;
932: PetscErrorCode ierr;
933: PetscInt i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;
936: VecGetArray(xin,&xv);
937: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
938: if (mode & SCATTER_REVERSE) {
939: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
940: gen_from = (VecScatter_MPI_General*)ctx->todata;
941: rwaits = gen_from->rev_requests;
942: swaits = gen_to->rev_requests;
943: } else {
944: gen_to = (VecScatter_MPI_General*)ctx->todata;
945: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
946: rwaits = gen_from->requests;
947: swaits = gen_to->requests;
948: }
949: svalues = gen_to->values;
950: nrecvs = gen_from->n;
951: nsends = gen_to->n;
952: indices = gen_to->indices;
953: sstarts = gen_to->starts;
955: if (!(mode & SCATTER_LOCAL)) {
957: if (gen_to->sendfirst) {
958: /* this version packs and sends one at a time */
959: val = svalues;
960: for (i=0; i<nsends; i++) {
961: iend = sstarts[i+1]-sstarts[i];
963: for (j=0; j<iend; j++) {
964: idx = *indices++;
965: val[0] = xv[idx];
966: val[1] = xv[idx+1];
967: val[2] = xv[idx+2];
968: val[3] = xv[idx+3];
969: val[4] = xv[idx+4];
970: val[5] = xv[idx+5];
971: val[6] = xv[idx+6];
972: val[7] = xv[idx+7];
973: val += 8;
974: }
975: MPI_Start_isend(8*iend,swaits+i);
976: }
977: }
979: if (!gen_from->use_readyreceiver) {
980: /* post receives since they were not posted in VecScatterPostRecvs() */
981: if (nrecvs) {MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);}
982: }
984: if (!gen_to->sendfirst) {
985: /* this version packs all the messages together and sends */
986: /*
987: len = 5*sstarts[nsends];
988: val = svalues;
989: for (i=0; i<len; i += 5) {
990: idx = *indices++;
991: val[0] = xv[idx];
992: val[1] = xv[idx+1];
993: val[2] = xv[idx+2];
994: val[3] = xv[idx+3];
995: val[4] = xv[idx+4];
996: val += 5;
997: }
998: MPI_Startall_isend(len,nsends,swaits);
999: */
1001: /* this version packs and sends one at a time */
1002: val = svalues;
1003: for (i=0; i<nsends; i++) {
1004: iend = sstarts[i+1]-sstarts[i];
1006: for (j=0; j<iend; j++) {
1007: idx = *indices++;
1008: val[0] = xv[idx];
1009: val[1] = xv[idx+1];
1010: val[2] = xv[idx+2];
1011: val[3] = xv[idx+3];
1012: val[4] = xv[idx+4];
1013: val[5] = xv[idx+5];
1014: val[6] = xv[idx+6];
1015: val[7] = xv[idx+7];
1016: val += 8;
1017: }
1018: MPI_Start_isend(8*iend,swaits+i);
1019: }
1020: }
1021: }
1023: /* take care of local scatters */
1024: if (gen_to->local.n) {
1025: PetscInt *tslots = gen_to->local.vslots,*fslots = gen_from->local.vslots;
1026: PetscInt n = gen_to->local.n,il,ir;
1027: if (addv == INSERT_VALUES) {
1028: if (gen_to->local.is_copy) {
1029: PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1030: } else {
1031: for (i=0; i<n; i++) {
1032: il = fslots[i]; ir = tslots[i];
1033: yv[il] = xv[ir];
1034: yv[il+1] = xv[ir+1];
1035: yv[il+2] = xv[ir+2];
1036: yv[il+3] = xv[ir+3];
1037: yv[il+4] = xv[ir+4];
1038: yv[il+5] = xv[ir+5];
1039: yv[il+6] = xv[ir+6];
1040: yv[il+7] = xv[ir+7];
1041: }
1042: }
1043: } else if (addv == ADD_VALUES) {
1044: for (i=0; i<n; i++) {
1045: il = fslots[i]; ir = tslots[i];
1046: yv[il] += xv[ir];
1047: yv[il+1] += xv[ir+1];
1048: yv[il+2] += xv[ir+2];
1049: yv[il+3] += xv[ir+3];
1050: yv[il+4] += xv[ir+4];
1051: yv[il+5] += xv[ir+5];
1052: yv[il+6] += xv[ir+6];
1053: yv[il+7] += xv[ir+7];
1054: }
1055: #if !defined(PETSC_USE_COMPLEX)
1056: } else if (addv == MAX_VALUES) {
1057: for (i=0; i<n; i++) {
1058: il = fslots[i]; ir = tslots[i];
1059: yv[il] = PetscMax(yv[il],xv[ir]);
1060: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1061: yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1062: yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
1063: yv[il+4] = PetscMax(yv[il+4],xv[ir+4]);
1064: yv[il+5] = PetscMax(yv[il+5],xv[ir+5]);
1065: yv[il+6] = PetscMax(yv[il+6],xv[ir+6]);
1066: yv[il+7] = PetscMax(yv[il+7],xv[ir+7]);
1067: }
1068: #endif
1069: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1070: }
1071: VecRestoreArray(xin,&xv);
1072: if (xin != yin) {VecRestoreArray(yin,&yv);}
1073: return(0);
1074: }
1076: /* --------------------------------------------------------------------------------------*/
1080: PetscErrorCode VecScatterEnd_PtoP_8(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1081: {
1082: VecScatter_MPI_General *gen_to,*gen_from;
1083: PetscScalar *rvalues,*yv,*val;
1084: PetscErrorCode ierr;
1085: PetscInt nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
1086: PetscMPIInt imdex;
1087: MPI_Request *rwaits,*swaits;
1088: MPI_Status rstatus,*sstatus;
1091: if (mode & SCATTER_LOCAL) return(0);
1092: VecGetArray(yin,&yv);
1094: if (mode & SCATTER_REVERSE) {
1095: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1096: gen_from = (VecScatter_MPI_General*)ctx->todata;
1097: rwaits = gen_from->rev_requests;
1098: swaits = gen_to->rev_requests;
1099: sstatus = gen_from->sstatus;
1100: } else {
1101: gen_to = (VecScatter_MPI_General*)ctx->todata;
1102: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1103: rwaits = gen_from->requests;
1104: swaits = gen_to->requests;
1105: sstatus = gen_to->sstatus;
1106: }
1107: rvalues = gen_from->values;
1108: nrecvs = gen_from->n;
1109: nsends = gen_to->n;
1110: indices = gen_from->indices;
1111: rstarts = gen_from->starts;
1113: /* wait on receives */
1114: count = nrecvs;
1115: while (count) {
1116: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
1117: /* unpack receives into our local space */
1118: val = rvalues + 8*rstarts[imdex];
1119: lindices = indices + rstarts[imdex];
1120: n = rstarts[imdex+1] - rstarts[imdex];
1121: if (addv == INSERT_VALUES) {
1122: for (i=0; i<n; i++) {
1123: idx = lindices[i];
1124: yv[idx] = val[0];
1125: yv[idx+1] = val[1];
1126: yv[idx+2] = val[2];
1127: yv[idx+3] = val[3];
1128: yv[idx+4] = val[4];
1129: yv[idx+5] = val[5];
1130: yv[idx+6] = val[6];
1131: yv[idx+7] = val[7];
1132: val += 8;
1133: }
1134: } else if (addv == ADD_VALUES) {
1135: for (i=0; i<n; i++) {
1136: idx = lindices[i];
1137: yv[idx] += val[0];
1138: yv[idx+1] += val[1];
1139: yv[idx+2] += val[2];
1140: yv[idx+3] += val[3];
1141: yv[idx+4] += val[4];
1142: yv[idx+5] += val[5];
1143: yv[idx+6] += val[6];
1144: yv[idx+7] += val[7];
1145: val += 8;
1146: }
1147: #if !defined(PETSC_USE_COMPLEX)
1148: } else if (addv == MAX_VALUES) {
1149: for (i=0; i<n; i++) {
1150: idx = lindices[i];
1151: yv[idx] = PetscMax(yv[idx],val[0]);
1152: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1153: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1154: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1155: yv[idx+4] = PetscMax(yv[idx+4],val[4]);
1156: yv[idx+5] = PetscMax(yv[idx+5],val[5]);
1157: yv[idx+6] = PetscMax(yv[idx+6],val[6]);
1158: yv[idx+7] = PetscMax(yv[idx+7],val[7]);
1159: val += 8;
1160: }
1161: #endif
1162: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1163: count--;
1164: }
1165: /* wait on sends */
1166: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
1167: VecRestoreArray(yin,&yv);
1168: return(0);
1169: }
1170: /* --------------------------------------------------------------------------------------*/
1174: PetscErrorCode VecScatterBegin_PtoP_7(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1175: {
1176: VecScatter_MPI_General *gen_to,*gen_from;
1177: PetscScalar *xv,*yv,*val,*svalues;
1178: MPI_Request *rwaits,*swaits;
1179: PetscErrorCode ierr;
1180: PetscInt i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;
1183: VecGetArray(xin,&xv);
1184: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
1185: if (mode & SCATTER_REVERSE) {
1186: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1187: gen_from = (VecScatter_MPI_General*)ctx->todata;
1188: rwaits = gen_from->rev_requests;
1189: swaits = gen_to->rev_requests;
1190: } else {
1191: gen_to = (VecScatter_MPI_General*)ctx->todata;
1192: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1193: rwaits = gen_from->requests;
1194: swaits = gen_to->requests;
1195: }
1196: svalues = gen_to->values;
1197: nrecvs = gen_from->n;
1198: nsends = gen_to->n;
1199: indices = gen_to->indices;
1200: sstarts = gen_to->starts;
1202: if (!(mode & SCATTER_LOCAL)) {
1204: if (gen_to->sendfirst) {
1205: /* this version packs and sends one at a time */
1206: val = svalues;
1207: for (i=0; i<nsends; i++) {
1208: iend = sstarts[i+1]-sstarts[i];
1210: for (j=0; j<iend; j++) {
1211: idx = *indices++;
1212: val[0] = xv[idx];
1213: val[1] = xv[idx+1];
1214: val[2] = xv[idx+2];
1215: val[3] = xv[idx+3];
1216: val[4] = xv[idx+4];
1217: val[5] = xv[idx+5];
1218: val[6] = xv[idx+6];
1219: val += 7;
1220: }
1221: MPI_Start_isend(7*iend,swaits+i);
1222: }
1223: }
1225: if (!gen_from->use_readyreceiver) {
1226: /* post receives since they were not posted in VecScatterPostRecvs() */
1227: if (nrecvs) {MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);}
1228: }
1230: if (!gen_to->sendfirst) {
1231: /* this version packs all the messages together and sends */
1232: /*
1233: len = 5*sstarts[nsends];
1234: val = svalues;
1235: for (i=0; i<len; i += 5) {
1236: idx = *indices++;
1237: val[0] = xv[idx];
1238: val[1] = xv[idx+1];
1239: val[2] = xv[idx+2];
1240: val[3] = xv[idx+3];
1241: val[4] = xv[idx+4];
1242: val += 5;
1243: }
1244: MPI_Startall_isend(len,nsends,swaits);
1245: */
1247: /* this version packs and sends one at a time */
1248: val = svalues;
1249: for (i=0; i<nsends; i++) {
1250: iend = sstarts[i+1]-sstarts[i];
1252: for (j=0; j<iend; j++) {
1253: idx = *indices++;
1254: val[0] = xv[idx];
1255: val[1] = xv[idx+1];
1256: val[2] = xv[idx+2];
1257: val[3] = xv[idx+3];
1258: val[4] = xv[idx+4];
1259: val[5] = xv[idx+5];
1260: val[6] = xv[idx+6];
1261: val += 7;
1262: }
1263: MPI_Start_isend(7*iend,swaits+i);
1264: }
1265: }
1266: }
1268: /* take care of local scatters */
1269: if (gen_to->local.n) {
1270: PetscInt *tslots = gen_to->local.vslots,*fslots = gen_from->local.vslots;
1271: PetscInt n = gen_to->local.n,il,ir;
1272: if (addv == INSERT_VALUES) {
1273: if (gen_to->local.is_copy) {
1274: PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1275: } else {
1276: for (i=0; i<n; i++) {
1277: il = fslots[i]; ir = tslots[i];
1278: yv[il] = xv[ir];
1279: yv[il+1] = xv[ir+1];
1280: yv[il+2] = xv[ir+2];
1281: yv[il+3] = xv[ir+3];
1282: yv[il+4] = xv[ir+4];
1283: yv[il+5] = xv[ir+5];
1284: yv[il+6] = xv[ir+6];
1285: }
1286: }
1287: } else if (addv == ADD_VALUES) {
1288: for (i=0; i<n; i++) {
1289: il = fslots[i]; ir = tslots[i];
1290: yv[il] += xv[ir];
1291: yv[il+1] += xv[ir+1];
1292: yv[il+2] += xv[ir+2];
1293: yv[il+3] += xv[ir+3];
1294: yv[il+4] += xv[ir+4];
1295: yv[il+5] += xv[ir+5];
1296: yv[il+6] += xv[ir+6];
1297: }
1298: #if !defined(PETSC_USE_COMPLEX)
1299: } else if (addv == MAX_VALUES) {
1300: for (i=0; i<n; i++) {
1301: il = fslots[i]; ir = tslots[i];
1302: yv[il] = PetscMax(yv[il],xv[ir]);
1303: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1304: yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1305: yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
1306: yv[il+4] = PetscMax(yv[il+4],xv[ir+4]);
1307: yv[il+5] = PetscMax(yv[il+5],xv[ir+5]);
1308: yv[il+6] = PetscMax(yv[il+6],xv[ir+6]);
1309: }
1310: #endif
1311: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1312: }
1313: VecRestoreArray(xin,&xv);
1314: if (xin != yin) {VecRestoreArray(yin,&yv);}
1315: return(0);
1316: }
1318: /* --------------------------------------------------------------------------------------*/
1322: PetscErrorCode VecScatterEnd_PtoP_7(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1323: {
1324: VecScatter_MPI_General *gen_to,*gen_from;
1325: PetscScalar *rvalues,*yv,*val;
1326: PetscErrorCode ierr;
1327: PetscInt nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
1328: PetscMPIInt imdex;
1329: MPI_Request *rwaits,*swaits;
1330: MPI_Status rstatus,*sstatus;
1333: if (mode & SCATTER_LOCAL) return(0);
1334: VecGetArray(yin,&yv);
1336: if (mode & SCATTER_REVERSE) {
1337: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1338: gen_from = (VecScatter_MPI_General*)ctx->todata;
1339: rwaits = gen_from->rev_requests;
1340: swaits = gen_to->rev_requests;
1341: sstatus = gen_from->sstatus;
1342: } else {
1343: gen_to = (VecScatter_MPI_General*)ctx->todata;
1344: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1345: rwaits = gen_from->requests;
1346: swaits = gen_to->requests;
1347: sstatus = gen_to->sstatus;
1348: }
1349: rvalues = gen_from->values;
1350: nrecvs = gen_from->n;
1351: nsends = gen_to->n;
1352: indices = gen_from->indices;
1353: rstarts = gen_from->starts;
1355: /* wait on receives */
1356: count = nrecvs;
1357: while (count) {
1358: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
1359: /* unpack receives into our local space */
1360: val = rvalues + 7*rstarts[imdex];
1361: lindices = indices + rstarts[imdex];
1362: n = rstarts[imdex+1] - rstarts[imdex];
1363: if (addv == INSERT_VALUES) {
1364: for (i=0; i<n; i++) {
1365: idx = lindices[i];
1366: yv[idx] = val[0];
1367: yv[idx+1] = val[1];
1368: yv[idx+2] = val[2];
1369: yv[idx+3] = val[3];
1370: yv[idx+4] = val[4];
1371: yv[idx+5] = val[5];
1372: yv[idx+6] = val[6];
1373: val += 7;
1374: }
1375: } else if (addv == ADD_VALUES) {
1376: for (i=0; i<n; i++) {
1377: idx = lindices[i];
1378: yv[idx] += val[0];
1379: yv[idx+1] += val[1];
1380: yv[idx+2] += val[2];
1381: yv[idx+3] += val[3];
1382: yv[idx+4] += val[4];
1383: yv[idx+5] += val[5];
1384: yv[idx+6] += val[6];
1385: val += 7;
1386: }
1387: #if !defined(PETSC_USE_COMPLEX)
1388: } else if (addv == MAX_VALUES) {
1389: for (i=0; i<n; i++) {
1390: idx = lindices[i];
1391: yv[idx] = PetscMax(yv[idx],val[0]);
1392: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1393: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1394: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1395: yv[idx+4] = PetscMax(yv[idx+4],val[4]);
1396: yv[idx+5] = PetscMax(yv[idx+5],val[5]);
1397: yv[idx+6] = PetscMax(yv[idx+6],val[6]);
1398: val += 7;
1399: }
1400: #endif
1401: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1402: count--;
1403: }
1404: /* wait on sends */
1405: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
1406: VecRestoreArray(yin,&yv);
1407: return(0);
1408: }
1410: /* --------------------------------------------------------------------------------------*/
1414: PetscErrorCode VecScatterBegin_PtoP_6(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1415: {
1416: VecScatter_MPI_General *gen_to,*gen_from;
1417: PetscScalar *xv,*yv,*val,*svalues;
1418: MPI_Request *rwaits,*swaits;
1419: PetscErrorCode ierr;
1420: PetscInt i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;
1423: VecGetArray(xin,&xv);
1424: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
1425: if (mode & SCATTER_REVERSE) {
1426: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1427: gen_from = (VecScatter_MPI_General*)ctx->todata;
1428: rwaits = gen_from->rev_requests;
1429: swaits = gen_to->rev_requests;
1430: } else {
1431: gen_to = (VecScatter_MPI_General*)ctx->todata;
1432: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1433: rwaits = gen_from->requests;
1434: swaits = gen_to->requests;
1435: }
1436: svalues = gen_to->values;
1437: nrecvs = gen_from->n;
1438: nsends = gen_to->n;
1439: indices = gen_to->indices;
1440: sstarts = gen_to->starts;
1442: if (!(mode & SCATTER_LOCAL)) {
1444: if (gen_to->sendfirst) {
1445: /* this version packs and sends one at a time */
1446: val = svalues;
1447: for (i=0; i<nsends; i++) {
1448: iend = sstarts[i+1]-sstarts[i];
1450: for (j=0; j<iend; j++) {
1451: idx = *indices++;
1452: val[0] = xv[idx];
1453: val[1] = xv[idx+1];
1454: val[2] = xv[idx+2];
1455: val[3] = xv[idx+3];
1456: val[4] = xv[idx+4];
1457: val[5] = xv[idx+5];
1458: val += 6;
1459: }
1460: MPI_Start_isend(6*iend,swaits+i);
1461: }
1462: }
1464: if (!gen_from->use_readyreceiver) {
1465: /* post receives since they were not posted in VecScatterPostRecvs() */
1466: if (nrecvs) {MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);}
1467: }
1469: if (!gen_to->sendfirst) {
1470: /* this version packs all the messages together and sends */
1471: /*
1472: len = 5*sstarts[nsends];
1473: val = svalues;
1474: for (i=0; i<len; i += 5) {
1475: idx = *indices++;
1476: val[0] = xv[idx];
1477: val[1] = xv[idx+1];
1478: val[2] = xv[idx+2];
1479: val[3] = xv[idx+3];
1480: val[4] = xv[idx+4];
1481: val += 5;
1482: }
1483: MPI_Startall_isend(len,nsends,swaits);
1484: */
1486: /* this version packs and sends one at a time */
1487: val = svalues;
1488: for (i=0; i<nsends; i++) {
1489: iend = sstarts[i+1]-sstarts[i];
1491: for (j=0; j<iend; j++) {
1492: idx = *indices++;
1493: val[0] = xv[idx];
1494: val[1] = xv[idx+1];
1495: val[2] = xv[idx+2];
1496: val[3] = xv[idx+3];
1497: val[4] = xv[idx+4];
1498: val[5] = xv[idx+5];
1499: val += 6;
1500: }
1501: MPI_Start_isend(6*iend,swaits+i);
1502: }
1503: }
1504: }
1506: /* take care of local scatters */
1507: if (gen_to->local.n) {
1508: PetscInt *tslots = gen_to->local.vslots,*fslots = gen_from->local.vslots;
1509: PetscInt n = gen_to->local.n,il,ir;
1510: if (addv == INSERT_VALUES) {
1511: if (gen_to->local.is_copy) {
1512: PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1513: } else {
1514: for (i=0; i<n; i++) {
1515: il = fslots[i]; ir = tslots[i];
1516: yv[il] = xv[ir];
1517: yv[il+1] = xv[ir+1];
1518: yv[il+2] = xv[ir+2];
1519: yv[il+3] = xv[ir+3];
1520: yv[il+4] = xv[ir+4];
1521: yv[il+5] = xv[ir+5];
1522: }
1523: }
1524: } else if (addv == ADD_VALUES) {
1525: for (i=0; i<n; i++) {
1526: il = fslots[i]; ir = tslots[i];
1527: yv[il] += xv[ir];
1528: yv[il+1] += xv[ir+1];
1529: yv[il+2] += xv[ir+2];
1530: yv[il+3] += xv[ir+3];
1531: yv[il+4] += xv[ir+4];
1532: yv[il+5] += xv[ir+5];
1533: }
1534: #if !defined(PETSC_USE_COMPLEX)
1535: } else if (addv == MAX_VALUES) {
1536: for (i=0; i<n; i++) {
1537: il = fslots[i]; ir = tslots[i];
1538: yv[il] = PetscMax(yv[il],xv[ir]);
1539: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1540: yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1541: yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
1542: yv[il+4] = PetscMax(yv[il+4],xv[ir+4]);
1543: yv[il+5] = PetscMax(yv[il+5],xv[ir+5]);
1544: }
1545: #endif
1546: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1547: }
1548: VecRestoreArray(xin,&xv);
1549: if (xin != yin) {VecRestoreArray(yin,&yv);}
1550: return(0);
1551: }
1553: /* --------------------------------------------------------------------------------------*/
1557: PetscErrorCode VecScatterEnd_PtoP_6(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1558: {
1559: VecScatter_MPI_General *gen_to,*gen_from;
1560: PetscScalar *rvalues,*yv,*val;
1561: PetscErrorCode ierr;
1562: PetscInt nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
1563: PetscMPIInt imdex;
1564: MPI_Request *rwaits,*swaits;
1565: MPI_Status rstatus,*sstatus;
1568: if (mode & SCATTER_LOCAL) return(0);
1569: VecGetArray(yin,&yv);
1571: if (mode & SCATTER_REVERSE) {
1572: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1573: gen_from = (VecScatter_MPI_General*)ctx->todata;
1574: rwaits = gen_from->rev_requests;
1575: swaits = gen_to->rev_requests;
1576: sstatus = gen_from->sstatus;
1577: } else {
1578: gen_to = (VecScatter_MPI_General*)ctx->todata;
1579: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1580: rwaits = gen_from->requests;
1581: swaits = gen_to->requests;
1582: sstatus = gen_to->sstatus;
1583: }
1584: rvalues = gen_from->values;
1585: nrecvs = gen_from->n;
1586: nsends = gen_to->n;
1587: indices = gen_from->indices;
1588: rstarts = gen_from->starts;
1590: /* wait on receives */
1591: count = nrecvs;
1592: while (count) {
1593: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
1594: /* unpack receives into our local space */
1595: val = rvalues + 6*rstarts[imdex];
1596: lindices = indices + rstarts[imdex];
1597: n = rstarts[imdex+1] - rstarts[imdex];
1598: if (addv == INSERT_VALUES) {
1599: for (i=0; i<n; i++) {
1600: idx = lindices[i];
1601: yv[idx] = val[0];
1602: yv[idx+1] = val[1];
1603: yv[idx+2] = val[2];
1604: yv[idx+3] = val[3];
1605: yv[idx+4] = val[4];
1606: yv[idx+5] = val[5];
1607: val += 6;
1608: }
1609: } else if (addv == ADD_VALUES) {
1610: for (i=0; i<n; i++) {
1611: idx = lindices[i];
1612: yv[idx] += val[0];
1613: yv[idx+1] += val[1];
1614: yv[idx+2] += val[2];
1615: yv[idx+3] += val[3];
1616: yv[idx+4] += val[4];
1617: yv[idx+5] += val[5];
1618: val += 6;
1619: }
1620: #if !defined(PETSC_USE_COMPLEX)
1621: } else if (addv == MAX_VALUES) {
1622: for (i=0; i<n; i++) {
1623: idx = lindices[i];
1624: yv[idx] = PetscMax(yv[idx],val[0]);
1625: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1626: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1627: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1628: yv[idx+4] = PetscMax(yv[idx+4],val[4]);
1629: yv[idx+5] = PetscMax(yv[idx+5],val[5]);
1630: val += 6;
1631: }
1632: #endif
1633: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1634: count--;
1635: }
1636: /* wait on sends */
1637: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
1638: VecRestoreArray(yin,&yv);
1639: return(0);
1640: }
1642: /* --------------------------------------------------------------------------------------*/
1646: PetscErrorCode VecScatterBegin_PtoP_5(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1647: {
1648: VecScatter_MPI_General *gen_to,*gen_from;
1649: PetscScalar *xv,*yv,*val,*svalues;
1650: MPI_Request *rwaits,*swaits;
1651: PetscErrorCode ierr;
1652: PetscInt i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;
1655: VecGetArray(xin,&xv);
1656: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
1657: if (mode & SCATTER_REVERSE) {
1658: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1659: gen_from = (VecScatter_MPI_General*)ctx->todata;
1660: rwaits = gen_from->rev_requests;
1661: swaits = gen_to->rev_requests;
1662: } else {
1663: gen_to = (VecScatter_MPI_General*)ctx->todata;
1664: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1665: rwaits = gen_from->requests;
1666: swaits = gen_to->requests;
1667: }
1668: svalues = gen_to->values;
1669: nrecvs = gen_from->n;
1670: nsends = gen_to->n;
1671: indices = gen_to->indices;
1672: sstarts = gen_to->starts;
1674: if (!(mode & SCATTER_LOCAL)) {
1676: if (gen_to->sendfirst) {
1677: /* this version packs and sends one at a time */
1678: val = svalues;
1679: for (i=0; i<nsends; i++) {
1680: iend = sstarts[i+1]-sstarts[i];
1682: for (j=0; j<iend; j++) {
1683: idx = *indices++;
1684: val[0] = xv[idx];
1685: val[1] = xv[idx+1];
1686: val[2] = xv[idx+2];
1687: val[3] = xv[idx+3];
1688: val[4] = xv[idx+4];
1689: val += 5;
1690: }
1691: MPI_Start_isend(5*iend,swaits+i);
1692: }
1693: }
1695: if (!gen_from->use_readyreceiver) {
1696: /* post receives since they were not posted in VecScatterPostRecvs() */
1697: if (nrecvs) {MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);}
1698: }
1700: if (!gen_to->sendfirst) {
1701: /* this version packs all the messages together and sends */
1702: /*
1703: len = 5*sstarts[nsends];
1704: val = svalues;
1705: for (i=0; i<len; i += 5) {
1706: idx = *indices++;
1707: val[0] = xv[idx];
1708: val[1] = xv[idx+1];
1709: val[2] = xv[idx+2];
1710: val[3] = xv[idx+3];
1711: val[4] = xv[idx+4];
1712: val += 5;
1713: }
1714: MPI_Startall_isend(len,nsends,swaits);
1715: */
1717: /* this version packs and sends one at a time */
1718: val = svalues;
1719: for (i=0; i<nsends; i++) {
1720: iend = sstarts[i+1]-sstarts[i];
1722: for (j=0; j<iend; j++) {
1723: idx = *indices++;
1724: val[0] = xv[idx];
1725: val[1] = xv[idx+1];
1726: val[2] = xv[idx+2];
1727: val[3] = xv[idx+3];
1728: val[4] = xv[idx+4];
1729: val += 5;
1730: }
1731: MPI_Start_isend(5*iend,swaits+i);
1732: }
1733: }
1734: }
1736: /* take care of local scatters */
1737: if (gen_to->local.n) {
1738: PetscInt *tslots = gen_to->local.vslots,*fslots = gen_from->local.vslots;
1739: PetscInt n = gen_to->local.n,il,ir;
1740: if (addv == INSERT_VALUES) {
1741: if (gen_to->local.is_copy) {
1742: PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1743: } else {
1744: for (i=0; i<n; i++) {
1745: il = fslots[i]; ir = tslots[i];
1746: yv[il] = xv[ir];
1747: yv[il+1] = xv[ir+1];
1748: yv[il+2] = xv[ir+2];
1749: yv[il+3] = xv[ir+3];
1750: yv[il+4] = xv[ir+4];
1751: }
1752: }
1753: } else if (addv == ADD_VALUES) {
1754: for (i=0; i<n; i++) {
1755: il = fslots[i]; ir = tslots[i];
1756: yv[il] += xv[ir];
1757: yv[il+1] += xv[ir+1];
1758: yv[il+2] += xv[ir+2];
1759: yv[il+3] += xv[ir+3];
1760: yv[il+4] += xv[ir+4];
1761: }
1762: #if !defined(PETSC_USE_COMPLEX)
1763: } else if (addv == MAX_VALUES) {
1764: for (i=0; i<n; i++) {
1765: il = fslots[i]; ir = tslots[i];
1766: yv[il] = PetscMax(yv[il],xv[ir]);
1767: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1768: yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1769: yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
1770: yv[il+4] = PetscMax(yv[il+4],xv[ir+4]);
1771: }
1772: #endif
1773: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1774: }
1775: VecRestoreArray(xin,&xv);
1776: if (xin != yin) {VecRestoreArray(yin,&yv);}
1777: return(0);
1778: }
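/*
   Editorial sketch, not part of the original vpscat.c: the Begin routines all pack
   bs consecutive vector entries per index into the persistent-send buffer and then
   start the matching persistent request. A non-unrolled version of the default
   one-message-at-a-time path might look like the helper below; PackAndStartSends is
   a hypothetical name, and plain MPI_Start() stands in for the MPI_Start_isend()
   macro used by the real code (the message lengths were fixed when the persistent
   sends were registered in VecScatterCreate_PtoS()).
*/
static void PackAndStartSends(PetscInt nsends,PetscInt bs,const PetscInt *sstarts,const PetscInt *indices,
                              const PetscScalar *xv,PetscScalar *svalues,MPI_Request *swaits)
{
  PetscInt    i,j,k,idx;
  PetscScalar *val = svalues;
  for (i=0; i<nsends; i++) {
    for (j=sstarts[i]; j<sstarts[i+1]; j++) {
      idx = indices[j];
      for (k=0; k<bs; k++) val[k] = xv[idx+k];   /* copy one block into the send buffer */
      val += bs;
    }
    MPI_Start(&swaits[i]);                       /* start the persistent send to the i-th destination */
  }
}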
1780: /* --------------------------------------------------------------------------------------*/
1784: PetscErrorCode VecScatterEnd_PtoP_5(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1785: {
1786: VecScatter_MPI_General *gen_to,*gen_from;
1787: PetscScalar *rvalues,*yv,*val;
1788: PetscErrorCode ierr;
1789: PetscInt nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
1790: PetscMPIInt imdex;
1791: MPI_Request *rwaits,*swaits;
1792: MPI_Status rstatus,*sstatus;
1795: if (mode & SCATTER_LOCAL) return(0);
1796: VecGetArray(yin,&yv);
1798: if (mode & SCATTER_REVERSE) {
1799: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1800: gen_from = (VecScatter_MPI_General*)ctx->todata;
1801: rwaits = gen_from->rev_requests;
1802: swaits = gen_to->rev_requests;
1803: sstatus = gen_from->sstatus;
1804: } else {
1805: gen_to = (VecScatter_MPI_General*)ctx->todata;
1806: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1807: rwaits = gen_from->requests;
1808: swaits = gen_to->requests;
1809: sstatus = gen_to->sstatus;
1810: }
1811: rvalues = gen_from->values;
1812: nrecvs = gen_from->n;
1813: nsends = gen_to->n;
1814: indices = gen_from->indices;
1815: rstarts = gen_from->starts;
1817: /* wait on receives */
1818: count = nrecvs;
1819: while (count) {
1820: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
1821: /* unpack receives into our local space */
1822: val = rvalues + 5*rstarts[imdex];
1823: lindices = indices + rstarts[imdex];
1824: n = rstarts[imdex+1] - rstarts[imdex];
1825: if (addv == INSERT_VALUES) {
1826: for (i=0; i<n; i++) {
1827: idx = lindices[i];
1828: yv[idx] = val[0];
1829: yv[idx+1] = val[1];
1830: yv[idx+2] = val[2];
1831: yv[idx+3] = val[3];
1832: yv[idx+4] = val[4];
1833: val += 5;
1834: }
1835: } else if (addv == ADD_VALUES) {
1836: for (i=0; i<n; i++) {
1837: idx = lindices[i];
1838: yv[idx] += val[0];
1839: yv[idx+1] += val[1];
1840: yv[idx+2] += val[2];
1841: yv[idx+3] += val[3];
1842: yv[idx+4] += val[4];
1843: val += 5;
1844: }
1845: #if !defined(PETSC_USE_COMPLEX)
1846: } else if (addv == MAX_VALUES) {
1847: for (i=0; i<n; i++) {
1848: idx = lindices[i];
1849: yv[idx] = PetscMax(yv[idx],val[0]);
1850: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1851: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1852: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1853: yv[idx+4] = PetscMax(yv[idx+4],val[4]);
1854: val += 5;
1855: }
1856: #endif
1857: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1858: count--;
1859: }
1860: /* wait on sends */
1861: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
1862: VecRestoreArray(yin,&yv);
1863: return(0);
1864: }
1866: /* --------------------------------------------------------------------------------------*/
1870: PetscErrorCode VecScatterBegin_PtoP_4(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1871: {
1872: VecScatter_MPI_General *gen_to,*gen_from;
1873: PetscScalar *xv,*yv,*val,*svalues;
1874: MPI_Request *rwaits,*swaits;
1875: PetscInt *indices,*sstarts,iend,i,j,nrecvs,nsends,idx,len;
1876: PetscErrorCode ierr;
1879: VecGetArray(xin,&xv);
1880: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
1882: if (mode & SCATTER_REVERSE) {
1883: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1884: gen_from = (VecScatter_MPI_General*)ctx->todata;
1885: rwaits = gen_from->rev_requests;
1886: swaits = gen_to->rev_requests;
1887: } else {
1888: gen_to = (VecScatter_MPI_General*)ctx->todata;
1889: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1890: rwaits = gen_from->requests;
1891: swaits = gen_to->requests;
1892: }
1893: svalues = gen_to->values;
1894: nrecvs = gen_from->n;
1895: nsends = gen_to->n;
1896: indices = gen_to->indices;
1897: sstarts = gen_to->starts;
1899: if (!(mode & SCATTER_LOCAL)) {
1901: if (!gen_from->use_readyreceiver && !gen_to->sendfirst) {
1902: /* post receives since they were not posted in VecScatterPostRecvs() */
1903: if (nrecvs) {MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);}
1904: }
1906: if (ctx->packtogether) {
1907: /* this version packs all the messages together and sends them; used when -vecscatter_packtogether is given */
1908: len = 4*sstarts[nsends];
1909: val = svalues;
1910: for (i=0; i<len; i += 4) {
1911: idx = *indices++;
1912: val[0] = xv[idx];
1913: val[1] = xv[idx+1];
1914: val[2] = xv[idx+2];
1915: val[3] = xv[idx+3];
1916: val += 4;
1917: }
1918: if (nsends) {MPI_Startall_isend(len,nsends,swaits);}
1919: } else {
1920: /* this version packs and sends one message at a time (the default) */
1921: val = svalues;
1922: for (i=0; i<nsends; i++) {
1923: iend = sstarts[i+1]-sstarts[i];
1925: for (j=0; j<iend; j++) {
1926: idx = *indices++;
1927: val[0] = xv[idx];
1928: val[1] = xv[idx+1];
1929: val[2] = xv[idx+2];
1930: val[3] = xv[idx+3];
1931: val += 4;
1932: }
1933: MPI_Start_isend(4*iend,swaits+i);
1934: }
1935: }
1937: if (!gen_from->use_readyreceiver && gen_to->sendfirst) {
1938: /* post receives since they were not posted in VecScatterPostRecvs() */
1939: if (nrecvs) {MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);}
1940: }
1941: }
1943: /* take care of local scatters */
1944: if (gen_to->local.n) {
1945: PetscInt *tslots = gen_to->local.vslots,*fslots = gen_from->local.vslots;
1946: PetscInt n = gen_to->local.n,il,ir;
1947: if (addv == INSERT_VALUES) {
1948: if (gen_to->local.is_copy) {
1949: PetscMemcpy(yv+gen_from->local.copy_start,xv+gen_to->local.copy_start,gen_to->local.copy_length);
1950: } else {
1951: for (i=0; i<n; i++) {
1952: il = fslots[i]; ir = tslots[i];
1953: yv[il] = xv[ir];
1954: yv[il+1] = xv[ir+1];
1955: yv[il+2] = xv[ir+2];
1956: yv[il+3] = xv[ir+3];
1957: }
1958: }
1959: } else if (addv == ADD_VALUES) {
1960: for (i=0; i<n; i++) {
1961: il = fslots[i]; ir = tslots[i];
1962: yv[il] += xv[ir];
1963: yv[il+1] += xv[ir+1];
1964: yv[il+2] += xv[ir+2];
1965: yv[il+3] += xv[ir+3];
1966: }
1967: #if !defined(PETSC_USE_COMPLEX)
1968: } else if (addv == MAX_VALUES) {
1969: for (i=0; i<n; i++) {
1970: il = fslots[i]; ir = tslots[i];
1971: yv[il] = PetscMax(yv[il],xv[ir]);
1972: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1973: yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1974: yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
1975: }
1976: #endif
1977: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1978: }
1979: VecRestoreArray(xin,&xv);
1980: if (xin != yin) {VecRestoreArray(yin,&yv);}
1981: return(0);
1982: }
1984: /* --------------------------------------------------------------------------------------*/
1988: PetscErrorCode VecScatterEnd_PtoP_4(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1989: {
1990: VecScatter_MPI_General *gen_to,*gen_from;
1991: PetscScalar *rvalues,*yv,*val;
1992: PetscErrorCode ierr;
1993: PetscInt nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
1994: PetscMPIInt imdex;
1995: MPI_Request *rwaits,*swaits;
1996: MPI_Status *rstatus,*sstatus;
1999: if (mode & SCATTER_LOCAL) return(0);
2000: VecGetArray(yin,&yv);
2002: if (mode & SCATTER_REVERSE) {
2003: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
2004: gen_from = (VecScatter_MPI_General*)ctx->todata;
2005: rwaits = gen_from->rev_requests;
2006: swaits = gen_to->rev_requests;
2007: sstatus = gen_from->sstatus;
2008: rstatus = gen_from->rstatus;
2009: } else {
2010: gen_to = (VecScatter_MPI_General*)ctx->todata;
2011: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
2012: rwaits = gen_from->requests;
2013: swaits = gen_to->requests;
2014: sstatus = gen_to->sstatus;
2015: rstatus = gen_to->rstatus;
2016: }
2017: rvalues = gen_from->values;
2018: nrecvs = gen_from->n;
2019: nsends = gen_to->n;
2020: indices = gen_from->indices;
2021: rstarts = gen_from->starts;
2023: /* wait on receives */
2024: count = nrecvs;
2025: if (ctx->packtogether) { /* receive all messages, then unpack them all at once; used when -vecscatter_packtogether is given */
2026: if (nrecvs) {MPI_Waitall(nrecvs,rwaits,rstatus);}
2027: n = rstarts[count];
2028: val = rvalues;
2029: lindices = indices;
2030: if (addv == INSERT_VALUES) {
2031: for (i=0; i<n; i++) {
2032: idx = lindices[i];
2033: yv[idx] = val[0];
2034: yv[idx+1] = val[1];
2035: yv[idx+2] = val[2];
2036: yv[idx+3] = val[3];
2037: val += 4;
2038: }
2039: } else if (addv == ADD_VALUES) {
2040: for (i=0; i<n; i++) {
2041: idx = lindices[i];
2042: yv[idx] += val[0];
2043: yv[idx+1] += val[1];
2044: yv[idx+2] += val[2];
2045: yv[idx+3] += val[3];
2046: val += 4;
2047: }
2048: #if !defined(PETSC_USE_COMPLEX)
2049: } else if (addv == MAX_VALUES) {
2050: for (i=0; i<n; i++) {
2051: idx = lindices[i];
2052: yv[idx] = PetscMax(yv[idx],val[0]);
2053: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
2054: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
2055: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
2056: val += 4;
2057: }
2058: #endif
2059: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
2060: } else { /* unpack each message as it arrives, default version */
2061: while (count) {
2062: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus[0]);
2063: /* unpack receives into our local space */
2064: val = rvalues + 4*rstarts[imdex];
2065: lindices = indices + rstarts[imdex];
2066: n = rstarts[imdex+1] - rstarts[imdex];
2067: if (addv == INSERT_VALUES) {
2068: for (i=0; i<n; i++) {
2069: idx = lindices[i];
2070: yv[idx] = val[0];
2071: yv[idx+1] = val[1];
2072: yv[idx+2] = val[2];
2073: yv[idx+3] = val[3];
2074: val += 4;
2075: }
2076: } else if (addv == ADD_VALUES) {
2077: for (i=0; i<n; i++) {
2078: idx = lindices[i];
2079: yv[idx] += val[0];
2080: yv[idx+1] += val[1];
2081: yv[idx+2] += val[2];
2082: yv[idx+3] += val[3];
2083: val += 4;
2084: }
2085: #if !defined(PETSC_USE_COMPLEX)
2086: } else if (addv == MAX_VALUES) {
2087: for (i=0; i<n; i++) {
2088: idx = lindices[i];
2089: yv[idx] = PetscMax(yv[idx],val[0]);
2090: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
2091: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
2092: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
2093: val += 4;
2094: }
2095: #endif
2096: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
2097: count--;
2098: }
2099: }
2101: /* wait on sends */
2102: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
2103: VecRestoreArray(yin,&yv);
2104: return(0);
2105: }
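/*
   Editorial sketch, not part of the original vpscat.c: VecScatterEnd_PtoP_4 is the
   only End routine in this family with both unpack strategies. Written in terms of
   the hypothetical UnpackBlocked() helper sketched earlier, the two paths are: wait
   for all messages and unpack once (-vecscatter_packtogether), or unpack each
   message as it arrives so unpacking overlaps the remaining receives (the default).
*/
static void UnpackAllOrEach(PetscTruth packtogether,PetscInt nrecvs,PetscInt bs,
                            const PetscInt *rstarts,const PetscInt *indices,
                            const PetscScalar *rvalues,PetscScalar *yv,InsertMode addv,
                            MPI_Request *rwaits,MPI_Status *rstatus)
{
  PetscMPIInt imdex;
  PetscInt    count;
  if (packtogether) {
    if (nrecvs) MPI_Waitall(nrecvs,rwaits,rstatus);
    UnpackBlocked(rstarts[nrecvs],bs,indices,rvalues,yv,addv);
  } else {
    for (count=nrecvs; count; count--) {
      MPI_Waitany(nrecvs,rwaits,&imdex,rstatus);
      UnpackBlocked(rstarts[imdex+1]-rstarts[imdex],bs,indices+rstarts[imdex],
                    rvalues+bs*rstarts[imdex],yv,addv);
    }
  }
}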
2107: /* --------------------------------------------------------------------------------------*/
2111: PetscErrorCode VecScatterBegin_PtoP_3(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
2112: {
2113: VecScatter_MPI_General *gen_to,*gen_from;
2114: PetscScalar *xv,*yv,*val,*svalues;
2115: MPI_Request *rwaits,*swaits;
2116: PetscErrorCode ierr;
2117: PetscInt i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;
2120: VecGetArray(xin,&xv);
2121: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
2123: if (mode & SCATTER_REVERSE) {
2124: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
2125: gen_from = (VecScatter_MPI_General*)ctx->todata;
2126: rwaits = gen_from->rev_requests;
2127: swaits = gen_to->rev_requests;
2128: } else {
2129: gen_to = (VecScatter_MPI_General*)ctx->todata;
2130: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
2131: rwaits = gen_from->requests;
2132: swaits = gen_to->requests;
2133: }
2134: svalues = gen_to->values;
2135: nrecvs = gen_from->n;
2136: nsends = gen_to->n;
2137: indices = gen_to->indices;
2138: sstarts = gen_to->starts;
2140: if (!(mode & SCATTER_LOCAL)) {
2142: if (gen_to->sendfirst) {
2143: /* this version packs and sends one at a time */
2144: val = svalues;
2145: for (i=0; i<nsends; i++) {
2146: iend = sstarts[i+1]-sstarts[i];
2148: for (j=0; j<iend; j++) {
2149: idx = *indices++;
2150: val[0] = xv[idx];
2151: val[1] = xv[idx+1];
2152: val[2] = xv[idx+2];
2153: val += 3;
2154: }
2155: MPI_Start_isend(3*iend,swaits+i);
2156: }
2157: }
2159: if (!gen_from->use_readyreceiver) {
2160: /* post receives since they were not posted in VecScatterPostRecvs() */
2161: if (nrecvs) {MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);}
2162: }
2164: if (!gen_to->sendfirst) {
2165: /* this version packs all the messages together and sends */
2166: /*
2167: len = 3*sstarts[nsends];
2168: val = svalues;
2169: for (i=0; i<len; i += 3) {
2170: idx = *indices++;
2171: val[0] = xv[idx];
2172: val[1] = xv[idx+1];
2173: val[2] = xv[idx+2];
2174: val += 3;
2175: }
2176: MPI_Startall_isend(len,nsends,swaits);
2177: */
2179: /* this version packs and sends one at a time */
2180: val = svalues;
2181: for (i=0; i<nsends; i++) {
2182: iend = sstarts[i+1]-sstarts[i];
2184: for (j=0; j<iend; j++) {
2185: idx = *indices++;
2186: val[0] = xv[idx];
2187: val[1] = xv[idx+1];
2188: val[2] = xv[idx+2];
2189: val += 3;
2190: }
2191: MPI_Start_isend(3*iend,swaits+i);
2192: }
2193: }
2194: }
2196: /* take care of local scatters */
2197: if (gen_to->local.n) {
2198: PetscInt *tslots = gen_to->local.vslots,*fslots = gen_from->local.vslots;
2199: PetscInt n = gen_to->local.n,il,ir;
2200: if (addv == INSERT_VALUES) {
2201: if (gen_to->local.is_copy) {
2202: PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
2203: } else {
2204: for (i=0; i<n; i++) {
2205: il = fslots[i]; ir = tslots[i];
2206: yv[il] = xv[ir];
2207: yv[il+1] = xv[ir+1];
2208: yv[il+2] = xv[ir+2];
2209: }
2210: }
2211: } else if (addv == ADD_VALUES) {
2212: for (i=0; i<n; i++) {
2213: il = fslots[i]; ir = tslots[i];
2214: yv[il] += xv[ir];
2215: yv[il+1] += xv[ir+1];
2216: yv[il+2] += xv[ir+2];
2217: }
2218: #if !defined(PETSC_USE_COMPLEX)
2219: } else if (addv == MAX_VALUES) {
2220: for (i=0; i<n; i++) {
2221: il = fslots[i]; ir = tslots[i];
2222: yv[il] = PetscMax(yv[il],xv[ir]);
2223: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
2224: yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
2225: }
2226: #endif
2227: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
2228: }
2229: VecRestoreArray(xin,&xv);
2230: if (xin != yin) {VecRestoreArray(yin,&yv);}
2231: return(0);
2232: }
2234: /* --------------------------------------------------------------------------------------*/
2238: PetscErrorCode VecScatterEnd_PtoP_3(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
2239: {
2240: VecScatter_MPI_General *gen_to,*gen_from;
2241: PetscScalar *rvalues,*yv,*val;
2242: PetscErrorCode ierr;
2243: PetscInt nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
2244: PetscMPIInt imdex;
2245: MPI_Request *rwaits,*swaits;
2246: MPI_Status rstatus,*sstatus;
2249: if (mode & SCATTER_LOCAL) return(0);
2250: VecGetArray(yin,&yv);
2252: if (mode & SCATTER_REVERSE) {
2253: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
2254: gen_from = (VecScatter_MPI_General*)ctx->todata;
2255: rwaits = gen_from->rev_requests;
2256: swaits = gen_to->rev_requests;
2257: sstatus = gen_from->sstatus;
2258: } else {
2259: gen_to = (VecScatter_MPI_General*)ctx->todata;
2260: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
2261: rwaits = gen_from->requests;
2262: swaits = gen_to->requests;
2263: sstatus = gen_to->sstatus;
2264: }
2265: rvalues = gen_from->values;
2266: nrecvs = gen_from->n;
2267: nsends = gen_to->n;
2268: indices = gen_from->indices;
2269: rstarts = gen_from->starts;
2271: /* wait on receives */
2272: count = nrecvs;
2273: while (count) {
2274: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
2275: /* unpack receives into our local space */
2276: val = rvalues + 3*rstarts[imdex];
2277: lindices = indices + rstarts[imdex];
2278: n = rstarts[imdex+1] - rstarts[imdex];
2279: if (addv == INSERT_VALUES) {
2280: for (i=0; i<n; i++) {
2281: idx = lindices[i];
2282: yv[idx] = val[0];
2283: yv[idx+1] = val[1];
2284: yv[idx+2] = val[2];
2285: val += 3;
2286: }
2287: } else if (addv == ADD_VALUES) {
2288: for (i=0; i<n; i++) {
2289: idx = lindices[i];
2290: yv[idx] += val[0];
2291: yv[idx+1] += val[1];
2292: yv[idx+2] += val[2];
2293: val += 3;
2294: }
2295: #if !defined(PETSC_USE_COMPLEX)
2296: } else if (addv == MAX_VALUES) {
2297: for (i=0; i<n; i++) {
2298: idx = lindices[i];
2299: yv[idx] = PetscMax(yv[idx],val[0]);
2300: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
2301: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
2302: val += 3;
2303: }
2304: #endif
2305: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
2306: count--;
2307: }
2308: /* wait on sends */
2309: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
2310: VecRestoreArray(yin,&yv);
2311: return(0);
2312: }
2314: /* --------------------------------------------------------------------------------------*/
2318: PetscErrorCode VecScatterBegin_PtoP_2(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
2319: {
2320: VecScatter_MPI_General *gen_to,*gen_from;
2321: PetscScalar *xv,*yv,*val,*svalues;
2322: MPI_Request *rwaits,*swaits;
2323: PetscErrorCode ierr;
2324: PetscInt i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;
2327: VecGetArray(xin,&xv);
2328: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
2329: if (mode & SCATTER_REVERSE) {
2330: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
2331: gen_from = (VecScatter_MPI_General*)ctx->todata;
2332: rwaits = gen_from->rev_requests;
2333: swaits = gen_to->rev_requests;
2334: } else {
2335: gen_to = (VecScatter_MPI_General*)ctx->todata;
2336: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
2337: rwaits = gen_from->requests;
2338: swaits = gen_to->requests;
2339: }
2340: svalues = gen_to->values;
2341: nrecvs = gen_from->n;
2342: nsends = gen_to->n;
2343: indices = gen_to->indices;
2344: sstarts = gen_to->starts;
2346: if (!(mode & SCATTER_LOCAL)) {
2348: if (gen_to->sendfirst) {
2349: /* this version packs and sends one at a time */
2350: val = svalues;
2351: for (i=0; i<nsends; i++) {
2352: iend = sstarts[i+1]-sstarts[i];
2354: for (j=0; j<iend; j++) {
2355: idx = *indices++;
2356: val[0] = xv[idx];
2357: val[1] = xv[idx+1];
2358: val += 2;
2359: }
2360: MPI_Start_isend(2*iend,swaits+i);
2361: }
2362: }
2364: if (!gen_from->use_readyreceiver) {
2365: /* post receives since they were not posted in VecScatterPostRecvs() */
2366: if (nrecvs) {MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);}
2367: }
2369: if (!gen_to->sendfirst) {
2370: /* this version packs all the messages together and sends */
2371: /*
2372: len = 2*sstarts[nsends];
2373: val = svalues;
2374: for (i=0; i<len; i += 2) {
2375: idx = *indices++;
2376: val[0] = xv[idx];
2377: val[1] = xv[idx+1];
2378: val += 2;
2379: }
2380: MPI_Startall_isend(len,nsends,swaits);
2381: */
2383: /* this version packs and sends one at a time */
2384: val = svalues;
2385: for (i=0; i<nsends; i++) {
2386: iend = sstarts[i+1]-sstarts[i];
2388: for (j=0; j<iend; j++) {
2389: idx = *indices++;
2390: val[0] = xv[idx];
2391: val[1] = xv[idx+1];
2392: val += 2;
2393: }
2394: MPI_Start_isend(2*iend,swaits+i);
2395: }
2396: }
2397: }
2399: /* take care of local scatters */
2400: if (gen_to->local.n) {
2401: PetscInt *tslots = gen_to->local.vslots,*fslots = gen_from->local.vslots;
2402: PetscInt n = gen_to->local.n,il,ir;
2403: if (addv == INSERT_VALUES) {
2404: if (gen_to->local.is_copy) {
2405: PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
2406: } else {
2407: for (i=0; i<n; i++) {
2408: il = fslots[i]; ir = tslots[i];
2409: yv[il] = xv[ir];
2410: yv[il+1] = xv[ir+1];
2411: }
2412: }
2413: } else if (addv == ADD_VALUES) {
2414: for (i=0; i<n; i++) {
2415: il = fslots[i]; ir = tslots[i];
2416: yv[il] += xv[ir];
2417: yv[il+1] += xv[ir+1];
2418: }
2419: #if !defined(PETSC_USE_COMPLEX)
2420: } else if (addv == MAX_VALUES) {
2421: for (i=0; i<n; i++) {
2422: il = fslots[i]; ir = tslots[i];
2423: yv[il] = PetscMax(yv[il],xv[ir]);
2424: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
2425: }
2426: #endif
2427: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
2428: }
2429: VecRestoreArray(xin,&xv);
2430: if (xin != yin) {VecRestoreArray(yin,&yv);}
2431: return(0);
2432: }
2434: /* --------------------------------------------------------------------------------------*/
2438: PetscErrorCode VecScatterEnd_PtoP_2(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
2439: {
2440: VecScatter_MPI_General *gen_to,*gen_from;
2441: PetscScalar *rvalues,*yv,*val;
2442: PetscErrorCode ierr;
2443: PetscInt nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
2444: PetscMPIInt imdex;
2445: MPI_Request *rwaits,*swaits;
2446: MPI_Status rstatus,*sstatus;
2449: if (mode & SCATTER_LOCAL) return(0);
2450: VecGetArray(yin,&yv);
2452: if (mode & SCATTER_REVERSE) {
2453: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
2454: gen_from = (VecScatter_MPI_General*)ctx->todata;
2455: rwaits = gen_from->rev_requests;
2456: swaits = gen_to->rev_requests;
2457: sstatus = gen_from->sstatus;
2458: } else {
2459: gen_to = (VecScatter_MPI_General*)ctx->todata;
2460: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
2461: rwaits = gen_from->requests;
2462: swaits = gen_to->requests;
2463: sstatus = gen_to->sstatus;
2464: }
2465: rvalues = gen_from->values;
2466: nrecvs = gen_from->n;
2467: nsends = gen_to->n;
2468: indices = gen_from->indices;
2469: rstarts = gen_from->starts;
2471: /* wait on receives */
2472: count = nrecvs;
2473: while (count) {
2474: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
2475: /* unpack receives into our local space */
2476: val = rvalues + 2*rstarts[imdex];
2477: lindices = indices + rstarts[imdex];
2478: n = rstarts[imdex+1] - rstarts[imdex];
2479: if (addv == INSERT_VALUES) {
2480: for (i=0; i<n; i++) {
2481: idx = lindices[i];
2482: yv[idx] = val[0];
2483: yv[idx+1] = val[1];
2484: val += 2;
2485: }
2486: } else if (addv == ADD_VALUES) {
2487: for (i=0; i<n; i++) {
2488: idx = lindices[i];
2489: yv[idx] += val[0];
2490: yv[idx+1] += val[1];
2491: val += 2;
2492: }
2493: #if !defined(PETSC_USE_COMPLEX)
2494: } else if (addv == MAX_VALUES) {
2495: for (i=0; i<n; i++) {
2496: idx = lindices[i];
2497: yv[idx] = PetscMax(yv[idx],val[0]);
2498: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
2499: val += 2;
2500: }
2501: #endif
2502: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
2503: count--;
2504: }
2505: /* wait on sends */
2506: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
2507: VecRestoreArray(yin,&yv);
2508: return(0);
2509: }
2511: /* ---------------------------------------------------------------------------------*/
2515: PetscErrorCode VecScatterDestroy_PtoP_X(VecScatter ctx)
2516: {
2517: VecScatter_MPI_General *gen_to = (VecScatter_MPI_General*)ctx->todata;
2518: VecScatter_MPI_General *gen_from = (VecScatter_MPI_General*)ctx->fromdata;
2519: PetscErrorCode ierr;
2520: PetscInt i;
2523: if (gen_to->use_readyreceiver) {
2524: /*
2525: Since the receives have already been posted (ready-receiver mode) we must
2526: cancel them before freeing the requests
2527: */
2528: for (i=0; i<gen_from->n; i++) {
2529: MPI_Cancel(gen_from->requests+i);
2530: }
2531: }
2533: if (gen_to->local.vslots) {PetscFree2(gen_to->local.vslots,gen_from->local.vslots);}
2534: if (gen_to->local.slots_nonmatching) {PetscFree2(gen_to->local.slots_nonmatching,gen_from->local.slots_nonmatching);}
2536: /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
2537: /*
2538: IBM's PE version of MPI has a bug where freeing these requests breaks later
2539: message passing.
2540: */
2541: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
2542: for (i=0; i<gen_to->n; i++) {
2543: MPI_Request_free(gen_to->requests + i);
2544: MPI_Request_free(gen_to->rev_requests + i);
2545: }
2547: /*
2548: MPICH could not properly cancel requests, thus with ready-receiver mode we
2549: cannot free the requests. It may be fixed now; if not, guard the following
2550: code with if (!gen_to->use_readyreceiver) { ... }
2551: */
2552: for (i=0; i<gen_from->n; i++) {
2553: MPI_Request_free(gen_from->requests + i);
2554: MPI_Request_free(gen_from->rev_requests + i);
2555: }
2556: #endif
2557:
2558: PetscFree7(gen_to->values,gen_to->requests,gen_to->indices,gen_to->starts,gen_to->procs,gen_to->sstatus,gen_to->rstatus);
2559: PetscFree2(gen_to->rev_requests,gen_from->rev_requests);
2560: PetscFree5(gen_from->values,gen_from->requests,gen_from->indices,gen_from->starts,gen_from->procs);
2561: PetscFree(gen_to);
2562: PetscFree(gen_from);
2563: PetscHeaderDestroy(ctx);
2564: return(0);
2565: }
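/*
   Editorial sketch, not part of the original vpscat.c: lifecycle of the persistent
   MPI requests that this destroy routine releases. They are built once at create
   time, restarted for every scatter, and freed exactly once here; in ready-receiver
   mode the pre-posted receives are cancelled first, as above. The helper name and
   arguments below are illustrative only.
*/
static void PersistentSendLifecycle(void *buf,int count,int dest,int tag,MPI_Comm comm,int nscatters)
{
  MPI_Request req;
  MPI_Status  stat;
  int         it;
  MPI_Send_init(buf,count,MPIU_SCALAR,dest,tag,comm,&req);  /* built once, e.g. in VecScatterCreate_PtoS()   */
  for (it=0; it<nscatters; it++) {
    MPI_Start(&req);                                        /* restarted by each VecScatterBegin_PtoP_*      */
    MPI_Wait(&req,&stat);                                   /* completed by each VecScatterEnd_PtoP_*        */
  }
  MPI_Request_free(&req);                                   /* released once, as in VecScatterDestroy_PtoP_X */
}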
2567: /* ==========================================================================================*/
2569: /* create parallel to sequential scatter context */
2570: /*
2571: bs indicates how many elements there are in each block. Normally this would be 1.
2572: */
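/*
   Editorial sketch, not part of the original vpscat.c: how a blocked parallel-to-
   sequential scatter typically reaches VecScatterCreate_PtoS() from user code. The
   index values are invented and the ISCreateBlock()/ISDestroy() calling sequences
   are assumed to be the PETSc 2.x ones used elsewhere in this tree;
   BuildBlockedScatter is a hypothetical helper name.
*/
static PetscErrorCode BuildBlockedScatter(Vec x,Vec ylocal,VecScatter *scat)
{
  IS       ix,iy;
  PetscInt idx[2] = {0,3},idy[2] = {0,1};   /* invented block indices */
  PetscInt bs = 4;                          /* block size; selects the _4 kernels above */

  ISCreateBlock(PETSC_COMM_WORLD,bs,2,idx,&ix);
  ISCreateBlock(PETSC_COMM_SELF,bs,2,idy,&iy);
  VecScatterCreate(x,ix,ylocal,iy,scat);    /* dispatches here with bs > 1 when the blocked layout permits */
  ISDestroy(ix);
  ISDestroy(iy);
  return(0);
}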
2575: PetscErrorCode VecScatterCreate_PtoS(PetscInt nx,PetscInt *inidx,PetscInt ny,PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2576: {
2577: VecScatter_MPI_General *from,*to;
2578: PetscErrorCode ierr;
2579: PetscMPIInt size,rank,imdex,tag,n;
2580: PetscInt *source,*lens,*owners;
2581: PetscInt *lowner,*start,lengthy;
2582: PetscInt *nprocs,i,j,idx,nsends,nrecvs;
2583: PetscInt *owner,*starts,count,slen;
2584: PetscInt *rvalues,*svalues,base,nmax,*values,*indx,nprocslocal,lastidx;
2585: MPI_Comm comm;
2586: MPI_Request *send_waits,*recv_waits;
2587: MPI_Status recv_status,*send_status;
2588: PetscMap map;
2589: #if defined(PETSC_DEBUG)
2590: PetscTruth found = PETSC_FALSE;
2591: #endif
2592:
2594: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2595: PetscObjectGetComm((PetscObject)xin,&comm);
2596: MPI_Comm_rank(comm,&rank);
2597: MPI_Comm_size(comm,&size);
2598: VecGetPetscMap(xin,&map);
2599: PetscMapGetGlobalRange(map,&owners);
2600: VecGetSize(yin,&lengthy);
2602: /* first count number of contributors to each processor */
2603: PetscMalloc2(2*size,PetscInt,&nprocs,nx,PetscInt,&owner);
2604: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
2605: j = 0;
2606: lastidx = -1;
2607: for (i=0; i<nx; i++) {
2608: /* if indices are NOT locally sorted, need to start search at the beginning */
2609: if (lastidx > (idx = inidx[i])) j = 0;
2610: lastidx = idx;
2611: for (; j<size; j++) {
2612: if (idx >= owners[j] && idx < owners[j+1]) {
2613: nprocs[2*j]++;
2614: nprocs[2*j+1] = 1;
2615: owner[i] = j;
2616: #if defined(PETSC_DEBUG)
2617: found = PETSC_TRUE;
2618: #endif
2619: break;
2620: }
2621: }
2622: #if defined(PETSC_DEBUG)
2623: if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2624: found = PETSC_FALSE;
2625: #endif
2626: }
2627: nprocslocal = nprocs[2*rank];
2628: nprocs[2*rank] = nprocs[2*rank+1] = 0;
2629: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
2631: /* inform other processors of number of messages and max length*/
2632: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
2634: /* post receives: */
2635: PetscMalloc4(nrecvs*nmax,PetscInt,&rvalues,nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source,nrecvs,MPI_Request,&recv_waits);
2636: for (i=0; i<nrecvs; i++) {
2637: MPI_Irecv((rvalues+nmax*i),nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2638: }
2640: /* do sends:
2641: 1) starts[i] gives the starting index in svalues for stuff going to
2642: the ith processor
2643: */
2644: PetscMalloc3(nx,PetscInt,&svalues,nsends,MPI_Request,&send_waits,size+1,PetscInt,&starts);
2645: starts[0] = 0;
2646: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2647: for (i=0; i<nx; i++) {
2648: if (owner[i] != rank) {
2649: svalues[starts[owner[i]]++] = inidx[i];
2650: }
2651: }
2653: starts[0] = 0;
2654: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2655: count = 0;
2656: for (i=0; i<size; i++) {
2657: if (nprocs[2*i+1]) {
2658: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
2659: }
2660: }
2662: /* wait on receives */
2663: count = nrecvs;
2664: slen = 0;
2665: while (count) {
2666: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2667: /* unpack receives into our local space */
2668: MPI_Get_count(&recv_status,MPIU_INT,&n);
2669: source[imdex] = recv_status.MPI_SOURCE;
2670: lens[imdex] = n;
2671: slen += n;
2672: count--;
2673: }
2674:
2675: /* allocate entire send scatter context */
2676: PetscNew(VecScatter_MPI_General,&to);
2677: PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&to->sendfirst);
2678: to->n = nrecvs;
2679: PetscMalloc7(bs*slen,PetscScalar,&to->values,nrecvs,MPI_Request,&to->requests,slen,PetscInt,&to->indices,nrecvs+1,PetscInt,&to->starts,
2680: nrecvs,PetscMPIInt,&to->procs,PetscMax(to->n,nsends),MPI_Status,&to->sstatus,PetscMax(to->n,nsends),MPI_Status,
2681: &to->rstatus);
2682: ctx->todata = (void*)to;
2683: to->starts[0] = 0;
2685: if (nrecvs) {
2686: PetscMalloc(nrecvs*sizeof(PetscInt),&indx);
2687: for (i=0; i<nrecvs; i++) indx[i] = i;
2688: PetscSortIntWithPermutation(nrecvs,source,indx);
2690: /* move the data into the send scatter */
2691: base = owners[rank];
2692: for (i=0; i<nrecvs; i++) {
2693: to->starts[i+1] = to->starts[i] + lens[indx[i]];
2694: to->procs[i] = source[indx[i]];
2695: values = rvalues + indx[i]*nmax;
2696: for (j=0; j<lens[indx[i]]; j++) {
2697: to->indices[to->starts[i] + j] = values[j] - base;
2698: }
2699: }
2700: PetscFree(indx);
2701: }
2702: PetscFree4(rvalues,lens,source,recv_waits);
2703:
2704: /* allocate entire receive scatter context */
2705: PetscNew(VecScatter_MPI_General,&from);
2706: PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&from->sendfirst);
2707: from->n = nsends;
2709: PetscMalloc5(ny*bs,PetscScalar,&from->values,nsends,MPI_Request,&from->requests,ny,PetscInt,&from->indices,
2710: nsends+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
2711: ctx->fromdata = (void*)from;
2713: /* move data into receive scatter */
2714: PetscMalloc2(size,PetscInt,&lowner,nsends+1,PetscInt,&start);
2715: count = 0; from->starts[0] = start[0] = 0;
2716: for (i=0; i<size; i++) {
2717: if (nprocs[2*i+1]) {
2718: lowner[i] = count;
2719: from->procs[count++] = i;
2720: from->starts[count] = start[count] = start[count-1] + nprocs[2*i];
2721: }
2722: }
2723: for (i=0; i<nx; i++) {
2724: if (owner[i] != rank) {
2725: from->indices[start[lowner[owner[i]]]++] = inidy[i];
2726: if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2727: }
2728: }
2729: PetscFree2(lowner,start);
2730: PetscFree2(nprocs,owner);
2731:
2732: /* wait on sends */
2733: if (nsends) {
2734: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
2735: MPI_Waitall(nsends,send_waits,send_status);
2736: PetscFree(send_status);
2737: }
2738: PetscFree3(svalues,send_waits,starts);
2740: if (nprocslocal) {
2741: PetscInt nt = from->local.n = to->local.n = nprocslocal;
2742: /* we have a scatter to ourselves */
2743: PetscMalloc2(nt,PetscInt,&to->local.vslots,nt,PetscInt,&from->local.vslots);
2744: nt = 0;
2745: for (i=0; i<nx; i++) {
2746: idx = inidx[i];
2747: if (idx >= owners[rank] && idx < owners[rank+1]) {
2748: to->local.vslots[nt] = idx - owners[rank];
2749: from->local.vslots[nt++] = inidy[i];
2750: if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2751: }
2752: }
2753: } else {
2754: from->local.n = 0;
2755: from->local.vslots = 0;
2756: to->local.n = 0;
2757: to->local.vslots = 0;
2758: }
2759: from->local.nonmatching_computed = PETSC_FALSE;
2760: from->local.n_nonmatching = 0;
2761: from->local.slots_nonmatching = 0;
2762: to->local.nonmatching_computed = PETSC_FALSE;
2763: to->local.n_nonmatching = 0;
2764: to->local.slots_nonmatching = 0;
2766: to->type = VEC_SCATTER_MPI_GENERAL;
2767: from->type = VEC_SCATTER_MPI_GENERAL;
2769: from->bs = bs;
2770: to->bs = bs;
2771: if (bs > 1) {
2772: PetscTruth flg,flgs = PETSC_FALSE;
2773: PetscInt *sstarts = to->starts, *rstarts = from->starts;
2774: PetscMPIInt *sprocs = to->procs, *rprocs = from->procs;
2775: MPI_Request *swaits = to->requests,*rwaits = from->requests;
2776: MPI_Request *rev_swaits,*rev_rwaits;
2777: PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
2779: tag = ctx->tag;
2780: comm = ctx->comm;
2782: /* allocate additional wait variables for the "reverse" scatter */
2783: PetscMalloc2(nrecvs,MPI_Request,&rev_rwaits,nsends,MPI_Request,&rev_swaits);
2784: to->rev_requests = rev_rwaits;
2785: from->rev_requests = rev_swaits;
2787: /* Register the receives that you will use later (sends for scatter reverse) */
2788: PetscOptionsHasName(PETSC_NULL,"-vecscatter_ssend",&flgs);
2789: if (flgs) {
2790: PetscLogInfo((0,"VecScatterCreate_PtoS:Using VecScatter Ssend mode\n"));
2791: }
2792: for (i=0; i<from->n; i++) {
2793: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2794: if (!flgs) {
2795: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
2796: } else {
2797: MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
2798: }
2799: }
2801: PetscOptionsHasName(PETSC_NULL,"-vecscatter_rr",&flg);
2802: if (flg) {
2803: ctx->postrecvs = VecScatterPostRecvs_PtoP_X;
2804: to->use_readyreceiver = PETSC_TRUE;
2805: from->use_readyreceiver = PETSC_TRUE;
2806: for (i=0; i<to->n; i++) {
2807: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2808: }
2809: PetscLogInfo((0,"VecScatterCreate_PtoS:Using VecScatter ready receiver mode\n"));
2810: } else {
2811: ctx->postrecvs = 0;
2812: to->use_readyreceiver = PETSC_FALSE;
2813: from->use_readyreceiver = PETSC_FALSE;
2814: for (i=0; i<to->n; i++) {
2815: if (!flgs) {
2816: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2817: } else {
2818: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2819: }
2820: }
2821: }
2822: /* Register receives for scatter reverse */
2823: for (i=0; i<to->n; i++) {
2824: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
2825: }
2827: PetscLogInfo((0,"VecScatterCreate_PtoS:Using blocksize %D scatter\n",bs));
2828: ctx->destroy = VecScatterDestroy_PtoP_X;
2829: ctx->copy = VecScatterCopy_PtoP_X;
2830: switch (bs) {
2831: case 12:
2832: ctx->begin = VecScatterBegin_PtoP_12;
2833: ctx->end = VecScatterEnd_PtoP_12;
2834: break;
2835: case 8:
2836: ctx->begin = VecScatterBegin_PtoP_8;
2837: ctx->end = VecScatterEnd_PtoP_8;
2838: break;
2839: case 7:
2840: ctx->begin = VecScatterBegin_PtoP_7;
2841: ctx->end = VecScatterEnd_PtoP_7;
2842: break;
2843: case 6:
2844: ctx->begin = VecScatterBegin_PtoP_6;
2845: ctx->end = VecScatterEnd_PtoP_6;
2846: break;
2847: case 5:
2848: ctx->begin = VecScatterBegin_PtoP_5;
2849: ctx->end = VecScatterEnd_PtoP_5;
2850: break;
2851: case 4:
2852: ctx->begin = VecScatterBegin_PtoP_4;
2853: ctx->end = VecScatterEnd_PtoP_4;
2854: break;
2855: case 3:
2856: ctx->begin = VecScatterBegin_PtoP_3;
2857: ctx->end = VecScatterEnd_PtoP_3;
2858: break;
2859: case 2:
2860: ctx->begin = VecScatterBegin_PtoP_2;
2861: ctx->end = VecScatterEnd_PtoP_2;
2862: break;
2863: default:
2864: SETERRQ(PETSC_ERR_SUP,"Blocksize not supported");
2865: }
2866: } else {
2867: PetscLogInfo((0,"VecScatterCreate_PtoS:Using nonblocked scatter\n"));
2868: ctx->postrecvs = 0;
2869: ctx->destroy = VecScatterDestroy_PtoP;
2870: ctx->begin = VecScatterBegin_PtoP;
2871: ctx->end = VecScatterEnd_PtoP;
2872: ctx->copy = VecScatterCopy_PtoP;
2873: }
2874: ctx->view = VecScatterView_MPI;
2876: /* Check if the local scatter is actually a copy; important special case */
2877: if (nprocslocal) {
2878: VecScatterLocalOptimizeCopy_Private(&to->local,&from->local,bs);
2879: }
2880: return(0);
2881: }
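/*
   Editorial sketch, not part of the original vpscat.c: what the PetscMaxSum() call in
   the three creation routines computes, spelled out with two plain MPI_Allreduce()
   calls. sendcounts[] here is a simplified, non-interleaved version of the counts the
   real code keeps in nprocs[2*i] (lengths) and nprocs[2*i+1] (0/1 flags); the real
   PetscMaxSum() obtains the same two numbers more economically from that interleaved
   array. CountIncomingMessages is a hypothetical helper name.
*/
static PetscErrorCode CountIncomingMessages(MPI_Comm comm,PetscMPIInt size,PetscMPIInt rank,
                                            PetscInt *sendcounts,PetscInt *nrecvs,PetscInt *nmax)
{
  PetscInt *flags,*nsenders,*maxlen,j;

  PetscMalloc(3*size*sizeof(PetscInt),&flags);
  nsenders = flags + size; maxlen = nsenders + size;
  for (j=0; j<size; j++) flags[j] = (sendcounts[j] > 0);
  MPI_Allreduce(flags,nsenders,size,MPIU_INT,MPI_SUM,comm);     /* nsenders[r] = number of ranks with data for r    */
  MPI_Allreduce(sendcounts,maxlen,size,MPIU_INT,MPI_MAX,comm);  /* maxlen[r]   = longest single message bound for r */
  *nrecvs = nsenders[rank];
  *nmax   = maxlen[rank];
  PetscFree(flags);
  return(0);
}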
2883: /* ------------------------------------------------------------------------------------*/
2884: /*
2885: Scatter from local Seq vectors to a parallel vector.
2886: */
2889: PetscErrorCode VecScatterCreate_StoP(PetscInt nx,PetscInt *inidx,PetscInt ny,PetscInt *inidy,Vec yin,PetscInt bs,VecScatter ctx)
2890: {
2891: VecScatter_MPI_General *from,*to;
2892: PetscInt *source,nprocslocal,*lens,*owners = yin->map->range;
2893: PetscMPIInt rank = yin->stash.rank,size = yin->stash.size,tag,imdex,n;
2894: PetscErrorCode ierr;
2895: PetscInt *lowner,*start;
2896: PetscInt *nprocs,i,j,idx,nsends,nrecvs;
2897: PetscInt *owner,*starts,count,slen;
2898: PetscInt *rvalues,*svalues,base,nmax,*values,lastidx;
2899: MPI_Comm comm = yin->comm;
2900: MPI_Request *send_waits,*recv_waits;
2901: MPI_Status recv_status,*send_status;
2902: #if defined(PETSC_DEBUG)
2903: PetscTruth found = PETSC_FALSE;
2904: #endif
2907: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2908: PetscMalloc5(2*size,PetscInt,&nprocs,nx,PetscInt,&owner,size,PetscInt,&lowner,size,PetscInt,&start,size+1,PetscInt,&starts);
2910: /* count number of contributors to each processor */
2911: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
2912: j = 0;
2913: lastidx = -1;
2914: for (i=0; i<nx; i++) {
2915: /* if indices are NOT locally sorted, need to start search at the beginning */
2916: if (lastidx > (idx = inidy[i])) j = 0;
2917: lastidx = idx;
2918: for (; j<size; j++) {
2919: if (idx >= owners[j] && idx < owners[j+1]) {
2920: nprocs[2*j]++;
2921: nprocs[2*j+1] = 1;
2922: owner[i] = j;
2923: #if defined(PETSC_DEBUG)
2924: found = PETSC_TRUE;
2925: #endif
2926: break;
2927: }
2928: }
2929: #if defined(PETSC_DEBUG)
2930: if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2931: found = PETSC_FALSE;
2932: #endif
2933: }
2934: nprocslocal = nprocs[2*rank];
2935: nprocs[2*rank] = nprocs[2*rank+1] = 0;
2936: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
2938: /* inform other processors of number of messages and max length*/
2939: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
2941: /* post receives: */
2942: PetscMalloc6(nrecvs*nmax,PetscInt,&rvalues,nrecvs,MPI_Request,&recv_waits,nx,PetscInt,&svalues,nsends,MPI_Request,&send_waits,nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
2944: for (i=0; i<nrecvs; i++) {
2945: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2946: }
2948: /* do sends:
2949: 1) starts[i] gives the starting index in svalues for stuff going to
2950: the ith processor
2951: */
2953: starts[0] = 0;
2954: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2955: for (i=0; i<nx; i++) {
2956: if (owner[i] != rank) {
2957: svalues[starts[owner[i]]++] = inidy[i];
2958: }
2959: }
2961: /* reset starts because the packing loop above incremented it */
2962: starts[0] = 0;
2963: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2964: count = 0;
2965: for (i=0; i<size; i++) {
2966: if (nprocs[2*i+1]) {
2967: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count);
2968: count++;
2969: }
2970: }
2972: /* allocate entire send scatter context */
2973: PetscNew(VecScatter_MPI_General,&to);
2974: ctx->todata = (void*)to;
2975: to->sendfirst = PETSC_FALSE;
2976: PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&to->sendfirst);
2977: to->n = nsends;
2978:
2979: PetscMalloc7(ny,PetscScalar,&to->values,nsends,MPI_Request,&to->requests,ny,PetscInt,&to->indices,nsends+1,PetscInt,&to->starts,
2980: nsends,PetscMPIInt,&to->procs,PetscMax(to->n,nrecvs),MPI_Status,&to->sstatus,PetscMax(to->n,nrecvs),MPI_Status,
2981: &to->rstatus);
2983: /* move data into send scatter context */
2984: count = 0;
2985: to->starts[0] = start[0] = 0;
2986: for (i=0; i<size; i++) {
2987: if (nprocs[2*i+1]) {
2988: lowner[i] = count;
2989: to->procs[count++] = i;
2990: to->starts[count] = start[count] = start[count-1] + nprocs[2*i];
2991: }
2992: }
2993: for (i=0; i<nx; i++) {
2994: if (owner[i] != rank) {
2995: to->indices[start[lowner[owner[i]]]++] = inidx[i];
2996: }
2997: }
2998: PetscFree5(nprocs,owner,lowner,start,starts);
3000: /* wait on receives */
3001: count = nrecvs;
3002: slen = 0;
3003: while (count) {
3004: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3005: /* unpack receives into our local space */
3006: MPI_Get_count(&recv_status,MPIU_INT,&n);
3007: source[imdex] = recv_status.MPI_SOURCE;
3008: lens[imdex] = n;
3009: slen += n;
3010: count--;
3011: }
3012:
3013: /* allocate entire receive scatter context */
3014: PetscNew(VecScatter_MPI_General,&from);
3015: PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&from->sendfirst);
3016: from->n = nrecvs;
3018: PetscMalloc5(slen,PetscScalar,&from->values,nrecvs,MPI_Request,&from->requests,slen,PetscInt,&from->indices,
3019: nrecvs+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
3020: ctx->fromdata = (void*)from;
3022: /* move the data into the receive scatter context*/
3023: base = owners[rank];
3024: from->starts[0] = 0;
3025: for (i=0; i<nrecvs; i++) {
3026: from->starts[i+1] = from->starts[i] + lens[i];
3027: from->procs[i] = source[i];
3028: values = rvalues + i*nmax;
3029: for (j=0; j<lens[i]; j++) {
3030: from->indices[from->starts[i] + j] = values[j] - base;
3031: }
3032: }
3033:
3034: /* wait on sends */
3035: if (nsends) {
3036: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
3037: MPI_Waitall(nsends,send_waits,send_status);
3038: PetscFree(send_status);
3039: }
3040: PetscFree6(rvalues,recv_waits,svalues,send_waits,lens,source);
3042: if (nprocslocal) {
3043: /* we have a scatter to ourselves */
3044: PetscInt nt = from->local.n = to->local.n = nprocslocal;
3045: PetscMalloc2(nt,PetscInt,&to->local.vslots,nt,PetscInt,&from->local.vslots);
3046: nt = 0;
3047: for (i=0; i<ny; i++) {
3048: idx = inidy[i];
3049: if (idx >= owners[rank] && idx < owners[rank+1]) {
3050: from->local.vslots[nt] = idx - owners[rank];
3051: to->local.vslots[nt++] = inidx[i];
3052: }
3053: }
3054: } else {
3055: from->local.n = 0;
3056: from->local.vslots = 0;
3057: to->local.n = 0;
3058: to->local.vslots = 0;
3060: }
3061: from->local.nonmatching_computed = PETSC_FALSE;
3062: from->local.n_nonmatching = 0;
3063: from->local.slots_nonmatching = 0;
3064: to->local.nonmatching_computed = PETSC_FALSE;
3065: to->local.n_nonmatching = 0;
3066: to->local.slots_nonmatching = 0;
3068: to->type = VEC_SCATTER_MPI_GENERAL;
3069: from->type = VEC_SCATTER_MPI_GENERAL;
3071: if (bs > 1) {
3072: PetscLogInfo((0,"VecScatterCreate_StoP:Using blocksize %D scatter\n",bs));
3073: ctx->copy = VecScatterCopy_PtoP_X;
3074: switch (bs) {
3075: case 12:
3076: ctx->begin = VecScatterBegin_PtoP_12;
3077: ctx->end = VecScatterEnd_PtoP_12;
3078: break;
3079: case 8:
3080: ctx->begin = VecScatterBegin_PtoP_8;
3081: ctx->end = VecScatterEnd_PtoP_8;
3082: break;
3083: case 7:
3084: ctx->begin = VecScatterBegin_PtoP_7;
3085: ctx->end = VecScatterEnd_PtoP_7;
3086: break;
3087: case 6:
3088: ctx->begin = VecScatterBegin_PtoP_6;
3089: ctx->end = VecScatterEnd_PtoP_6;
3090: break;
3091: case 5:
3092: ctx->begin = VecScatterBegin_PtoP_5;
3093: ctx->end = VecScatterEnd_PtoP_5;
3094: break;
3095: case 4:
3096: ctx->begin = VecScatterBegin_PtoP_4;
3097: ctx->end = VecScatterEnd_PtoP_4;
3098: break;
3099: case 3:
3100: ctx->begin = VecScatterBegin_PtoP_3;
3101: ctx->end = VecScatterEnd_PtoP_3;
3102: break;
3103: case 2:
3104: ctx->begin = VecScatterBegin_PtoP_2;
3105: ctx->end = VecScatterEnd_PtoP_2;
3106: break;
3107: default:
3108: SETERRQ(PETSC_ERR_SUP,"Blocksize not supported");
3109: }
3110: } else {
3111: PetscLogInfo((0,"VecScatterCreate_StoP:Using nonblocked scatter\n"));
3112: ctx->begin = VecScatterBegin_PtoP;
3113: ctx->end = VecScatterEnd_PtoP;
3114: ctx->copy = VecScatterCopy_PtoP;
3115: }
3116: ctx->destroy = VecScatterDestroy_PtoP;
3117: ctx->postrecvs = 0;
3118: ctx->view = VecScatterView_MPI;
3120: to->bs = bs;
3121: from->bs = bs;
3123: /* Check if the local scatter is actually a copy; important special case */
3124: if (nprocslocal) {
3125: VecScatterLocalOptimizeCopy_Private(&to->local,&from->local,bs);
3126: }
3127: return(0);
3128: }
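/*
   Editorial sketch, not part of the original vpscat.c: the owner search that all
   three creation routines perform with a resumable linear scan (the "lastidx" trick
   noted in their comments). j persists across calls for one index stream, so when
   the incoming indices are locally sorted the scan rarely restarts from rank 0.
   FindOwner is a hypothetical helper name; owners[] is the ownership-range array
   with owners[size] one past the last global index.
*/
static PetscInt FindOwner(PetscInt idx,PetscInt lastidx,PetscInt *j,PetscMPIInt size,const PetscInt *owners)
{
  if (lastidx > idx) *j = 0;                       /* indices not sorted: restart the scan */
  for (; *j<size; (*j)++) {
    if (idx >= owners[*j] && idx < owners[*j+1]) return *j;
  }
  return -1;                                       /* out of range; the real code raises an error */
}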
3130: /* ---------------------------------------------------------------------------------*/
3133: PetscErrorCode VecScatterCreate_PtoP(PetscInt nx,PetscInt *inidx,PetscInt ny,PetscInt *inidy,Vec xin,Vec yin,VecScatter ctx)
3134: {
3136: PetscMPIInt size,rank,tag,imdex,n;
3137: PetscInt *lens,*owners = xin->map->range;
3138: PetscInt *nprocs,i,j,idx,nsends,nrecvs,*local_inidx,*local_inidy;
3139: PetscInt *owner,*starts,count,slen;
3140: PetscInt *rvalues,*svalues,base,nmax,*values,lastidx;
3141: MPI_Comm comm;
3142: MPI_Request *send_waits,*recv_waits;
3143: MPI_Status recv_status,*send_status;
3144: PetscTruth duplicate = PETSC_FALSE;
3145: #if defined(PETSC_DEBUG)
3146: PetscTruth found = PETSC_FALSE;
3147: #endif
3150: PetscObjectGetNewTag((PetscObject)ctx,&tag);
3151: PetscObjectGetComm((PetscObject)xin,&comm);
3152: MPI_Comm_size(comm,&size);
3153: MPI_Comm_rank(comm,&rank);
3154: if (size == 1) {
3155: VecScatterCreate_StoP(nx,inidx,ny,inidy,yin,1,ctx);
3156: return(0);
3157: }
3159: /*
3160: Each processor ships its inidx[j] and inidy[j] to the owning processor;
3161: every process then calls VecScatterCreate_StoP() on the received pairs.
3162: */
3163: /* first count number of contributors to each processor */
3164: PetscMalloc3(2*size,PetscInt,&nprocs,nx,PetscInt,&owner,(size+1),PetscInt,&starts);
3165: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
3166: lastidx = -1;
3167: j = 0;
3168: for (i=0; i<nx; i++) {
3169: /* if indices are NOT locally sorted, need to start search at the beginning */
3170: if (lastidx > (idx = inidx[i])) j = 0;
3171: lastidx = idx;
3172: for (; j<size; j++) {
3173: if (idx >= owners[j] && idx < owners[j+1]) {
3174: nprocs[2*j]++;
3175: nprocs[2*j+1] = 1;
3176: owner[i] = j;
3177: #if defined(PETSC_DEBUG)
3178: found = PETSC_TRUE;
3179: #endif
3180: break;
3181: }
3182: }
3183: #if defined(PETSC_DEBUG)
3184: if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
3185: found = PETSC_FALSE;
3186: #endif
3187: }
3188: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
3190: /* inform other processors of number of messages and max length*/
3191: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
3193: /* post receives: */
3194: PetscMalloc6(2*nrecvs*nmax,PetscInt,&rvalues,2*nx,PetscInt,&svalues,2*nrecvs,PetscInt,&lens,nrecvs,MPI_Request,&recv_waits,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);
3196: for (i=0; i<nrecvs; i++) {
3197: MPI_Irecv(rvalues+2*nmax*i,2*nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3198: }
3200: /* do sends:
3201: 1) starts[i] gives the starting index in svalues for stuff going to
3202: the ith processor
3203: */
3204: starts[0]= 0;
3205: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
3206: for (i=0; i<nx; i++) {
3207: svalues[2*starts[owner[i]]] = inidx[i];
3208: svalues[1 + 2*starts[owner[i]]++] = inidy[i];
3209: }
3211: starts[0] = 0;
3212: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
3213: count = 0;
3214: for (i=0; i<size; i++) {
3215: if (nprocs[2*i+1]) {
3216: MPI_Isend(svalues+2*starts[i],2*nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count);
3217: count++;
3218: }
3219: }
3220: PetscFree3(nprocs,owner,starts);
3222: /* wait on receives */
3223: count = nrecvs;
3224: slen = 0;
3225: while (count) {
3226: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3227: /* unpack receives into our local space */
3228: MPI_Get_count(&recv_status,MPIU_INT,&n);
3229: lens[imdex] = n/2;
3230: slen += n/2;
3231: count--;
3232: }
3233:
3234: PetscMalloc2(slen,PetscInt,&local_inidx,slen,PetscInt,&local_inidy);
3235: base = owners[rank];
3236: count = 0;
3237: for (i=0; i<nrecvs; i++) {
3238: values = rvalues + 2*i*nmax;
3239: for (j=0; j<lens[i]; j++) {
3240: local_inidx[count] = values[2*j] - base;
3241: local_inidy[count++] = values[2*j+1];
3242: }
3243: }
3245: /* wait on sends */
3246: if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
3247: PetscFree6(rvalues,svalues,lens,recv_waits,send_waits,send_status);
3249: /*
3250: one should sort and remove duplicates from local_inidx[],local_inidy[]; the code below does so but is compiled only if do_it_slow is defined
3251: */
3253: #if defined(do_it_slow)
3254: /* sort on the from index */
3255: PetscSortIntWithArray(slen,local_inidx,local_inidy);
3256: start = 0;
3257: while (start < slen) {
3258: count = start+1;
3259: last = local_inidx[start];
3260: while (count < slen && last == local_inidx[count]) count++;
3261: if (count > start + 1) { /* found two or more equal local_inidx[] values in a row */
3262: /* sort on to index */
3263: PetscSortInt(count-start,local_inidy+start);
3264: }
3265: /* remove duplicates; not most efficient way, but probably good enough */
3266: i = start;
3267: while (i < count-1) {
3268: if (local_inidy[i] != local_inidy[i+1]) {
3269: i++;
3270: } else { /* found a duplicate */
3271: duplicate = PETSC_TRUE;
3272: for (j=i; j<slen-1; j++) {
3273: local_inidx[j] = local_inidx[j+1];
3274: local_inidy[j] = local_inidy[j+1];
3275: }
3276: slen--;
3277: count--;
3278: }
3279: }
3280: start = count;
3281: }
3282: #endif
3283: if (duplicate) {
3284: PetscLogInfo((0,"VecScatterCreate_PtoP:Duplicate from/to indices passed to VecScatterCreate(); they are ignored\n"));
3285: }
3286: VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,yin,1,ctx);
3287: PetscFree2(local_inidx,local_inidy);
3288: return(0);
3289: }