Actual source code: vpscat.c
1: /*$Id: vpscat.c,v 1.165 2001/09/08 13:16:48 bsmith Exp $*/
2: /*
3: Defines parallel vector scatters.
4: */
 6: #include "src/vec/is/isimpl.h"
 7: #include "src/vec/vecimpl.h"
 8: #include "src/vec/impls/dvecimpl.h"
 9: #include "src/vec/impls/mpi/pvecimpl.h"
 10: #include "petscsys.h"
12: int VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
13: {
14: VecScatter_MPI_General *to=(VecScatter_MPI_General*)ctx->todata;
15: VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
16: int i,rank,ierr;
17: PetscViewerFormat format;
18: PetscTruth isascii;
21: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
22: if (isascii) {
23: MPI_Comm_rank(ctx->comm,&rank);
24: PetscViewerGetFormat(viewer,&format);
25: if (format == PETSC_VIEWER_ASCII_INFO) {
26: int nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
28: MPI_Reduce(&to->n,&nsend_max,1,MPI_INT,MPI_MAX,0,ctx->comm);
29: MPI_Reduce(&from->n,&nrecv_max,1,MPI_INT,MPI_MAX,0,ctx->comm);
 30: itmp = to->starts[to->n];
31: MPI_Reduce(&itmp,&lensend_max,1,MPI_INT,MPI_MAX,0,ctx->comm);
 32: itmp = from->starts[from->n];
33: MPI_Reduce(&itmp,&lenrecv_max,1,MPI_INT,MPI_MAX,0,ctx->comm);
34: MPI_Reduce(&itmp,&alldata,1,MPI_INT,MPI_SUM,0,ctx->comm);
 36: PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
 37: PetscViewerASCIIPrintf(viewer," Maximum number sends %d\n",nsend_max);
 38: PetscViewerASCIIPrintf(viewer," Maximum number receives %d\n",nrecv_max);
 39: PetscViewerASCIIPrintf(viewer," Maximum data sent %d\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
 40: PetscViewerASCIIPrintf(viewer," Maximum data received %d\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
 41: PetscViewerASCIIPrintf(viewer," Total data sent %d\n",(int)(alldata*to->bs*sizeof(PetscScalar)));
43: } else {
 44: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %d; Number to self = %d\n",rank,to->n,to->local.n);
45: if (to->n) {
46: for (i=0; i<to->n; i++){
 47: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d length = %d to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
48: }
 49: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
50: for (i=0; i<to->starts[to->n]; i++){
 51: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%d \n",rank,to->indices[i]);
52: }
53: }
 55: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Number receives = %d; Number from self = %d\n",rank,from->n,from->local.n);
56: if (from->n) {
57: for (i=0; i<from->n; i++){
 58: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d length %d from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
59: }
 61: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
62: for (i=0; i<from->starts[from->n]; i++){
 63: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%d \n",rank,from->indices[i]);
64: }
65: }
66: if (to->local.n) {
 67: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Indices for local part of scatter\n",rank);
68: for (i=0; i<to->local.n; i++){
 69: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]From %d to %d \n",rank,from->local.slots[i],to->local.slots[i]);
70: }
71: }
73: PetscViewerFlush(viewer);
74: }
75: } else {
76: SETERRQ1(1,"Viewer type %s not supported for this scatter",((PetscObject)viewer)->type_name);
77: }
78: return(0);
79: }
81: /* -----------------------------------------------------------------------------------*/
82: /*
 83: The next routine determines which part of the local scatter copies values
 84: onto their current locations (source and destination slots identical). We
 85: check this here so that portion of the scatter need not be performed.
86: */
87: int VecScatterLocalOptimize_Private(VecScatter_Seq_General *gen_to,VecScatter_Seq_General *gen_from)
88: {
89: int n = gen_to->n,n_nonmatching = 0,i,*to_slots = gen_to->slots,*from_slots = gen_from->slots;
90: int *nto_slots,*nfrom_slots,j = 0,ierr;
91:
93: for (i=0; i<n; i++) {
94: if (to_slots[i] != from_slots[i]) n_nonmatching++;
95: }
97: if (!n_nonmatching) {
98: gen_to->nonmatching_computed = PETSC_TRUE;
99: gen_to->n_nonmatching = gen_from->n_nonmatching = 0;
 100: PetscLogInfo(0,"VecScatterLocalOptimize_Private:Reduced %d to 0\n", n);
101: } else if (n_nonmatching == n) {
102: gen_to->nonmatching_computed = PETSC_FALSE;
 103: PetscLogInfo(0,"VecScatterLocalOptimize_Private:All values non-matching\n");
104: } else {
105: gen_to->nonmatching_computed= PETSC_TRUE;
106: gen_to->n_nonmatching = gen_from->n_nonmatching = n_nonmatching;
107: PetscMalloc(n_nonmatching*sizeof(int),&nto_slots);
108: gen_to->slots_nonmatching = nto_slots;
109: PetscMalloc(n_nonmatching*sizeof(int),&nfrom_slots);
110: gen_from->slots_nonmatching = nfrom_slots;
111: for (i=0; i<n; i++) {
112: if (to_slots[i] != from_slots[i]) {
113: nto_slots[j] = to_slots[i];
114: nfrom_slots[j] = from_slots[i];
115: j++;
116: }
117: }
 118: PetscLogInfo(0,"VecScatterLocalOptimize_Private:Reduced %d to %d\n",n,n_nonmatching);
119: }
120: return(0);
121: }
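/*
   A minimal standalone sketch of the slot-matching test used above (illustrative
   only; the helper name and arrays are hypothetical, not from the library):
   entries whose source and destination slots coincide never need to be moved,
   so only the non-matching entries matter.
*/
#if 0
static int CountNonMatchingSlots(int n,const int to_slots[],const int from_slots[])
{
  int i,n_nonmatching = 0;
  for (i=0; i<n; i++) {
    if (to_slots[i] != from_slots[i]) n_nonmatching++;   /* this value actually moves */
  }
  return n_nonmatching;   /* 0 means the whole local scatter is a no-op */
}
#endif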
123: /* --------------------------------------------------------------------------------------*/
124: int VecScatterCopy_PtoP(VecScatter in,VecScatter out)
125: {
126: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
127: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
128: int len,ny,ierr;
131: out->postrecvs = in->postrecvs;
132: out->begin = in->begin;
133: out->end = in->end;
134: out->copy = in->copy;
135: out->destroy = in->destroy;
136: out->view = in->view;
138: /* allocate entire send scatter context */
139: PetscNew(VecScatter_MPI_General,&out_to);
140: PetscMemzero(out_to,sizeof(VecScatter_MPI_General));
141: PetscLogObjectMemory(out,sizeof(VecScatter_MPI_General));
142: ny = in_to->starts[in_to->n];
143: len = ny*(sizeof(int) + sizeof(PetscScalar)) + (in_to->n+1)*sizeof(int) +
144: (in_to->n)*(sizeof(int) + sizeof(MPI_Request));
145: out_to->n = in_to->n;
146: out_to->type = in_to->type;
147: out_to->sendfirst = in_to->sendfirst;
148: PetscMalloc(len,&out_to->values);
149: PetscLogObjectMemory(out,len);
150: out_to->requests = (MPI_Request*)(out_to->values + ny);
151: out_to->indices = (int*)(out_to->requests + out_to->n);
152: out_to->starts = (int*)(out_to->indices + ny);
153: out_to->procs = (int*)(out_to->starts + out_to->n + 1);
154: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(int));
155: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(int));
156: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(int));
157: PetscMalloc(2*(PetscMax(in_to->n,in_from->n)+1)*sizeof(MPI_Status),&out_to->sstatus);
 158: out_to->rstatus = out_to->sstatus + PetscMax(in_to->n,in_from->n) + 1;
159: PetscLogObjectMemory(out,2*(PetscMax(in_to->n,in_from->n) + 1)*sizeof(MPI_Status));
160: out->todata = (void*)out_to;
161: out_to->local.n = in_to->local.n;
162: out_to->local.nonmatching_computed = PETSC_FALSE;
163: out_to->local.n_nonmatching = 0;
164: out_to->local.slots_nonmatching = 0;
165: if (in_to->local.n) {
166: PetscMalloc(in_to->local.n*sizeof(int),&out_to->local.slots);
167: PetscMemcpy(out_to->local.slots,in_to->local.slots,in_to->local.n*sizeof(int));
168: PetscLogObjectMemory(out,in_to->local.n*sizeof(int));
169: } else {
170: out_to->local.slots = 0;
171: }
173: /* allocate entire receive context */
174: PetscNew(VecScatter_MPI_General,&out_from);
175: PetscMemzero(out_from,sizeof(VecScatter_MPI_General));
176: out_from->type = in_from->type;
177: PetscLogObjectMemory(out,sizeof(VecScatter_MPI_General));
178: ny = in_from->starts[in_from->n];
179: len = ny*(sizeof(int) + sizeof(PetscScalar)) + (in_from->n+1)*sizeof(int) +
180: (in_from->n)*(sizeof(int) + sizeof(MPI_Request));
181: out_from->n = in_from->n;
182: out_from->sendfirst = in_from->sendfirst;
183: PetscMalloc(len,&out_from->values);
184: PetscLogObjectMemory(out,len);
185: out_from->requests = (MPI_Request*)(out_from->values + ny);
186: out_from->indices = (int*)(out_from->requests + out_from->n);
187: out_from->starts = (int*)(out_from->indices + ny);
188: out_from->procs = (int*)(out_from->starts + out_from->n + 1);
189: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(int));
190: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(int));
191: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(int));
192: out->fromdata = (void*)out_from;
193: out_from->local.n = in_from->local.n;
194: out_from->local.nonmatching_computed = PETSC_FALSE;
195: out_from->local.n_nonmatching = 0;
196: out_from->local.slots_nonmatching = 0;
197: if (in_from->local.n) {
198: PetscMalloc(in_from->local.n*sizeof(int),&out_from->local.slots);
199: PetscLogObjectMemory(out,in_from->local.n*sizeof(int));
200: PetscMemcpy(out_from->local.slots,in_from->local.slots,in_from->local.n*sizeof(int));
201: } else {
202: out_from->local.slots = 0;
203: }
204: return(0);
205: }
207: /* -------------------------------------------------------------------------------------*/
208: int VecScatterDestroy_PtoP(VecScatter ctx)
209: {
210: VecScatter_MPI_General *gen_to = (VecScatter_MPI_General*)ctx->todata;
211: VecScatter_MPI_General *gen_from = (VecScatter_MPI_General*)ctx->fromdata;
215: if (gen_to->local.slots) {PetscFree(gen_to->local.slots);}
216: if (gen_from->local.slots) {PetscFree(gen_from->local.slots);}
217: if (gen_to->local.slots_nonmatching) {PetscFree(gen_to->local.slots_nonmatching);}
218: if (gen_from->local.slots_nonmatching) {PetscFree(gen_from->local.slots_nonmatching);}
219: PetscFree(gen_to->sstatus);
220: PetscFree(gen_to->values);
221: PetscFree(gen_to);
222: PetscFree(gen_from->values);
223: PetscFree(gen_from);
224: PetscLogObjectDestroy(ctx);
225: PetscHeaderDestroy(ctx);
226: return(0);
227: }
229: /* --------------------------------------------------------------------------------------*/
 230: /*
 231: Even though the next routines are written for parallel
 232: vectors, either xin or yin (but not both) may be a Seq
 233: vector, one per processor.
 234:
 235: gen_from indices give the locations where arriving values are stored;
 236: gen_to indices give the locations the departing values are gathered from.
 237: The naming can be VERY confusing.
 239: */
240: int VecScatterBegin_PtoP(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
241: {
242: VecScatter_MPI_General *gen_to,*gen_from;
243: MPI_Comm comm = ctx->comm;
244: PetscScalar *xv,*yv,*val,*rvalues,*svalues;
245: MPI_Request *rwaits,*swaits;
246: int tag = ctx->tag,i,j,*indices,*rstarts,*sstarts,*rprocs,*sprocs;
247: int nrecvs,nsends,iend,ierr;
250: VecGetArray(xin,&xv);
251: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
252: if (mode & SCATTER_REVERSE){
253: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
254: gen_from = (VecScatter_MPI_General*)ctx->todata;
255: } else {
256: gen_to = (VecScatter_MPI_General*)ctx->todata;
257: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
258: }
259: rvalues = gen_from->values;
260: svalues = gen_to->values;
261: nrecvs = gen_from->n;
262: nsends = gen_to->n;
263: rwaits = gen_from->requests;
264: swaits = gen_to->requests;
265: indices = gen_to->indices;
266: rstarts = gen_from->starts;
267: sstarts = gen_to->starts;
268: rprocs = gen_from->procs;
269: sprocs = gen_to->procs;
271: if (!(mode & SCATTER_LOCAL)) {
273: if (gen_to->sendfirst) {
274: /* do sends: */
275: for (i=0; i<nsends; i++) {
276: val = svalues + sstarts[i];
277: iend = sstarts[i+1]-sstarts[i];
278: /* pack the message */
279: for (j=0; j<iend; j++) {
280: val[j] = xv[*indices++];
281: }
282: MPI_Isend(val,iend,MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
283: }
284: }
285:
286: /* post receives: */
287: for (i=0; i<nrecvs; i++) {
288: MPI_Irecv(rvalues+rstarts[i],rstarts[i+1]-rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
289: }
291: if (!gen_to->sendfirst) {
292: /* do sends: */
293: for (i=0; i<nsends; i++) {
294: val = svalues + sstarts[i];
295: iend = sstarts[i+1]-sstarts[i];
296: /* pack the message */
297: for (j=0; j<iend; j++) {
298: val[j] = xv[*indices++];
299: }
300: MPI_Isend(val,iend,MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
301: }
302: }
303: }
305: /* take care of local scatters */
306: if (gen_to->local.n && addv == INSERT_VALUES) {
307: if (yv == xv && !gen_to->local.nonmatching_computed) {
308: VecScatterLocalOptimize_Private(&gen_to->local,&gen_from->local);
309: }
310: if (gen_to->local.is_copy) {
311: PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
312: } else if (yv != xv || gen_to->local.nonmatching_computed == -1) {
313: int *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
314: int n = gen_to->local.n;
315: for (i=0; i<n; i++) {yv[fslots[i]] = xv[tslots[i]];}
316: } else {
317: /*
318: In this case, it is copying the values into their old locations, thus we can skip those
319: */
320: int *tslots = gen_to->local.slots_nonmatching,*fslots = gen_from->local.slots_nonmatching;
321: int n = gen_to->local.n_nonmatching;
322: for (i=0; i<n; i++) {yv[fslots[i]] = xv[tslots[i]];}
323: }
324: } else if (gen_to->local.n) {
325: int *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
326: int n = gen_to->local.n;
327: if (addv == ADD_VALUES) {
328: for (i=0; i<n; i++) {yv[fslots[i]] += xv[tslots[i]];}
329: #if !defined(PETSC_USE_COMPLEX)
330: } else if (addv == MAX_VALUES) {
331: for (i=0; i<n; i++) {yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[tslots[i]]);}
332: #endif
333: } else {SETERRQ(1,"Wrong insert option");}
334: }
336: VecRestoreArray(xin,&xv);
337: if (xin != yin) {VecRestoreArray(yin,&yv);}
338: return(0);
339: }
341: /* --------------------------------------------------------------------------------------*/
342: int VecScatterEnd_PtoP(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
343: {
344: VecScatter_MPI_General *gen_to,*gen_from;
345: PetscScalar *rvalues,*yv,*val;
346: int ierr,nrecvs,nsends,i,*indices,count,imdex,n,*rstarts,*lindices;
347: MPI_Request *rwaits,*swaits;
348: MPI_Status rstatus,*sstatus;
351: if (mode & SCATTER_LOCAL) return(0);
352: VecGetArray(yin,&yv);
354: if (mode & SCATTER_REVERSE){
355: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
356: gen_from = (VecScatter_MPI_General*)ctx->todata;
357: sstatus = gen_from->sstatus;
358: } else {
359: gen_to = (VecScatter_MPI_General*)ctx->todata;
360: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
361: sstatus = gen_to->sstatus;
362: }
363: rvalues = gen_from->values;
364: nrecvs = gen_from->n;
365: nsends = gen_to->n;
366: rwaits = gen_from->requests;
367: swaits = gen_to->requests;
368: indices = gen_from->indices;
369: rstarts = gen_from->starts;
371: /* wait on receives */
372: count = nrecvs;
373: while (count) {
374: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
375: /* unpack receives into our local space */
376: val = rvalues + rstarts[imdex];
377: n = rstarts[imdex+1]-rstarts[imdex];
378: lindices = indices + rstarts[imdex];
379: if (addv == INSERT_VALUES) {
380: for (i=0; i<n; i++) {
381: yv[lindices[i]] = *val++;
382: }
383: } else if (addv == ADD_VALUES) {
384: for (i=0; i<n; i++) {
385: yv[lindices[i]] += *val++;
386: }
387: #if !defined(PETSC_USE_COMPLEX)
388: } else if (addv == MAX_VALUES) {
389: for (i=0; i<n; i++) {
390: yv[lindices[i]] = PetscMax(yv[lindices[i]],*val); val++;
391: }
392: #endif
393: } else {SETERRQ(1,"Wrong insert option");}
394: count--;
395: }
397: /* wait on sends */
398: if (nsends) {
399: MPI_Waitall(nsends,swaits,sstatus);
400: }
401: VecRestoreArray(yin,&yv);
402: return(0);
403: }
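/*
   A minimal usage sketch for the begin/end pair above (PETSc 2.x calling
   sequence as used in this file; the vectors x,y and index sets ix,iy are
   hypothetical and error checking is abbreviated). Independent local work
   can be overlapped between the two calls while the messages are in flight.
*/
#if 0
static int VecScatterUsageSketch(Vec x,IS ix,Vec y,IS iy)
{
  VecScatter ctx;
  int        ierr;

  ierr = VecScatterCreate(x,ix,y,iy,&ctx);CHKERRQ(ierr);
  ierr = VecScatterBegin(x,y,INSERT_VALUES,SCATTER_FORWARD,ctx);CHKERRQ(ierr);
  /* ... local computation that does not need the scattered values ... */
  ierr = VecScatterEnd(x,y,INSERT_VALUES,SCATTER_FORWARD,ctx);CHKERRQ(ierr);
  ierr = VecScatterDestroy(ctx);CHKERRQ(ierr);
  return 0;
}
#endif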
404: /* ==========================================================================================*/
405: /*
406: Special scatters for fixed block sizes. These provide better performance
407: because the local copying and packing and unpacking are done with loop
408: unrolling to the size of the block.
 410: They also use MPI persistent sends and receives, which (at least in theory)
 411: allow MPI to optimize repeated sends and receives of the same type.
412: */
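/*
   The MPI persistent-request pattern these block scatters rely on, reduced to a
   bare sketch (the peer rank, buffers, lengths and repeat count are hypothetical):
   the messages are registered once, then started and completed on every scatter.
*/
#if 0
static int PersistentExchangeSketch(MPI_Comm comm,PetscScalar *sbuf,PetscScalar *rbuf,int len,int peer,int tag)
{
  MPI_Request req[2];
  MPI_Status  stat[2];
  int         i;

  MPI_Send_init(sbuf,len,MPIU_SCALAR,peer,tag,comm,&req[0]);   /* register once */
  MPI_Recv_init(rbuf,len,MPIU_SCALAR,peer,tag,comm,&req[1]);
  for (i=0; i<10; i++) {                                       /* reuse many times */
    MPI_Startall(2,req);
    MPI_Waitall(2,req,stat);
  }
  MPI_Request_free(&req[0]);
  MPI_Request_free(&req[1]);
  return 0;
}
#endif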
414: /*
415: This is for use with the "ready-receiver" mode. In theory on some
 416: machines it could lead to better performance. In practice we have never
 417: seen it do so. Enabled with the -vecscatter_rr flag.
418: */
419: int VecScatterPostRecvs_PtoP_X(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
420: {
421: VecScatter_MPI_General *gen_from = (VecScatter_MPI_General*)ctx->fromdata;
424: MPI_Startall_irecv(gen_from->starts[gen_from->n]*gen_from->bs,gen_from->n,gen_from->requests);
425: return(0);
426: }
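/*
   What ready-receiver mode requires: a ready send (MPI_Rsend_init here) is legal
   only if the matching receive has already been posted, which is what the routine
   above arranges by starting the receives ahead of time. A reduced sketch of the
   required ordering (ranks, buffers and counts are hypothetical):
*/
#if 0
static int ReadyReceiverSketch(MPI_Comm comm,PetscScalar *sbuf,PetscScalar *rbuf,int len,int peer,int tag)
{
  MPI_Request rreq,sreq;
  MPI_Status  stat;

  MPI_Recv_init(rbuf,len,MPIU_SCALAR,peer,tag,comm,&rreq);
  MPI_Rsend_init(sbuf,len,MPIU_SCALAR,peer,tag,comm,&sreq);

  MPI_Start(&rreq);            /* the receive must be started first ...       */
  MPI_Barrier(comm);           /* ... and known to be posted on all processes */
  MPI_Start(&sreq);            /* only now is the ready send legal            */
  MPI_Wait(&sreq,&stat);
  MPI_Wait(&rreq,&stat);

  MPI_Request_free(&rreq);
  MPI_Request_free(&sreq);
  return 0;
}
#endif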
428: /* --------------------------------------------------------------------------------------*/
429: /*
 430: Special optimization to see if the local part of the scatter is actually a
 431: contiguous copy; if so, the scatter routines call PetscMemcpy() instead of an indexed loop.
432:
433: */
434: int VecScatterLocalOptimizeCopy_Private(VecScatter_Seq_General *gen_to,VecScatter_Seq_General *gen_from,int bs)
435: {
436: int n = gen_to->n,i,*to_slots = gen_to->slots,*from_slots = gen_from->slots;
437: int to_start,from_start;
438:
440: to_start = to_slots[0];
441: from_start = from_slots[0];
443: for (i=1; i<n; i++) {
444: to_start += bs;
445: from_start += bs;
446: if (to_slots[i] != to_start) return(0);
447: if (from_slots[i] != from_start) return(0);
448: }
449: gen_to->is_copy = PETSC_TRUE;
450: gen_to->copy_start = to_slots[0];
451: gen_to->copy_length = bs*sizeof(PetscScalar)*n;
452: gen_from->is_copy = PETSC_TRUE;
453: gen_from->copy_start = from_slots[0];
454: gen_from->copy_length = bs*sizeof(PetscScalar)*n;
 456: PetscLogInfo(0,"VecScatterLocalOptimizeCopy_Private:Local scatter is a copy, optimizing for it\n");
458: return(0);
459: }
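/*
   What the copy optimization above buys, as a sketch (y, x, the slot arrays and
   sizes are hypothetical): when both slot arrays advance by exactly bs per entry,
   the indexed double loop is equivalent to a single PetscMemcpy().
*/
#if 0
static void LocalCopySketch(PetscScalar *y,PetscScalar *x,int *to_slots,int *from_slots,int n,int bs)
{
  int i,j;
  /* general indexed form of the local scatter */
  for (i=0; i<n; i++) {
    for (j=0; j<bs; j++) y[from_slots[i]+j] = x[to_slots[i]+j];
  }
  /* equivalent single block copy when is_copy was set above */
  PetscMemcpy(y + from_slots[0],x + to_slots[0],bs*n*sizeof(PetscScalar));
}
#endif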
461: /* --------------------------------------------------------------------------------------*/
463: int VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
464: {
465: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
466: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
467: int len,ny,bs = in_from->bs,ierr;
470: out->postrecvs = in->postrecvs;
471: out->begin = in->begin;
472: out->end = in->end;
473: out->copy = in->copy;
474: out->destroy = in->destroy;
475: out->view = in->view;
477: /* allocate entire send scatter context */
478: PetscNew(VecScatter_MPI_General,&out_to);
479: PetscMemzero(out_to,sizeof(VecScatter_MPI_General));
480: PetscLogObjectMemory(out,sizeof(VecScatter_MPI_General));
481: ny = in_to->starts[in_to->n];
482: len = ny*(sizeof(int) + bs*sizeof(PetscScalar)) + (in_to->n+1)*sizeof(int) +
483: (in_to->n)*(sizeof(int) + sizeof(MPI_Request));
484: out_to->n = in_to->n;
485: out_to->type = in_to->type;
486: out_to->sendfirst = in_to->sendfirst;
488: PetscMalloc(len,&out_to->values);
489: PetscLogObjectMemory(out,len);
490: out_to->requests = (MPI_Request*)(out_to->values + bs*ny);
491: out_to->indices = (int*)(out_to->requests + out_to->n);
492: out_to->starts = (int*)(out_to->indices + ny);
493: out_to->procs = (int*)(out_to->starts + out_to->n + 1);
494: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(int));
495: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(int));
496: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(int));
497: PetscMalloc(2*(PetscMax(in_to->n,in_from->n)+1)*sizeof(MPI_Status),&out_to->sstatus);
498: out_to->rstatus = out_to->sstatus + PetscMax(in_to->n,in_from->n) + 1;
499:
500: PetscLogObjectMemory(out,2*(PetscMax(in_to->n,in_from->n)+1)*sizeof(MPI_Status));
501: out->todata = (void*)out_to;
502: out_to->local.n = in_to->local.n;
503: out_to->local.nonmatching_computed = PETSC_FALSE;
504: out_to->local.n_nonmatching = 0;
505: out_to->local.slots_nonmatching = 0;
506: if (in_to->local.n) {
 507: PetscMalloc(in_to->local.n*sizeof(int),&out_to->local.slots);
508: PetscMemcpy(out_to->local.slots,in_to->local.slots,in_to->local.n*sizeof(int));
509: PetscLogObjectMemory(out,in_to->local.n*sizeof(int));
510: } else {
511: out_to->local.slots = 0;
512: }
514: /* allocate entire receive context */
515: PetscNew(VecScatter_MPI_General,&out_from);
516: PetscMemzero(out_from,sizeof(VecScatter_MPI_General));
517: out_from->type = in_from->type;
518: PetscLogObjectMemory(out,sizeof(VecScatter_MPI_General));
519: ny = in_from->starts[in_from->n];
520: len = ny*(sizeof(int) + bs*sizeof(PetscScalar)) + (in_from->n+1)*sizeof(int) +
521: (in_from->n)*(sizeof(int) + sizeof(MPI_Request));
522: out_from->n = in_from->n;
523: out_from->sendfirst = in_from->sendfirst;
524: PetscMalloc(len,&out_from->values);
525: PetscLogObjectMemory(out,len);
526: out_from->requests = (MPI_Request*)(out_from->values + bs*ny);
527: out_from->indices = (int*)(out_from->requests + out_from->n);
528: out_from->starts = (int*)(out_from->indices + ny);
529: out_from->procs = (int*)(out_from->starts + out_from->n + 1);
530: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(int));
531: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(int));
532: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(int));
533: out->fromdata = (void*)out_from;
534: out_from->local.n = in_from->local.n;
535: out_from->local.nonmatching_computed = PETSC_FALSE;
536: out_from->local.n_nonmatching = 0;
537: out_from->local.slots_nonmatching = 0;
538: if (in_from->local.n) {
539: PetscMalloc(in_from->local.n*sizeof(int),&out_from->local.slots);
540: PetscLogObjectMemory(out,in_from->local.n*sizeof(int));
541: PetscMemcpy(out_from->local.slots,in_from->local.slots,in_from->local.n*sizeof(int));
542: } else {
543: out_from->local.slots = 0;
544: }
546: /*
 547: set up the request arrays for use with MPI_Send_init() and MPI_Recv_init()
548: */
549: {
550: MPI_Comm comm;
551: int *sstarts = out_to->starts, *rstarts = out_from->starts;
552: int *sprocs = out_to->procs, *rprocs = out_from->procs;
553: int tag,i;
554: PetscTruth flg;
555: MPI_Request *swaits = out_to->requests,*rwaits = out_from->requests;
556: MPI_Request *rev_swaits,*rev_rwaits;
557: PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
559: PetscMalloc((in_to->n+1)*sizeof(MPI_Request),&out_to->rev_requests);
560: PetscMalloc((in_from->n+1)*sizeof(MPI_Request),&out_from->rev_requests);
561: PetscLogObjectMemory(out,(in_to->n+in_from->n+2)*sizeof(MPI_Request));
563: rev_rwaits = out_to->rev_requests;
564: rev_swaits = out_from->rev_requests;
566: out_from->bs = out_to->bs = bs;
567: tag = out->tag;
568: comm = out->comm;
570: /* Register the receives that you will use later (sends for scatter reverse) */
571: for (i=0; i<out_from->n; i++) {
572: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,
573: comm,rwaits+i);
574: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,
575: comm,rev_swaits+i);
576: }
578: PetscOptionsHasName(PETSC_NULL,"-vecscatter_rr",&flg);
579: if (flg) {
580: out->postrecvs = VecScatterPostRecvs_PtoP_X;
581: out_to->use_readyreceiver = PETSC_TRUE;
582: out_from->use_readyreceiver = PETSC_TRUE;
583: for (i=0; i<out_to->n; i++) {
584: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,
585: comm,swaits+i);
586: }
 587: PetscLogInfo(0,"VecScatterCopy_PtoP_X:Using VecScatter ready receiver mode\n");
588: } else {
589: out->postrecvs = 0;
590: out_to->use_readyreceiver = PETSC_FALSE;
591: out_from->use_readyreceiver = PETSC_FALSE;
592: flg = PETSC_FALSE;
593: ierr = PetscOptionsHasName(PETSC_NULL,"-vecscatter_ssend",&flg);
594: if (flg) {
 595: PetscLogInfo(0,"VecScatterCopy_PtoP_X:Using VecScatter Ssend mode\n");
596: }
597: for (i=0; i<out_to->n; i++) {
598: if (!flg) {
599: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,
600: comm,swaits+i);
601: } else {
602: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,
603: comm,swaits+i);
604: }
605: }
606: }
607: /* Register receives for scatter reverse */
608: for (i=0; i<out_to->n; i++) {
609: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,
610: comm,rev_rwaits+i);
611: }
612: }
614: return(0);
615: }
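/*
   The run-time switches referenced in this routine (-vecscatter_rr and
   -vecscatter_ssend; -vecscatter_packtogether is tested elsewhere through
   ctx->packtogether) are all queried with PetscOptionsHasName(). A reduced
   sketch of that pattern:
*/
#if 0
{
  PetscTruth flg;
  PetscOptionsHasName(PETSC_NULL,"-vecscatter_ssend",&flg);
  if (flg) {
    /* register the persistent sends with MPI_Ssend_init() instead of MPI_Send_init() */
  }
}
#endif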
617: /* --------------------------------------------------------------------------------------*/
619: int VecScatterBegin_PtoP_12(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
620: {
621: VecScatter_MPI_General *gen_to,*gen_from;
622: PetscScalar *xv,*yv,*val,*svalues;
623: MPI_Request *rwaits,*swaits;
624: int *indices,*sstarts,iend,i,j,nrecvs,nsends,idx,ierr,len;
627: VecGetArray(xin,&xv);
628: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
630: if (mode & SCATTER_REVERSE) {
631: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
632: gen_from = (VecScatter_MPI_General*)ctx->todata;
633: rwaits = gen_from->rev_requests;
634: swaits = gen_to->rev_requests;
635: } else {
636: gen_to = (VecScatter_MPI_General*)ctx->todata;
637: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
638: rwaits = gen_from->requests;
639: swaits = gen_to->requests;
640: }
641: svalues = gen_to->values;
642: nrecvs = gen_from->n;
643: nsends = gen_to->n;
644: indices = gen_to->indices;
645: sstarts = gen_to->starts;
647: if (!(mode & SCATTER_LOCAL)) {
649: if (!gen_from->use_readyreceiver && !gen_to->sendfirst) {
650: /* post receives since they were not posted in VecScatterPostRecvs() */
651: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
652: }
653: if (ctx->packtogether) {
654: /* this version packs all the messages together and sends, when -vecscatter_packtogether used */
655: len = 12*sstarts[nsends];
656: val = svalues;
657: for (i=0; i<len; i += 12) {
658: idx = *indices++;
659: val[0] = xv[idx];
660: val[1] = xv[idx+1];
661: val[2] = xv[idx+2];
662: val[3] = xv[idx+3];
663: val[4] = xv[idx+4];
664: val[5] = xv[idx+5];
665: val[6] = xv[idx+6];
666: val[7] = xv[idx+7];
667: val[8] = xv[idx+8];
668: val[9] = xv[idx+9];
669: val[10] = xv[idx+10];
670: val[11] = xv[idx+11];
671: val += 12;
672: }
673: MPI_Startall_isend(len,nsends,swaits);
674: } else {
675: /* this version packs and sends one at a time */
676: val = svalues;
677: for (i=0; i<nsends; i++) {
678: iend = sstarts[i+1]-sstarts[i];
680: for (j=0; j<iend; j++) {
681: idx = *indices++;
682: val[0] = xv[idx];
683: val[1] = xv[idx+1];
684: val[2] = xv[idx+2];
685: val[3] = xv[idx+3];
686: val[4] = xv[idx+4];
687: val[5] = xv[idx+5];
688: val[6] = xv[idx+6];
689: val[7] = xv[idx+7];
690: val[8] = xv[idx+8];
691: val[9] = xv[idx+9];
692: val[10] = xv[idx+10];
693: val[11] = xv[idx+11];
694: val += 12;
695: }
696: MPI_Start_isend(12*iend,swaits+i);
697: }
698: }
700: if (!gen_from->use_readyreceiver && gen_to->sendfirst) {
701: /* post receives since they were not posted in VecScatterPostRecvs() */
702: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
703: }
704: }
706: /* take care of local scatters */
707: if (gen_to->local.n) {
708: int *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
709: int n = gen_to->local.n,il,ir;
710: if (addv == INSERT_VALUES) {
711: if (gen_to->local.is_copy) {
712: PetscMemcpy(yv+gen_from->local.copy_start,xv+gen_to->local.copy_start,gen_to->local.copy_length);
713: } else {
714: for (i=0; i<n; i++) {
715: il = fslots[i]; ir = tslots[i];
716: yv[il] = xv[ir];
717: yv[il+1] = xv[ir+1];
718: yv[il+2] = xv[ir+2];
719: yv[il+3] = xv[ir+3];
720: yv[il+4] = xv[ir+4];
721: yv[il+5] = xv[ir+5];
722: yv[il+6] = xv[ir+6];
723: yv[il+7] = xv[ir+7];
724: yv[il+8] = xv[ir+8];
725: yv[il+9] = xv[ir+9];
726: yv[il+10] = xv[ir+10];
727: yv[il+11] = xv[ir+11];
728: }
729: }
730: } else if (addv == ADD_VALUES) {
731: for (i=0; i<n; i++) {
732: il = fslots[i]; ir = tslots[i];
733: yv[il] += xv[ir];
734: yv[il+1] += xv[ir+1];
735: yv[il+2] += xv[ir+2];
736: yv[il+3] += xv[ir+3];
737: yv[il+4] += xv[ir+4];
738: yv[il+5] += xv[ir+5];
739: yv[il+6] += xv[ir+6];
740: yv[il+7] += xv[ir+7];
741: yv[il+8] += xv[ir+8];
742: yv[il+9] += xv[ir+9];
743: yv[il+10] += xv[ir+10];
744: yv[il+11] += xv[ir+11];
745: }
746: #if !defined(PETSC_USE_COMPLEX)
747: } else if (addv == MAX_VALUES) {
748: for (i=0; i<n; i++) {
749: il = fslots[i]; ir = tslots[i];
750: yv[il] = PetscMax(yv[il],xv[ir]);
751: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
752: yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
753: yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
754: yv[il+4] = PetscMax(yv[il+4],xv[ir+4]);
755: yv[il+5] = PetscMax(yv[il+5],xv[ir+5]);
756: yv[il+6] = PetscMax(yv[il+6],xv[ir+6]);
757: yv[il+7] = PetscMax(yv[il+7],xv[ir+7]);
758: yv[il+8] = PetscMax(yv[il+8],xv[ir+8]);
759: yv[il+9] = PetscMax(yv[il+9],xv[ir+9]);
760: yv[il+10] = PetscMax(yv[il+10],xv[ir+10]);
761: yv[il+11] = PetscMax(yv[il+11],xv[ir+11]);
762: }
763: #endif
764: } else {SETERRQ(1,"Wrong insert option");}
765: }
766: VecRestoreArray(xin,&xv);
767: if (xin != yin) {VecRestoreArray(yin,&yv);}
768: return(0);
769: }
771: /* --------------------------------------------------------------------------------------*/
773: int VecScatterEnd_PtoP_12(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
774: {
775: VecScatter_MPI_General *gen_to,*gen_from;
776: PetscScalar *rvalues,*yv,*val;
777: int ierr,nrecvs,nsends,i,*indices,count,imdex,n,*rstarts,*lindices,idx;
778: MPI_Request *rwaits,*swaits;
779: MPI_Status *rstatus,*sstatus;
782: if (mode & SCATTER_LOCAL) return(0);
783: VecGetArray(yin,&yv);
785: if (mode & SCATTER_REVERSE) {
786: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
787: gen_from = (VecScatter_MPI_General*)ctx->todata;
788: rwaits = gen_from->rev_requests;
789: swaits = gen_to->rev_requests;
790: sstatus = gen_from->sstatus;
791: rstatus = gen_from->rstatus;
792: } else {
793: gen_to = (VecScatter_MPI_General*)ctx->todata;
794: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
795: rwaits = gen_from->requests;
796: swaits = gen_to->requests;
797: sstatus = gen_to->sstatus;
798: rstatus = gen_to->rstatus;
799: }
800: rvalues = gen_from->values;
801: nrecvs = gen_from->n;
802: nsends = gen_to->n;
803: indices = gen_from->indices;
804: rstarts = gen_from->starts;
806: /* wait on receives */
807: count = nrecvs;
808: if (ctx->packtogether) { /* receive all messages, then unpack all, when -vecscatter_packtogether used */
809: ierr = MPI_Waitall(nrecvs,rwaits,rstatus);
810: n = rstarts[count];
811: val = rvalues;
812: lindices = indices;
813: if (addv == INSERT_VALUES) {
814: for (i=0; i<n; i++) {
815: idx = lindices[i];
816: yv[idx] = val[0];
817: yv[idx+1] = val[1];
818: yv[idx+2] = val[2];
819: yv[idx+3] = val[3];
820: yv[idx+4] = val[4];
821: yv[idx+5] = val[5];
822: yv[idx+6] = val[6];
823: yv[idx+7] = val[7];
824: yv[idx+8] = val[8];
825: yv[idx+9] = val[9];
826: yv[idx+10] = val[10];
827: yv[idx+11] = val[11];
828: val += 12;
829: }
830: } else if (addv == ADD_VALUES) {
831: for (i=0; i<n; i++) {
832: idx = lindices[i];
833: yv[idx] += val[0];
834: yv[idx+1] += val[1];
835: yv[idx+2] += val[2];
836: yv[idx+3] += val[3];
837: yv[idx+4] += val[4];
838: yv[idx+5] += val[5];
839: yv[idx+6] += val[6];
840: yv[idx+7] += val[7];
841: yv[idx+8] += val[8];
842: yv[idx+9] += val[9];
843: yv[idx+10] += val[10];
844: yv[idx+11] += val[11];
845: val += 12;
846: }
847: #if !defined(PETSC_USE_COMPLEX)
848: } else if (addv == MAX_VALUES) {
849: for (i=0; i<n; i++) {
850: idx = lindices[i];
851: yv[idx] = PetscMax(yv[idx],val[0]);
852: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
853: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
854: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
855: yv[idx+4] = PetscMax(yv[idx+4],val[4]);
856: yv[idx+5] = PetscMax(yv[idx+5],val[5]);
857: yv[idx+6] = PetscMax(yv[idx+6],val[6]);
858: yv[idx+7] = PetscMax(yv[idx+7],val[7]);
859: yv[idx+8] = PetscMax(yv[idx+8],val[8]);
860: yv[idx+9] = PetscMax(yv[idx+9],val[9]);
861: yv[idx+10] = PetscMax(yv[idx+10],val[10]);
862: yv[idx+11] = PetscMax(yv[idx+11],val[11]);
863: val += 12;
864: }
865: #endif
866: } else {SETERRQ(1,"Wrong insert option");}
867: } else { /* unpack each message as it arrives, default version */
868: while (count) {
869: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus[0]);
870: /* unpack receives into our local space */
871: val = rvalues + 12*rstarts[imdex];
872: lindices = indices + rstarts[imdex];
873: n = rstarts[imdex+1] - rstarts[imdex];
874: if (addv == INSERT_VALUES) {
875: for (i=0; i<n; i++) {
876: idx = lindices[i];
877: yv[idx] = val[0];
878: yv[idx+1] = val[1];
879: yv[idx+2] = val[2];
880: yv[idx+3] = val[3];
881: yv[idx+4] = val[4];
882: yv[idx+5] = val[5];
883: yv[idx+6] = val[6];
884: yv[idx+7] = val[7];
885: yv[idx+8] = val[8];
886: yv[idx+9] = val[9];
887: yv[idx+10] = val[10];
888: yv[idx+11] = val[11];
889: val += 12;
890: }
891: } else if (addv == ADD_VALUES) {
892: for (i=0; i<n; i++) {
893: idx = lindices[i];
894: yv[idx] += val[0];
895: yv[idx+1] += val[1];
896: yv[idx+2] += val[2];
897: yv[idx+3] += val[3];
898: yv[idx+4] += val[4];
899: yv[idx+5] += val[5];
900: yv[idx+6] += val[6];
901: yv[idx+7] += val[7];
902: yv[idx+8] += val[8];
903: yv[idx+9] += val[9];
904: yv[idx+10] += val[10];
905: yv[idx+11] += val[11];
906: val += 12;
907: }
908: #if !defined(PETSC_USE_COMPLEX)
909: } else if (addv == MAX_VALUES) {
910: for (i=0; i<n; i++) {
911: idx = lindices[i];
912: yv[idx] = PetscMax(yv[idx],val[0]);
913: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
914: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
915: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
916: yv[idx+4] = PetscMax(yv[idx+4],val[4]);
917: yv[idx+5] = PetscMax(yv[idx+5],val[5]);
918: yv[idx+6] = PetscMax(yv[idx+6],val[6]);
919: yv[idx+7] = PetscMax(yv[idx+7],val[7]);
920: yv[idx+8] = PetscMax(yv[idx+8],val[8]);
921: yv[idx+9] = PetscMax(yv[idx+9],val[9]);
922: yv[idx+10] = PetscMax(yv[idx+10],val[10]);
923: yv[idx+11] = PetscMax(yv[idx+11],val[11]);
924: val += 12;
925: }
926: #endif
927: } else {SETERRQ(1,"Wrong insert option");}
928: count--;
929: }
930: }
931: /* wait on sends */
932: if (nsends) {
933: MPI_Waitall(nsends,swaits,sstatus);
934: }
935: VecRestoreArray(yin,&yv);
936: return(0);
937: }
939: /* --------------------------------------------------------------------------------------*/
941: int VecScatterBegin_PtoP_5(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
942: {
943: VecScatter_MPI_General *gen_to,*gen_from;
944: PetscScalar *xv,*yv,*val,*svalues;
945: MPI_Request *rwaits,*swaits;
946: int ierr,i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;
949: VecGetArray(xin,&xv);
950: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
951: if (mode & SCATTER_REVERSE) {
952: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
953: gen_from = (VecScatter_MPI_General*)ctx->todata;
954: rwaits = gen_from->rev_requests;
955: swaits = gen_to->rev_requests;
956: } else {
957: gen_to = (VecScatter_MPI_General*)ctx->todata;
958: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
959: rwaits = gen_from->requests;
960: swaits = gen_to->requests;
961: }
962: svalues = gen_to->values;
963: nrecvs = gen_from->n;
964: nsends = gen_to->n;
965: indices = gen_to->indices;
966: sstarts = gen_to->starts;
968: if (!(mode & SCATTER_LOCAL)) {
970: if (gen_to->sendfirst) {
971: /* this version packs and sends one at a time */
972: val = svalues;
973: for (i=0; i<nsends; i++) {
974: iend = sstarts[i+1]-sstarts[i];
976: for (j=0; j<iend; j++) {
977: idx = *indices++;
978: val[0] = xv[idx];
979: val[1] = xv[idx+1];
980: val[2] = xv[idx+2];
981: val[3] = xv[idx+3];
982: val[4] = xv[idx+4];
983: val += 5;
984: }
985: MPI_Start_isend(5*iend,swaits+i);
986: }
987: }
989: if (!gen_from->use_readyreceiver) {
990: /* post receives since they were not posted in VecScatterPostRecvs() */
991: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
992: }
994: if (!gen_to->sendfirst) {
995: /* this version packs all the messages together and sends */
996: /*
997: len = 5*sstarts[nsends];
998: val = svalues;
999: for (i=0; i<len; i += 5) {
1000: idx = *indices++;
1001: val[0] = xv[idx];
1002: val[1] = xv[idx+1];
1003: val[2] = xv[idx+2];
1004: val[3] = xv[idx+3];
1005: val[4] = xv[idx+4];
1006: val += 5;
1007: }
1008: MPI_Startall_isend(len,nsends,swaits);
1009: */
1011: /* this version packs and sends one at a time */
1012: val = svalues;
1013: for (i=0; i<nsends; i++) {
1014: iend = sstarts[i+1]-sstarts[i];
1016: for (j=0; j<iend; j++) {
1017: idx = *indices++;
1018: val[0] = xv[idx];
1019: val[1] = xv[idx+1];
1020: val[2] = xv[idx+2];
1021: val[3] = xv[idx+3];
1022: val[4] = xv[idx+4];
1023: val += 5;
1024: }
1025: MPI_Start_isend(5*iend,swaits+i);
1026: }
1027: }
1028: }
1030: /* take care of local scatters */
1031: if (gen_to->local.n) {
1032: int *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
1033: int n = gen_to->local.n,il,ir;
1034: if (addv == INSERT_VALUES) {
1035: if (gen_to->local.is_copy) {
1036: PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1037: } else {
1038: for (i=0; i<n; i++) {
1039: il = fslots[i]; ir = tslots[i];
1040: yv[il] = xv[ir];
1041: yv[il+1] = xv[ir+1];
1042: yv[il+2] = xv[ir+2];
1043: yv[il+3] = xv[ir+3];
1044: yv[il+4] = xv[ir+4];
1045: }
1046: }
1047: } else if (addv == ADD_VALUES) {
1048: for (i=0; i<n; i++) {
1049: il = fslots[i]; ir = tslots[i];
1050: yv[il] += xv[ir];
1051: yv[il+1] += xv[ir+1];
1052: yv[il+2] += xv[ir+2];
1053: yv[il+3] += xv[ir+3];
1054: yv[il+4] += xv[ir+4];
1055: }
1056: #if !defined(PETSC_USE_COMPLEX)
1057: } else if (addv == MAX_VALUES) {
1058: for (i=0; i<n; i++) {
1059: il = fslots[i]; ir = tslots[i];
1060: yv[il] = PetscMax(yv[il],xv[ir]);
1061: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1062: yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1063: yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
1064: yv[il+4] = PetscMax(yv[il+4],xv[ir+4]);
1065: }
1066: #endif
1067: } else {SETERRQ(1,"Wrong insert option");}
1068: }
1069: VecRestoreArray(xin,&xv);
1070: if (xin != yin) {VecRestoreArray(yin,&yv);}
1071: return(0);
1072: }
1074: /* --------------------------------------------------------------------------------------*/
1076: int VecScatterEnd_PtoP_5(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1077: {
1078: VecScatter_MPI_General *gen_to,*gen_from;
1079: PetscScalar *rvalues,*yv,*val;
1080: int ierr,nrecvs,nsends,i,*indices,count,imdex,n,*rstarts,*lindices,idx;
1081: MPI_Request *rwaits,*swaits;
1082: MPI_Status rstatus,*sstatus;
1085: if (mode & SCATTER_LOCAL) return(0);
1086: VecGetArray(yin,&yv);
1088: if (mode & SCATTER_REVERSE) {
1089: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1090: gen_from = (VecScatter_MPI_General*)ctx->todata;
1091: rwaits = gen_from->rev_requests;
1092: swaits = gen_to->rev_requests;
1093: sstatus = gen_from->sstatus;
1094: } else {
1095: gen_to = (VecScatter_MPI_General*)ctx->todata;
1096: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1097: rwaits = gen_from->requests;
1098: swaits = gen_to->requests;
1099: sstatus = gen_to->sstatus;
1100: }
1101: rvalues = gen_from->values;
1102: nrecvs = gen_from->n;
1103: nsends = gen_to->n;
1104: indices = gen_from->indices;
1105: rstarts = gen_from->starts;
1107: /* wait on receives */
1108: count = nrecvs;
1109: while (count) {
1110: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
1111: /* unpack receives into our local space */
1112: val = rvalues + 5*rstarts[imdex];
1113: lindices = indices + rstarts[imdex];
1114: n = rstarts[imdex+1] - rstarts[imdex];
1115: if (addv == INSERT_VALUES) {
1116: for (i=0; i<n; i++) {
1117: idx = lindices[i];
1118: yv[idx] = val[0];
1119: yv[idx+1] = val[1];
1120: yv[idx+2] = val[2];
1121: yv[idx+3] = val[3];
1122: yv[idx+4] = val[4];
1123: val += 5;
1124: }
1125: } else if (addv == ADD_VALUES) {
1126: for (i=0; i<n; i++) {
1127: idx = lindices[i];
1128: yv[idx] += val[0];
1129: yv[idx+1] += val[1];
1130: yv[idx+2] += val[2];
1131: yv[idx+3] += val[3];
1132: yv[idx+4] += val[4];
1133: val += 5;
1134: }
1135: #if !defined(PETSC_USE_COMPLEX)
1136: } else if (addv == MAX_VALUES) {
1137: for (i=0; i<n; i++) {
1138: idx = lindices[i];
1139: yv[idx] = PetscMax(yv[idx],val[0]);
1140: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1141: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1142: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1143: yv[idx+4] = PetscMax(yv[idx+4],val[4]);
1144: val += 5;
1145: }
1146: #endif
1147: } else {SETERRQ(1,"Wrong insert option");}
1148: count--;
1149: }
1150: /* wait on sends */
1151: if (nsends) {
1152: MPI_Waitall(nsends,swaits,sstatus);
1153: }
1154: VecRestoreArray(yin,&yv);
1155: return(0);
1156: }
1158: /* --------------------------------------------------------------------------------------*/
1160: int VecScatterBegin_PtoP_4(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1161: {
1162: VecScatter_MPI_General *gen_to,*gen_from;
1163: PetscScalar *xv,*yv,*val,*svalues;
1164: MPI_Request *rwaits,*swaits;
1165: int *indices,*sstarts,iend,i,j,nrecvs,nsends,idx,ierr,len;
1168: VecGetArray(xin,&xv);
1169: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
1171: if (mode & SCATTER_REVERSE) {
1172: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1173: gen_from = (VecScatter_MPI_General*)ctx->todata;
1174: rwaits = gen_from->rev_requests;
1175: swaits = gen_to->rev_requests;
1176: } else {
1177: gen_to = (VecScatter_MPI_General*)ctx->todata;
1178: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1179: rwaits = gen_from->requests;
1180: swaits = gen_to->requests;
1181: }
1182: svalues = gen_to->values;
1183: nrecvs = gen_from->n;
1184: nsends = gen_to->n;
1185: indices = gen_to->indices;
1186: sstarts = gen_to->starts;
1188: if (!(mode & SCATTER_LOCAL)) {
1190: if (!gen_from->use_readyreceiver && !gen_to->sendfirst) {
1191: /* post receives since they were not posted in VecScatterPostRecvs() */
1192: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1193: }
1195: if (ctx->packtogether) {
1196: /* this version packs all the messages together and sends, when -vecscatter_packtogether used */
1197: len = 4*sstarts[nsends];
1198: val = svalues;
1199: for (i=0; i<len; i += 4) {
1200: idx = *indices++;
1201: val[0] = xv[idx];
1202: val[1] = xv[idx+1];
1203: val[2] = xv[idx+2];
1204: val[3] = xv[idx+3];
1205: val += 4;
1206: }
1207: MPI_Startall_isend(len,nsends,swaits);
1208: } else {
1209: /* this version packs and sends one at a time, default */
1210: val = svalues;
1211: for (i=0; i<nsends; i++) {
1212: iend = sstarts[i+1]-sstarts[i];
1214: for (j=0; j<iend; j++) {
1215: idx = *indices++;
1216: val[0] = xv[idx];
1217: val[1] = xv[idx+1];
1218: val[2] = xv[idx+2];
1219: val[3] = xv[idx+3];
1220: val += 4;
1221: }
1222: MPI_Start_isend(4*iend,swaits+i);
1223: }
1224: }
1226: if (!gen_from->use_readyreceiver && gen_to->sendfirst) {
1227: /* post receives since they were not posted in VecScatterPostRecvs() */
1228: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1229: }
1230: }
1232: /* take care of local scatters */
1233: if (gen_to->local.n) {
1234: int *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
1235: int n = gen_to->local.n,il,ir;
1236: if (addv == INSERT_VALUES) {
1237: if (gen_to->local.is_copy) {
1238: PetscMemcpy(yv+gen_from->local.copy_start,xv+gen_to->local.copy_start,gen_to->local.copy_length);
1239: } else {
1240: for (i=0; i<n; i++) {
1241: il = fslots[i]; ir = tslots[i];
1242: yv[il] = xv[ir];
1243: yv[il+1] = xv[ir+1];
1244: yv[il+2] = xv[ir+2];
1245: yv[il+3] = xv[ir+3];
1246: }
1247: }
1248: } else if (addv == ADD_VALUES) {
1249: for (i=0; i<n; i++) {
1250: il = fslots[i]; ir = tslots[i];
1251: yv[il] += xv[ir];
1252: yv[il+1] += xv[ir+1];
1253: yv[il+2] += xv[ir+2];
1254: yv[il+3] += xv[ir+3];
1255: }
1256: #if !defined(PETSC_USE_COMPLEX)
1257: } else if (addv == MAX_VALUES) {
1258: for (i=0; i<n; i++) {
1259: il = fslots[i]; ir = tslots[i];
1260: yv[il] = PetscMax(yv[il],xv[ir]);
1261: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1262: yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1263: yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
1264: }
1265: #endif
1266: } else {SETERRQ(1,"Wrong insert option");}
1267: }
1268: VecRestoreArray(xin,&xv);
1269: if (xin != yin) {VecRestoreArray(yin,&yv);}
1270: return(0);
1271: }
1273: /* --------------------------------------------------------------------------------------*/
1275: int VecScatterEnd_PtoP_4(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1276: {
1277: VecScatter_MPI_General *gen_to,*gen_from;
1278: PetscScalar *rvalues,*yv,*val;
1279: int ierr,nrecvs,nsends,i,*indices,count,imdex,n,*rstarts,*lindices,idx;
1280: MPI_Request *rwaits,*swaits;
1281: MPI_Status *rstatus,*sstatus;
1284: if (mode & SCATTER_LOCAL) return(0);
1285: VecGetArray(yin,&yv);
1287: if (mode & SCATTER_REVERSE) {
1288: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1289: gen_from = (VecScatter_MPI_General*)ctx->todata;
1290: rwaits = gen_from->rev_requests;
1291: swaits = gen_to->rev_requests;
1292: sstatus = gen_from->sstatus;
1293: rstatus = gen_from->rstatus;
1294: } else {
1295: gen_to = (VecScatter_MPI_General*)ctx->todata;
1296: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1297: rwaits = gen_from->requests;
1298: swaits = gen_to->requests;
1299: sstatus = gen_to->sstatus;
1300: rstatus = gen_to->rstatus;
1301: }
1302: rvalues = gen_from->values;
1303: nrecvs = gen_from->n;
1304: nsends = gen_to->n;
1305: indices = gen_from->indices;
1306: rstarts = gen_from->starts;
1308: /* wait on receives */
1309: count = nrecvs;
1310: if (ctx->packtogether) { /* receive all messages, then unpack all, when -vecscatter_packtogether used */
1311: ierr = MPI_Waitall(nrecvs,rwaits,rstatus);
1312: n = rstarts[count];
1313: val = rvalues;
1314: lindices = indices;
1315: if (addv == INSERT_VALUES) {
1316: for (i=0; i<n; i++) {
1317: idx = lindices[i];
1318: yv[idx] = val[0];
1319: yv[idx+1] = val[1];
1320: yv[idx+2] = val[2];
1321: yv[idx+3] = val[3];
1322: val += 4;
1323: }
1324: } else if (addv == ADD_VALUES) {
1325: for (i=0; i<n; i++) {
1326: idx = lindices[i];
1327: yv[idx] += val[0];
1328: yv[idx+1] += val[1];
1329: yv[idx+2] += val[2];
1330: yv[idx+3] += val[3];
1331: val += 4;
1332: }
1333: #if !defined(PETSC_USE_COMPLEX)
1334: } else if (addv == MAX_VALUES) {
1335: for (i=0; i<n; i++) {
1336: idx = lindices[i];
1337: yv[idx] = PetscMax(yv[idx],val[0]);
1338: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1339: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1340: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1341: val += 4;
1342: }
1343: #endif
1344: } else {SETERRQ(1,"Wrong insert option");}
1345: } else { /* unpack each message as it arrives, default version */
1346: while (count) {
1347: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus[0]);
1348: /* unpack receives into our local space */
1349: val = rvalues + 4*rstarts[imdex];
1350: lindices = indices + rstarts[imdex];
1351: n = rstarts[imdex+1] - rstarts[imdex];
1352: if (addv == INSERT_VALUES) {
1353: for (i=0; i<n; i++) {
1354: idx = lindices[i];
1355: yv[idx] = val[0];
1356: yv[idx+1] = val[1];
1357: yv[idx+2] = val[2];
1358: yv[idx+3] = val[3];
1359: val += 4;
1360: }
1361: } else if (addv == ADD_VALUES) {
1362: for (i=0; i<n; i++) {
1363: idx = lindices[i];
1364: yv[idx] += val[0];
1365: yv[idx+1] += val[1];
1366: yv[idx+2] += val[2];
1367: yv[idx+3] += val[3];
1368: val += 4;
1369: }
1370: #if !defined(PETSC_USE_COMPLEX)
1371: } else if (addv == MAX_VALUES) {
1372: for (i=0; i<n; i++) {
1373: idx = lindices[i];
1374: yv[idx] = PetscMax(yv[idx],val[0]);
1375: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1376: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1377: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1378: val += 4;
1379: }
1380: #endif
1381: } else {SETERRQ(1,"Wrong insert option");}
1382: count--;
1383: }
1384: }
1386: /* wait on sends */
1387: if (nsends) {
1388: MPI_Waitall(nsends,swaits,sstatus);
1389: }
1390: VecRestoreArray(yin,&yv);
1391: return(0);
1392: }
1394: /* --------------------------------------------------------------------------------------*/
1396: int VecScatterBegin_PtoP_3(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1397: {
1398: VecScatter_MPI_General *gen_to,*gen_from;
1399: PetscScalar *xv,*yv,*val,*svalues;
1400: MPI_Request *rwaits,*swaits;
1401: int ierr,i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;
1404: VecGetArray(xin,&xv);
1405: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
1407: if (mode & SCATTER_REVERSE) {
1408: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1409: gen_from = (VecScatter_MPI_General*)ctx->todata;
1410: rwaits = gen_from->rev_requests;
1411: swaits = gen_to->rev_requests;
1412: } else {
1413: gen_to = (VecScatter_MPI_General*)ctx->todata;
1414: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1415: rwaits = gen_from->requests;
1416: swaits = gen_to->requests;
1417: }
1418: svalues = gen_to->values;
1419: nrecvs = gen_from->n;
1420: nsends = gen_to->n;
1421: indices = gen_to->indices;
1422: sstarts = gen_to->starts;
1424: if (!(mode & SCATTER_LOCAL)) {
1426: if (gen_to->sendfirst) {
1427: /* this version packs and sends one at a time */
1428: val = svalues;
1429: for (i=0; i<nsends; i++) {
1430: iend = sstarts[i+1]-sstarts[i];
1432: for (j=0; j<iend; j++) {
1433: idx = *indices++;
1434: val[0] = xv[idx];
1435: val[1] = xv[idx+1];
1436: val[2] = xv[idx+2];
1437: val += 3;
1438: }
1439: MPI_Start_isend(3*iend,swaits+i);
1440: }
1441: }
1443: if (!gen_from->use_readyreceiver) {
1444: /* post receives since they were not posted in VecScatterPostRecvs() */
1445: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1446: }
1448: if (!gen_to->sendfirst) {
1449: /* this version packs all the messages together and sends */
1450: /*
1451: len = 3*sstarts[nsends];
1452: val = svalues;
1453: for (i=0; i<len; i += 3) {
1454: idx = *indices++;
1455: val[0] = xv[idx];
1456: val[1] = xv[idx+1];
1457: val[2] = xv[idx+2];
1458: val += 3;
1459: }
1460: MPI_Startall_isend(len,nsends,swaits);
1461: */
1463: /* this version packs and sends one at a time */
1464: val = svalues;
1465: for (i=0; i<nsends; i++) {
1466: iend = sstarts[i+1]-sstarts[i];
1468: for (j=0; j<iend; j++) {
1469: idx = *indices++;
1470: val[0] = xv[idx];
1471: val[1] = xv[idx+1];
1472: val[2] = xv[idx+2];
1473: val += 3;
1474: }
1475: MPI_Start_isend(3*iend,swaits+i);
1476: }
1477: }
1478: }
1480: /* take care of local scatters */
1481: if (gen_to->local.n) {
1482: int *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
1483: int n = gen_to->local.n,il,ir;
1484: if (addv == INSERT_VALUES) {
1485: if (gen_to->local.is_copy) {
1486: PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1487: } else {
1488: for (i=0; i<n; i++) {
1489: il = fslots[i]; ir = tslots[i];
1490: yv[il] = xv[ir];
1491: yv[il+1] = xv[ir+1];
1492: yv[il+2] = xv[ir+2];
1494: }
1495: }
1496: } else if (addv == ADD_VALUES) {
1497: for (i=0; i<n; i++) {
1498: il = fslots[i]; ir = tslots[i];
1499: yv[il] += xv[ir];
1500: yv[il+1] += xv[ir+1];
1501: yv[il+2] += xv[ir+2];
1503: }
1504: #if !defined(PETSC_USE_COMPLEX)
1505: } else if (addv == MAX_VALUES) {
1506: for (i=0; i<n; i++) {
1507: il = fslots[i]; ir = tslots[i];
1508: yv[il] = PetscMax(yv[il],xv[ir]);
1509: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1510: yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1512: }
1513: #endif
1514: } else {SETERRQ(1,"Wrong insert option");}
1515: }
1516: VecRestoreArray(xin,&xv);
1517: if (xin != yin) {VecRestoreArray(yin,&yv);}
1518: return(0);
1519: }
1521: /* --------------------------------------------------------------------------------------*/
1523: int VecScatterEnd_PtoP_3(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1524: {
1525: VecScatter_MPI_General *gen_to,*gen_from;
1526: PetscScalar *rvalues,*yv,*val;
1527: int ierr,nrecvs,nsends,i,*indices,count,imdex,n,*rstarts,*lindices,idx;
1528: MPI_Request *rwaits,*swaits;
1529: MPI_Status rstatus,*sstatus;
1532: if (mode & SCATTER_LOCAL) return(0);
1533: VecGetArray(yin,&yv);
1535: if (mode & SCATTER_REVERSE) {
1536: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1537: gen_from = (VecScatter_MPI_General*)ctx->todata;
1538: rwaits = gen_from->rev_requests;
1539: swaits = gen_to->rev_requests;
1540: sstatus = gen_from->sstatus;
1541: } else {
1542: gen_to = (VecScatter_MPI_General*)ctx->todata;
1543: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1544: rwaits = gen_from->requests;
1545: swaits = gen_to->requests;
1546: sstatus = gen_to->sstatus;
1547: }
1548: rvalues = gen_from->values;
1549: nrecvs = gen_from->n;
1550: nsends = gen_to->n;
1551: indices = gen_from->indices;
1552: rstarts = gen_from->starts;
1554: /* wait on receives */
1555: count = nrecvs;
1556: while (count) {
1557: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
1558: /* unpack receives into our local space */
1559: val = rvalues + 3*rstarts[imdex];
1560: lindices = indices + rstarts[imdex];
1561: n = rstarts[imdex+1] - rstarts[imdex];
1562: if (addv == INSERT_VALUES) {
1563: for (i=0; i<n; i++) {
1564: idx = lindices[i];
1565: yv[idx] = val[0];
1566: yv[idx+1] = val[1];
1567: yv[idx+2] = val[2];
1568: val += 3;
1569: }
1570: } else if (addv == ADD_VALUES) {
1571: for (i=0; i<n; i++) {
1572: idx = lindices[i];
1573: yv[idx] += val[0];
1574: yv[idx+1] += val[1];
1575: yv[idx+2] += val[2];
1576: val += 3;
1577: }
1578: #if !defined(PETSC_USE_COMPLEX)
1579: } else if (addv == MAX_VALUES) {
1580: for (i=0; i<n; i++) {
1581: idx = lindices[i];
1582: yv[idx] = PetscMax(yv[idx],val[0]);
1583: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1584: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1585: val += 3;
1586: }
1587: #endif
1588: } else {SETERRQ(1,"Wrong insert option");}
1589: count--;
1590: }
1591: /* wait on sends */
1592: if (nsends) {
1593: MPI_Waitall(nsends,swaits,sstatus);
1594: }
1595: VecRestoreArray(yin,&yv);
1596: return(0);
1597: }
1599: /* --------------------------------------------------------------------------------------*/
1601: int VecScatterBegin_PtoP_2(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1602: {
1603: VecScatter_MPI_General *gen_to,*gen_from;
1604: PetscScalar *xv,*yv,*val,*svalues;
1605: MPI_Request *rwaits,*swaits;
1606: int ierr,i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;
1609: VecGetArray(xin,&xv);
1610: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
1611: if (mode & SCATTER_REVERSE) {
1612: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1613: gen_from = (VecScatter_MPI_General*)ctx->todata;
1614: rwaits = gen_from->rev_requests;
1615: swaits = gen_to->rev_requests;
1616: } else {
1617: gen_to = (VecScatter_MPI_General*)ctx->todata;
1618: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1619: rwaits = gen_from->requests;
1620: swaits = gen_to->requests;
1621: }
1622: svalues = gen_to->values;
1623: nrecvs = gen_from->n;
1624: nsends = gen_to->n;
1625: indices = gen_to->indices;
1626: sstarts = gen_to->starts;
1628: if (!(mode & SCATTER_LOCAL)) {
1630: if (gen_to->sendfirst) {
1631: /* this version packs and sends one at a time */
1632: val = svalues;
1633: for (i=0; i<nsends; i++) {
1634: iend = sstarts[i+1]-sstarts[i];
1636: for (j=0; j<iend; j++) {
1637: idx = *indices++;
1638: val[0] = xv[idx];
1639: val[1] = xv[idx+1];
1640: val += 2;
1641: }
1642: MPI_Start_isend(2*iend,swaits+i);
1643: }
1644: }
1646: if (!gen_from->use_readyreceiver) {
1647: /* post receives since they were not posted in VecScatterPostRecvs() */
1648: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1649: }
1651: if (!gen_to->sendfirst) {
1652: /* this version packs all the messages together and sends */
1653: /*
1654: len = 2*sstarts[nsends];
1655: val = svalues;
1656: for (i=0; i<len; i += 2) {
1657: idx = *indices++;
1658: val[0] = xv[idx];
1659: val[1] = xv[idx+1];
1660: val += 2;
1661: }
1662: MPI_Startall_isend(len,nsends,swaits);
1663: */
1665: /* this version packs and sends one at a time */
1666: val = svalues;
1667: for (i=0; i<nsends; i++) {
1668: iend = sstarts[i+1]-sstarts[i];
1670: for (j=0; j<iend; j++) {
1671: idx = *indices++;
1672: val[0] = xv[idx];
1673: val[1] = xv[idx+1];
1674: val += 2;
1675: }
1676: MPI_Start_isend(2*iend,swaits+i);
1677: }
1678: }
1679: }
1681: /* take care of local scatters */
1682: if (gen_to->local.n) {
1683: int *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
1684: int n = gen_to->local.n,il,ir;
1685: if (addv == INSERT_VALUES) {
1686: if (gen_to->local.is_copy) {
1687: PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1688: } else {
1689: for (i=0; i<n; i++) {
1690: il = fslots[i]; ir = tslots[i];
1691: yv[il] = xv[ir];
1692: yv[il+1] = xv[ir+1];
1693: }
1694: }
1695: } else if (addv == ADD_VALUES) {
1696: for (i=0; i<n; i++) {
1697: il = fslots[i]; ir = tslots[i];
1698: yv[il] += xv[ir];
1699: yv[il+1] += xv[ir+1];
1700: }
1701: #if !defined(PETSC_USE_COMPLEX)
1702: } else if (addv == MAX_VALUES) {
1703: for (i=0; i<n; i++) {
1704: il = fslots[i]; ir = tslots[i];
1705: yv[il] = PetscMax(yv[il],xv[ir]);
1706: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1707: }
1708: #endif
1709: } else {SETERRQ(1,"Wrong insert option");}
1710: }
1711: VecRestoreArray(xin,&xv);
1712: if (xin != yin) {VecRestoreArray(yin,&yv);}
1713: return(0);
1714: }
1716: /* --------------------------------------------------------------------------------------*/
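/*
   VecScatterEnd_PtoP_2 completes a block-size 2 scatter: it waits for each
   incoming message with MPI_Waitany(), unpacks the two values per index into
   yv according to addv (INSERT_VALUES, ADD_VALUES, or MAX_VALUES for real
   scalars), and finally waits on the outstanding sends.
*/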
1718: int VecScatterEnd_PtoP_2(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1719: {
1720: VecScatter_MPI_General *gen_to,*gen_from;
1721: PetscScalar *rvalues,*yv,*val;
1722: int ierr,nrecvs,nsends,i,*indices,count,imdex,n,*rstarts,*lindices,idx;
1723: MPI_Request *rwaits,*swaits;
1724: MPI_Status rstatus,*sstatus;
1727: if (mode & SCATTER_LOCAL) return(0);
1728: VecGetArray(yin,&yv);
1730: if (mode & SCATTER_REVERSE) {
1731: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1732: gen_from = (VecScatter_MPI_General*)ctx->todata;
1733: rwaits = gen_from->rev_requests;
1734: swaits = gen_to->rev_requests;
1735: sstatus = gen_from->sstatus;
1736: } else {
1737: gen_to = (VecScatter_MPI_General*)ctx->todata;
1738: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1739: rwaits = gen_from->requests;
1740: swaits = gen_to->requests;
1741: sstatus = gen_to->sstatus;
1742: }
1743: rvalues = gen_from->values;
1744: nrecvs = gen_from->n;
1745: nsends = gen_to->n;
1746: indices = gen_from->indices;
1747: rstarts = gen_from->starts;
1749: /* wait on receives */
1750: count = nrecvs;
1751: while (count) {
1752: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
1753: /* unpack receives into our local space */
1754: val = rvalues + 2*rstarts[imdex];
1755: lindices = indices + rstarts[imdex];
1756: n = rstarts[imdex+1] - rstarts[imdex];
1757: if (addv == INSERT_VALUES) {
1758: for (i=0; i<n; i++) {
1759: idx = lindices[i];
1760: yv[idx] = val[0];
1761: yv[idx+1] = val[1];
1762: val += 2;
1763: }
1764: } else if (addv == ADD_VALUES) {
1765: for (i=0; i<n; i++) {
1766: idx = lindices[i];
1767: yv[idx] += val[0];
1768: yv[idx+1] += val[1];
1769: val += 2;
1770: }
1771: #if !defined(PETSC_USE_COMPLEX)
1772: } else if (addv == MAX_VALUES) {
1773: for (i=0; i<n; i++) {
1774: idx = lindices[i];
1775: yv[idx] = PetscMax(yv[idx],val[0]);
1776: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1777: val += 2;
1778: }
1779: #endif
1780: } else {SETERRQ(1,"Wrong insert option");}
1781: count--;
1782: }
1783: /* wait on sends */
1784: if (nsends) {
1785: MPI_Waitall(nsends,swaits,sstatus);
1786: }
1787: VecRestoreArray(yin,&yv);
1788: return(0);
1789: }
1791: /* ---------------------------------------------------------------------------------*/
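/*
   VecScatterDestroy_PtoP_X tears down a blocked (bs > 1) scatter context:
   any receives that were pre-posted for ready-receiver mode are cancelled,
   the persistent MPI requests created with MPI_Send_init()/MPI_Recv_init()
   are freed (unless the MPI implementation is known to mishandle
   MPI_Request_free()), and the packed index/buffer allocations are released.
*/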
1793: int VecScatterDestroy_PtoP_X(VecScatter ctx)
1794: {
1795: VecScatter_MPI_General *gen_to = (VecScatter_MPI_General*)ctx->todata;
1796: VecScatter_MPI_General *gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1797: int i,ierr;
1800: if (gen_to->use_readyreceiver) {
1801: /*
1802: Since the receives have already been posted (ready-receiver mode) we must
1803: cancel them before freeing the requests
1804: */
1805: for (i=0; i<gen_from->n; i++) {
1806: MPI_Cancel(gen_from->requests+i);
1807: }
1808: }
1810: if (gen_to->local.slots) {PetscFree(gen_to->local.slots);}
1811: if (gen_from->local.slots) {PetscFree(gen_from->local.slots);}
1812: if (gen_to->local.slots_nonmatching) {PetscFree(gen_to->local.slots_nonmatching);}
1813: if (gen_from->local.slots_nonmatching) {PetscFree(gen_from->local.slots_nonmatching);}
1815: /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
1816: /*
1817: IBM's PE version of MPI has a bug where freeing these requests corrupts
1818: later message passing.
1819: */
1820: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
1821: for (i=0; i<gen_to->n; i++) {
1822: MPI_Request_free(gen_to->requests + i);
1823: MPI_Request_free(gen_to->rev_requests + i);
1824: }
1826: /*
1827: MPICH could not properly cancel requests, so in ready-receiver mode we
1828: cannot free the requests. This may be fixed now; if not, put the following
1829: code inside an if (!gen_to->use_readyreceiver) { ... } block.
1830: */
1831: for (i=0; i<gen_from->n; i++) {
1832: MPI_Request_free(gen_from->requests + i);
1833: MPI_Request_free(gen_from->rev_requests + i);
1834: }
1835: #endif
1836:
1837: PetscFree(gen_to->sstatus);
1838: PetscFree(gen_to->values);
1839: PetscFree(gen_to->rev_requests);
1840: PetscFree(gen_to);
1841: PetscFree(gen_from->values);
1842: PetscFree(gen_from->rev_requests);
1843: PetscFree(gen_from);
1844: PetscLogObjectDestroy(ctx);
1845: PetscHeaderDestroy(ctx);
1846: return(0);
1847: }
1849: /* ==========================================================================================*/
1851: /* create parallel to sequential scatter context */
1852: /*
1853: bs indicates how many elements there are in each block. Normally
1854: this would be 1.
1855: */
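/*
   The construction proceeds in three stages:
     1) each process determines, from the global ownership ranges of xin, which
        process owns each of its nx requested indices;
     2) a single MPI_Allreduce with PetscMaxSum_Op tells every process how many
        index messages it will receive and an upper bound on their length, after
        which the index lists themselves are exchanged with nonblocking sends
        and receives;
     3) the "to" (send) and "from" (receive) halves of the context are built,
        and for bs > 1 persistent MPI requests are set up so the actual scatters
        can reuse them.
*/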
1856: int VecScatterCreate_PtoS(int nx,int *inidx,int ny,int *inidy,Vec xin,Vec yin,int bs,VecScatter ctx)
1857: {
1858: VecScatter_MPI_General *from,*to;
1859: int *source,*lens,rank,*owners;
1860: int size,*lowner,*start,lengthy;
1861: int *nprocs,i,j,n,idx,*procs,nsends,nrecvs,*work;
1862: int *owner,*starts,count,tag,slen,ierr;
1863: int *rvalues,*svalues,base,imdex,nmax,*values,len,*indx,nprocslocal;
1864: MPI_Comm comm;
1865: MPI_Request *send_waits,*recv_waits;
1866: MPI_Status recv_status,*send_status;
1867: PetscMap map;
1868: PetscTruth found;
1869:
1871: PetscObjectGetNewTag((PetscObject)ctx,&tag);
1872: PetscObjectGetComm((PetscObject)xin,&comm);
1873: MPI_Comm_rank(comm,&rank);
1874: MPI_Comm_size(comm,&size);
1875: VecGetPetscMap(xin,&map);
1876: PetscMapGetGlobalRange(map,&owners);
1878: VecGetSize(yin,&lengthy);
1880: /* first count number of contributors to each processor */
1881: PetscMalloc(2*size*sizeof(int),&nprocs);
1882: ierr = PetscMemzero(nprocs,2*size*sizeof(int));
1883: procs = nprocs + size;
1884: PetscMalloc((nx+1)*sizeof(int),&owner);
1885: for (i=0; i<nx; i++) {
1886: idx = inidx[i];
1887: found = PETSC_FALSE;
1888: for (j=0; j<size; j++) {
1889: if (idx >= owners[j] && idx < owners[j+1]) {
1890: nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
1891: }
1892: }
1893: if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %d out of range",idx);
1894: }
1895: nprocslocal = nprocs[rank];
1896: nprocs[rank] = procs[rank] = 0;
1897: nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}
1899: /* inform other processors of number of messages and max length*/
1900: PetscMalloc(2*size*sizeof(int),&work);
1901: ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
1902: nmax = work[rank];
1903: nrecvs = work[size+rank];
1904: ierr = PetscFree(work);
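/*
   After the PetscMaxSum_Op reduction, work[rank] is effectively the length of the
   longest single message this process will receive and work[size+rank] is the
   number of processes that will send to it; these bound the receive buffers
   posted below.
*/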
1906: /* post receives: */
1907: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
1908: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1909: for (i=0; i<nrecvs; i++) {
1910: MPI_Irecv((rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1911: }
1913: /* do sends:
1914: 1) starts[i] gives the starting index in svalues for stuff going to
1915: the ith processor
1916: */
1917: PetscMalloc((nx+1)*sizeof(int),&svalues);
1918: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1919: PetscMalloc((size+1)*sizeof(int),&starts);
1920: starts[0] = 0;
1921: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1922: for (i=0; i<nx; i++) {
1923: if (owner[i] != rank) {
1924: svalues[starts[owner[i]]++] = inidx[i];
1925: }
1926: }
1928: starts[0] = 0;
1929: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1930: count = 0;
1931: for (i=0; i<size; i++) {
1932: if (procs[i]) {
1933: MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
1934: }
1935: }
1936: PetscFree(starts);
1938: base = owners[rank];
1940: /* wait on receives */
1941: ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
1942: source = lens + nrecvs;
1943: count = nrecvs;
1944: slen = 0;
1945: while (count) {
1946: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1947: /* unpack receives into our local space */
1948: MPI_Get_count(&recv_status,MPI_INT,&n);
1949: source[imdex] = recv_status.MPI_SOURCE;
1950: lens[imdex] = n;
1951: slen += n;
1952: count--;
1953: }
1954: PetscFree(recv_waits);
1955:
1956: /* allocate entire send scatter context */
1957: PetscNew(VecScatter_MPI_General,&to);
1958: PetscMemzero(to,sizeof(VecScatter_MPI_General)); /* zero the struct before reading options so sendfirst is not wiped out */
1959: PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&to->sendfirst);
1960: PetscLogObjectMemory(ctx,sizeof(VecScatter_MPI_General));
1961: len = slen*sizeof(int) + bs*slen*sizeof(PetscScalar) + (nrecvs+1)*sizeof(int) +
1962: nrecvs*(sizeof(int) + sizeof(MPI_Request));
1963: to->n = nrecvs;
1964: PetscMalloc(len,&to->values);
1965: PetscLogObjectMemory(ctx,len);
1966: to->requests = (MPI_Request*)(to->values + bs*slen);
1967: to->indices = (int*)(to->requests + nrecvs);
1968: to->starts = (int*)(to->indices + slen);
1969: to->procs = (int*)(to->starts + nrecvs + 1);
1970: ierr = PetscMalloc(2*(PetscMax(nrecvs,nsends)+1)*sizeof(MPI_Status),&to->sstatus);
1971: to->rstatus = to->sstatus + PetscMax(nrecvs,nsends) + 1;
1972: PetscLogObjectMemory(ctx,2*(PetscMax(nrecvs,nsends)+1)*sizeof(MPI_Status));
1973: ctx->todata = (void*)to;
1974: to->starts[0] = 0;
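/*
   The single malloc above is carved into the pieces of the send context; the
   layout is
       values   : bs*slen  PetscScalar   packed send buffer
       requests : nrecvs   MPI_Request   request handles for the outgoing value messages
       indices  : slen     int           local indices to pack, in message order
       starts   : nrecvs+1 int           message offsets into indices/values
       procs    : nrecvs   int           ranks the values are sent to
*/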
1976: if (nrecvs) {
1977: PetscMalloc(nrecvs*sizeof(int),&indx);
1978: for (i=0; i<nrecvs; i++) indx[i] = i;
1979: PetscSortIntWithPermutation(nrecvs,source,indx);
1981: /* move the data into the send scatter */
1982: for (i=0; i<nrecvs; i++) {
1983: to->starts[i+1] = to->starts[i] + lens[indx[i]];
1984: to->procs[i] = source[indx[i]];
1985: values = rvalues + indx[i]*nmax;
1986: for (j=0; j<lens[indx[i]]; j++) {
1987: to->indices[to->starts[i] + j] = values[j] - base;
1988: }
1989: }
1990: PetscFree(indx);
1991: }
1992: PetscFree(rvalues);
1993: PetscFree(lens);
1994:
1995: /* allocate entire receive scatter context */
1996: PetscNew(VecScatter_MPI_General,&from);
1997: PetscMemzero(from,sizeof(VecScatter_MPI_General)); /* zero the struct before reading options so sendfirst is not wiped out */
1998: PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&from->sendfirst);
1999: PetscLogObjectMemory(ctx,sizeof(VecScatter_MPI_General));
2000: len = ny*sizeof(int) + ny*bs*sizeof(PetscScalar) + (nsends+1)*sizeof(int) +
2001: nsends*(sizeof(int) + sizeof(MPI_Request));
2002: from->n = nsends;
2003: PetscMalloc(len,&from->values);
2004: PetscLogObjectMemory(ctx,len);
2005: from->requests = (MPI_Request*)(from->values + bs*ny);
2006: from->indices = (int*)(from->requests + nsends);
2007: from->starts = (int*)(from->indices + ny);
2008: from->procs = (int*)(from->starts + nsends + 1);
2009: ctx->fromdata = (void*)from;
2011: /* move data into receive scatter */
2012: PetscMalloc((size+nsends+1)*sizeof(int),&lowner);
2013: start = lowner + size;
2014: count = 0; from->starts[0] = start[0] = 0;
2015: for (i=0; i<size; i++) {
2016: if (procs[i]) {
2017: lowner[i] = count;
2018: from->procs[count++] = i;
2019: from->starts[count] = start[count] = start[count-1] + nprocs[i];
2020: }
2021: }
2022: for (i=0; i<nx; i++) {
2023: if (owner[i] != rank) {
2024: from->indices[start[lowner[owner[i]]]++] = inidy[i];
2025: if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2026: }
2027: }
2028: PetscFree(lowner);
2029: PetscFree(owner);
2030: PetscFree(nprocs);
2031:
2032: /* wait on sends */
2033: if (nsends) {
2034: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
2035: MPI_Waitall(nsends,send_waits,send_status);
2036: PetscFree(send_status);
2037: }
2038: PetscFree(send_waits);
2039: PetscFree(svalues);
2041: if (nprocslocal) {
2042: int nt;
2043: /* we have a scatter to ourselves */
2044: from->local.n = to->local.n = nt = nprocslocal;
2045: PetscMalloc(nt*sizeof(int),&from->local.slots);
2046: PetscMalloc(nt*sizeof(int),&to->local.slots);
2047: PetscLogObjectMemory(ctx,2*nt*sizeof(int));
2048: nt = 0;
2049: for (i=0; i<nx; i++) {
2050: idx = inidx[i];
2051: if (idx >= owners[rank] && idx < owners[rank+1]) {
2052: to->local.slots[nt] = idx - owners[rank];
2053: from->local.slots[nt++] = inidy[i];
2054: if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2055: }
2056: }
2057: } else {
2058: from->local.n = 0;
2059: from->local.slots = 0;
2060: to->local.n = 0;
2061: to->local.slots = 0;
2062: }
2063: from->local.nonmatching_computed = PETSC_FALSE;
2064: from->local.n_nonmatching = 0;
2065: from->local.slots_nonmatching = 0;
2066: to->local.nonmatching_computed = PETSC_FALSE;
2067: to->local.n_nonmatching = 0;
2068: to->local.slots_nonmatching = 0;
2070: to->type = VEC_SCATTER_MPI_GENERAL;
2071: from->type = VEC_SCATTER_MPI_GENERAL;
2073: from->bs = bs;
2074: to->bs = bs;
2075: if (bs > 1) {
2076: PetscTruth flg,flgs = PETSC_FALSE;
2077: int *sstarts = to->starts, *rstarts = from->starts;
2078: int *sprocs = to->procs, *rprocs = from->procs;
2079: MPI_Request *swaits = to->requests,*rwaits = from->requests;
2080: MPI_Request *rev_swaits,*rev_rwaits;
2081: PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
2083: ctx->destroy = VecScatterDestroy_PtoP_X;
2084: ctx->copy = VecScatterCopy_PtoP_X;
2085: ctx->view = VecScatterView_MPI;
2086:
2087: tag = ctx->tag;
2088: comm = ctx->comm;
2090: /* allocate additional wait variables for the "reverse" scatter */
2091: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&rev_rwaits);
2092: to->rev_requests = rev_rwaits;
2093: PetscMalloc((nsends+1)*sizeof(MPI_Request),&rev_swaits);
2094: from->rev_requests = rev_swaits;
2095: PetscLogObjectMemory(ctx,(nsends+nrecvs+2)*sizeof(MPI_Request));
2097: /* Register the receives that you will use later (sends for scatter reverse) */
2098: PetscOptionsHasName(PETSC_NULL,"-vecscatter_ssend",&flgs);
2099: if (flgs) {
2100: PetscLogInfo(0,"VecScatterCreate_PtoS:Using VecScatter Ssend mode\n");
2101: }
2102: for (i=0; i<from->n; i++) {
2103: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,
2104: comm,rwaits+i);
2105: if (!flgs) {
2106: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,
2107: comm,rev_swaits+i);
2108: } else {
2109: MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,
2110: comm,rev_swaits+i);
2111: }
2112: }
2114: PetscOptionsHasName(PETSC_NULL,"-vecscatter_rr",&flg);
2115: if (flg) {
2116: ctx->postrecvs = VecScatterPostRecvs_PtoP_X;
2117: to->use_readyreceiver = PETSC_TRUE;
2118: from->use_readyreceiver = PETSC_TRUE;
2119: for (i=0; i<to->n; i++) {
2120: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,
2121: comm,swaits+i);
2122: }
2123: PetscLogInfo(0,"VecScatterCreate_PtoS:Using VecScatter ready receiver mode\n");
2124: } else {
2125: ctx->postrecvs = 0;
2126: to->use_readyreceiver = PETSC_FALSE;
2127: from->use_readyreceiver = PETSC_FALSE;
2128: for (i=0; i<to->n; i++) {
2129: if (!flgs) {
2130: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,
2131: comm,swaits+i);
2132: } else {
2133: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,
2134: comm,swaits+i);
2135: }
2136: }
2137: }
2138: /* Register receives for scatter reverse */
2139: for (i=0; i<to->n; i++) {
2140: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,
2141: comm,rev_rwaits+i);
2142: }
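/*
   At this point four sets of persistent requests exist: forward receives and
   reverse sends on the "from" side, and forward sends and reverse receives on
   the "to" side. -vecscatter_ssend switches the sends to MPI_Ssend_init()
   (synchronous mode) and -vecscatter_rr enables ready-receiver mode, in which
   the receives are posted ahead of time and MPI_Rsend_init() is used for the
   forward sends.
*/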
2144: PetscLogInfo(0,"VecScatterCreate_PtoS:Using blocksize %d scatter\n",bs);
2145: switch (bs) {
2146: case 12:
2147: ctx->begin = VecScatterBegin_PtoP_12;
2148: ctx->end = VecScatterEnd_PtoP_12;
2149: break;
2150: case 5:
2151: ctx->begin = VecScatterBegin_PtoP_5;
2152: ctx->end = VecScatterEnd_PtoP_5;
2153: break;
2154: case 4:
2155: ctx->begin = VecScatterBegin_PtoP_4;
2156: ctx->end = VecScatterEnd_PtoP_4;
2157: break;
2158: case 3:
2159: ctx->begin = VecScatterBegin_PtoP_3;
2160: ctx->end = VecScatterEnd_PtoP_3;
2161: break;
2162: case 2:
2163: ctx->begin = VecScatterBegin_PtoP_2;
2164: ctx->end = VecScatterEnd_PtoP_2;
2165: break;
2166: default:
2167: SETERRQ(PETSC_ERR_SUP,"Blocksize not supported");
2168: }
2169: } else {
2170: ctx->postrecvs = 0;
2171: ctx->destroy = VecScatterDestroy_PtoP;
2172: ctx->begin = VecScatterBegin_PtoP;
2173: ctx->end = VecScatterEnd_PtoP;
2174: ctx->copy = VecScatterCopy_PtoP;
2175: ctx->view = VecScatterView_MPI;
2176: }
2178: /* Check if the local scatter is actually a copy; important special case */
2179: if (nprocslocal) {
2180: VecScatterLocalOptimizeCopy_Private(&to->local,&from->local,bs);
2181: }
2183: return(0);
2184: }
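/*
   A rough usage sketch of the public interface whose parallel-to-sequential case
   lands in VecScatterCreate_PtoS(). The calling sequences are assumed from the
   PETSc 2.x-era API used elsewhere in this file, and the vector sizes and index
   values are invented purely for illustration:

       Vec        x,y;
       IS         ix,iy;
       VecScatter ctx;
       int        idx[2] = {0,5},idy[2] = {0,1},ierr;

       ierr = VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,10,&x);CHKERRQ(ierr);
       ierr = VecCreateSeq(PETSC_COMM_SELF,2,&y);CHKERRQ(ierr);
       ierr = ISCreateGeneral(PETSC_COMM_SELF,2,idx,&ix);CHKERRQ(ierr);   |* indices into x *|
       ierr = ISCreateGeneral(PETSC_COMM_SELF,2,idy,&iy);CHKERRQ(ierr);   |* indices into y *|
       ierr = VecScatterCreate(x,ix,y,iy,&ctx);CHKERRQ(ierr);
       ierr = VecScatterBegin(x,y,INSERT_VALUES,SCATTER_FORWARD,ctx);CHKERRQ(ierr);
       ierr = VecScatterEnd(x,y,INSERT_VALUES,SCATTER_FORWARD,ctx);CHKERRQ(ierr);
       ierr = VecScatterDestroy(ctx);CHKERRQ(ierr);

   (nested comments written as |* *| so this block remains a single C comment)
*/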
2186: /* ------------------------------------------------------------------------------------*/
2187: /*
2188: Scatter from local Seq vectors to a parallel vector.
2189: */
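/*
   VecScatterCreate_StoP mirrors VecScatterCreate_PtoS with the roles reversed:
   here the destination indices inidy[] live in the parallel vector yin, so they
   determine which process each contribution is sent to. The handshake (owner
   search, PetscMaxSum_Op reduction, index exchange) is the same, but the result
   always uses block size 1 and the general VecScatterBegin_PtoP/VecScatterEnd_PtoP
   kernels.
*/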
2190: int VecScatterCreate_StoP(int nx,int *inidx,int ny,int *inidy,Vec yin,VecScatter ctx)
2191: {
2192: VecScatter_MPI_General *from,*to;
2193: int *source,nprocslocal,*lens,rank = yin->stash.rank,*owners = yin->map->range;
2194: int ierr,size = yin->stash.size,*lowner,*start;
2195: int *nprocs,i,j,n,idx,*procs,nsends,nrecvs,*work;
2196: int *owner,*starts,count,tag,slen;
2197: int *rvalues,*svalues,base,imdex,nmax,*values,len;
2198: PetscTruth found;
2199: MPI_Comm comm = yin->comm;
2200: MPI_Request *send_waits,*recv_waits;
2201: MPI_Status recv_status,*send_status;
2204: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2205: /* first count number of contributors to each processor */
2206: PetscMalloc(2*size*sizeof(int),&nprocs);
2207: ierr = PetscMemzero(nprocs,2*size*sizeof(int));
2208: procs = nprocs + size;
2209: PetscMalloc((nx+1)*sizeof(int),&owner);
2210: for (i=0; i<nx; i++) {
2211: idx = inidy[i];
2212: found = PETSC_FALSE;
2213: for (j=0; j<size; j++) {
2214: if (idx >= owners[j] && idx < owners[j+1]) {
2215: nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
2216: }
2217: }
2218: if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %d out of range",idx);
2219: }
2220: nprocslocal = nprocs[rank];
2221: nprocs[rank] = procs[rank] = 0;
2222: nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}
2224: /* inform other processors of number of messages and max length*/
2225: ierr = PetscMalloc(2*size*sizeof(int),&work);
2226: ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
2227: nmax = work[rank];
2228: nrecvs = work[size+rank];
2229: ierr = PetscFree(work);
2231: /* post receives: */
2232: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
2233: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
2234: for (i=0; i<nrecvs; i++) {
2235: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2236: }
2238: /* do sends:
2239: 1) starts[i] gives the starting index in svalues for stuff going to
2240: the ith processor
2241: */
2242: PetscMalloc((nx+1)*sizeof(int),&svalues);
2243: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
2244: PetscMalloc((size+1)*sizeof(int),&starts);
2245: starts[0] = 0;
2246: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
2247: for (i=0; i<nx; i++) {
2248: if (owner[i] != rank) {
2249: svalues[starts[owner[i]]++] = inidy[i];
2250: }
2251: }
2253: starts[0] = 0;
2254: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
2255: count = 0;
2256: for (i=0; i<size; i++) {
2257: if (procs[i]) {
2258: MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count);
2259: count++;
2260: }
2261: }
2262: PetscFree(starts);
2264: /* allocate entire send scatter context */
2265: PetscNew(VecScatter_MPI_General,&to);
2266: PetscMemzero(to,sizeof(VecScatter_MPI_General));
2267: PetscLogObjectMemory(ctx,sizeof(VecScatter_MPI_General));
2268: PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&to->sendfirst);
2269: len = ny*(sizeof(int) + sizeof(PetscScalar)) + (nsends+1)*sizeof(int) +
2270: nsends*(sizeof(int) + sizeof(MPI_Request));
2271: to->n = nsends;
2272: PetscMalloc(len,&to->values);
2273: PetscLogObjectMemory(ctx,len);
2274: to->requests = (MPI_Request*)(to->values + ny);
2275: to->indices = (int*)(to->requests + nsends);
2276: to->starts = (int*)(to->indices + ny);
2277: to->procs = (int*)(to->starts + nsends + 1);
2278: ierr = PetscMalloc(2*(PetscMax(nsends,nrecvs) + 1)*sizeof(MPI_Status),&to->sstatus); /* room for both sstatus and rstatus */
2279: to->rstatus = to->sstatus + PetscMax(nsends,nrecvs) + 1;
2280: PetscLogObjectMemory(ctx,2*(PetscMax(nsends,nrecvs) + 1)*sizeof(MPI_Status));
2281: ctx->todata = (void*)to;
2283: /* move data into send scatter context */
2284: PetscMalloc((size+nsends+1)*sizeof(int),&lowner);
2285: start = lowner + size;
2286: count = 0;
2287: to->starts[0] = start[0] = 0;
2288: for (i=0; i<size; i++) {
2289: if (procs[i]) {
2290: lowner[i] = count;
2291: to->procs[count++] = i;
2292: to->starts[count] = start[count] = start[count-1] + nprocs[i];
2293: }
2294: }
2295: for (i=0; i<nx; i++) {
2296: if (owner[i] != rank) {
2297: to->indices[start[lowner[owner[i]]]++] = inidx[i];
2298: }
2299: }
2300: PetscFree(lowner);
2301: PetscFree(owner);
2302: PetscFree(nprocs);
2304: base = owners[rank];
2306: /* wait on receives */
2307: ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
2308: source = lens + nrecvs;
2309: count = nrecvs; slen = 0;
2310: while (count) {
2311: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2312: /* unpack receives into our local space */
2313: MPI_Get_count(&recv_status,MPI_INT,&n);
2314: source[imdex] = recv_status.MPI_SOURCE;
2315: lens[imdex] = n;
2316: slen += n;
2317: count--;
2318: }
2319: PetscFree(recv_waits);
2320:
2321: /* allocate entire receive scatter context */
2322: PetscNew(VecScatter_MPI_General,&from);
2323: PetscMemzero(from,sizeof(VecScatter_MPI_General));
2324: PetscLogObjectMemory(ctx,sizeof(VecScatter_MPI_General));
2325: PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&from->sendfirst);
2326: len = slen*(sizeof(int) + sizeof(PetscScalar)) + (nrecvs+1)*sizeof(int) +
2327: nrecvs*(sizeof(int) + sizeof(MPI_Request));
2328: from->n = nrecvs;
2329: PetscMalloc(len,&from->values);
2330: PetscLogObjectMemory(ctx,len);
2331: from->requests = (MPI_Request*)(from->values + slen);
2332: from->indices = (int*)(from->requests + nrecvs);
2333: from->starts = (int*)(from->indices + slen);
2334: from->procs = (int*)(from->starts + nrecvs + 1);
2335: ctx->fromdata = (void*)from;
2337: /* move the data into the receive scatter context*/
2338: from->starts[0] = 0;
2339: for (i=0; i<nrecvs; i++) {
2340: from->starts[i+1] = from->starts[i] + lens[i];
2341: from->procs[i] = source[i];
2342: values = rvalues + i*nmax;
2343: for (j=0; j<lens[i]; j++) {
2344: from->indices[from->starts[i] + j] = values[j] - base;
2345: }
2346: }
2347: PetscFree(rvalues);
2348: PetscFree(lens);
2349:
2350: /* wait on sends */
2351: if (nsends) {
2352: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
2353: MPI_Waitall(nsends,send_waits,send_status);
2354: PetscFree(send_status);
2355: }
2356: PetscFree(send_waits);
2357: PetscFree(svalues);
2359: if (nprocslocal) {
2360: int nt;
2361: /* we have a scatter to ourselves */
2362: from->local.n = to->local.n = nt = nprocslocal;
2363: PetscMalloc(nt*sizeof(int),&from->local.slots);
2364: PetscMalloc(nt*sizeof(int),&to->local.slots);
2365: PetscLogObjectMemory(ctx,2*nt*sizeof(int));
2366: nt = 0;
2367: for (i=0; i<ny; i++) {
2368: idx = inidy[i];
2369: if (idx >= owners[rank] && idx < owners[rank+1]) {
2370: from->local.slots[nt] = idx - owners[rank];
2371: to->local.slots[nt++] = inidx[i];
2372: }
2373: }
2374: } else {
2375: from->local.n = 0;
2376: from->local.slots = 0;
2377: to->local.n = 0;
2378: to->local.slots = 0;
2380: }
2381: from->local.nonmatching_computed = PETSC_FALSE;
2382: from->local.n_nonmatching = 0;
2383: from->local.slots_nonmatching = 0;
2384: to->local.nonmatching_computed = PETSC_FALSE;
2385: to->local.n_nonmatching = 0;
2386: to->local.slots_nonmatching = 0;
2388: to->type = VEC_SCATTER_MPI_GENERAL;
2389: from->type = VEC_SCATTER_MPI_GENERAL;
2391: ctx->destroy = VecScatterDestroy_PtoP;
2392: ctx->postrecvs = 0;
2393: ctx->begin = VecScatterBegin_PtoP;
2394: ctx->end = VecScatterEnd_PtoP;
2395: ctx->copy = 0;
2396: ctx->view = VecScatterView_MPI;
2398: to->bs = 1;
2399: from->bs = 1;
2400: return(0);
2401: }
2403: /* ---------------------------------------------------------------------------------*/
2404: int VecScatterCreate_PtoP(int nx,int *inidx,int ny,int *inidy,Vec xin,Vec yin,VecScatter ctx)
2405: {
2406: int *lens,rank,*owners = xin->map->range,size;
2407: int *nprocs,i,j,n,idx,*procs,nsends,nrecvs,*work,*local_inidx,*local_inidy;
2408: int *owner,*starts,count,tag,slen,ierr;
2409: int *rvalues,*svalues,base,imdex,nmax,*values;
2410: MPI_Comm comm;
2411: MPI_Request *send_waits,*recv_waits;
2412: MPI_Status recv_status;
2413: PetscTruth duplicate = PETSC_FALSE,found;
2416: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2417: PetscObjectGetComm((PetscObject)xin,&comm);
2418: MPI_Comm_size(comm,&size);
2419: MPI_Comm_rank(comm,&rank);
2420: if (size == 1) {
2421: VecScatterCreate_StoP(nx,inidx,ny,inidy,yin,ctx);
2422: return(0);
2423: }
2425: /*
2426: Each processor ships off its inidx[j] and inidy[j] to the appropriate processor;
2427: each processor then calls VecScatterCreate_StoP() with the resulting local index pairs.
2428: */
2429: /* first count number of contributors to each processor */
2430: ierr = PetscMalloc(2*size*sizeof(int),&nprocs);
2431: procs = nprocs + size;
2432: ierr = PetscMemzero(nprocs,2*size*sizeof(int));
2433: ierr = PetscMalloc((nx+1)*sizeof(int),&owner);
2434: for (i=0; i<nx; i++) {
2435: idx = inidx[i];
2436: found = PETSC_FALSE;
2437: for (j=0; j<size; j++) {
2438: if (idx >= owners[j] && idx < owners[j+1]) {
2439: nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
2440: }
2441: }
2442: if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %d out of range",idx);
2443: }
2444: nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}
2446: /* inform other processors of number of messages and max length*/
2447: PetscMalloc(2*size*sizeof(int),&work);
2448: ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
2449: nmax = work[rank];
2450: nrecvs = work[size+rank];
2451: ierr = PetscFree(work);
2453: /* post receives: */
2454: PetscMalloc(2*(nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
2455: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
2456: for (i=0; i<nrecvs; i++) {
2457: MPI_Irecv(rvalues+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2458: }
2460: /* do sends:
2461: 1) starts[i] gives the starting index in svalues for stuff going to
2462: the ith processor
2463: */
2464: PetscMalloc(2*(nx+1)*sizeof(int),&svalues);
2465: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
2466: PetscMalloc((size+1)*sizeof(int),&starts);
2467: starts[0] = 0;
2468: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
2469: for (i=0; i<nx; i++) {
2470: svalues[2*starts[owner[i]]] = inidx[i];
2471: svalues[1 + 2*starts[owner[i]]++] = inidy[i];
2472: }
2473: PetscFree(owner);
2475: starts[0] = 0;
2476: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
2477: count = 0;
2478: for (i=0; i<size; i++) {
2479: if (procs[i]) {
2480: MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPI_INT,i,tag,comm,send_waits+count);
2481: count++;
2482: }
2483: }
2484: PetscFree(starts);
2485: PetscFree(nprocs);
2487: base = owners[rank];
2489: /* wait on receives */
2490: PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
2491: count = nrecvs;
2492: slen = 0;
2493: while (count) {
2494: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2495: /* unpack receives into our local space */
2496: MPI_Get_count(&recv_status,MPI_INT,&n);
2497: lens[imdex] = n/2;
2498: slen += n/2;
2499: count--;
2500: }
2501: PetscFree(recv_waits);
2502:
2503: PetscMalloc(2*(slen+1)*sizeof(int),&local_inidx);
2504: local_inidy = local_inidx + slen;
2506: count = 0;
2507: for (i=0; i<nrecvs; i++) {
2508: values = rvalues + 2*i*nmax;
2509: for (j=0; j<lens[i]; j++) {
2510: local_inidx[count] = values[2*j] - base;
2511: local_inidy[count++] = values[2*j+1];
2512: }
2513: }
2514: PetscFree(rvalues);
2515: PetscFree(lens);
2517: /* wait on sends */
2518: if (nsends) {
2519: MPI_Status *send_status;
2520: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
2521: MPI_Waitall(nsends,send_waits,send_status);
2522: PetscFree(send_status);
2523: }
2524: PetscFree(send_waits);
2525: PetscFree(svalues);
2527: /*
2528: should sort and remove duplicates from local_inidx,local_inidy
2529: */
2531: #if defined(do_it_slow)
2532: /* sort on the from index */
2533: PetscSortIntWithArray(slen,local_inidx,local_inidy);
2534: start = 0;
2535: while (start < slen) {
2536: count = start+1;
2537: last = local_inidx[start];
2538: while (count < slen && last == local_inidx[count]) count++;
2539: if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2540: /* sort on to index */
2541: PetscSortInt(count-start,local_inidy+start);
2542: }
2543: /* remove duplicates; not most efficient way, but probably good enough */
2544: i = start;
2545: while (i < count-1) {
2546: if (local_inidy[i] != local_inidy[i+1]) {
2547: i++;
2548: } else { /* found a duplicate */
2549: duplicate = PETSC_TRUE;
2550: for (j=i; j<slen-1; j++) {
2551: local_inidx[j] = local_inidx[j+1];
2552: local_inidy[j] = local_inidy[j+1];
2553: }
2554: slen--;
2555: count--;
2556: /* printf("found dup %d %d\n",local_inidx[i],local_inidy[i]);*/
2557: }
2558: }
2559: start = count;
2560: }
2561: #endif
2562: if (duplicate) {
2563: PetscLogInfo(0,"VecScatterCreate_PtoP:Duplicate from/to indices passed in VecScatterCreate(); they are ignored\n");
2564: }
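/*
   Because the duplicate-removal pass above is compiled only when do_it_slow is
   defined, the duplicate flag normally stays PETSC_FALSE and the message above
   is not logged. Either way, each process now holds the (from,to) index pairs
   for the entries of xin that it owns, so the remaining work is exactly a
   sequential-to-parallel scatter.
*/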
2565: VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,yin,ctx);
2566: PetscFree(local_inidx);
2567: return(0);
2568: }