Actual source code: pvec2.c
/*$Id: pvec2.c,v 1.57 2001/09/11 16:32:01 bsmith Exp $*/

/*
     Code for some of the parallel vector primitives.
*/
#include "src/vec/impls/mpi/pvecimpl.h"
#include "src/inline/dot.h"
#include "petscblaslapack.h"

#define do_not_use_ethernet
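/*
   Hand-rolled replacement for MPI_Allreduce() that pipelines the reduction
   along the processor ranks: each process receives the running sum from
   rank-1, adds its own contribution, passes it on to rank+1, and the final
   total is then sent back down the chain.  It is presumably meant to be
   selected via the do_not_use_ethernet marker above; note that it ignores
   the type and op arguments and always sums MPIU_REAL values.
*/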
int Ethernet_Allreduce(PetscReal *in,PetscReal *out,int n,MPI_Datatype type,MPI_Op op,MPI_Comm comm)
{
  int        i,rank,size,ierr;
  MPI_Status status;

  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);
  if (rank) {
    MPI_Recv(out,n,MPIU_REAL,rank-1,837,comm,&status);
    for (i=0; i<n; i++) in[i] += out[i];
  }
  if (rank != size-1) {
    MPI_Send(in,n,MPIU_REAL,rank+1,837,comm);
  }
  if (rank == size-1) {
    for (i=0; i<n; i++) out[i] = in[i];
  } else {
    MPI_Recv(out,n,MPIU_REAL,rank+1,838,comm,&status);
  }
  if (rank) {
    MPI_Send(out,n,MPIU_REAL,rank-1,838,comm);
  }
  return 0;
}
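/*
   VecMDot_MPI() and VecMTDot_MPI() follow the standard pattern for parallel
   reductions: compute the local (sequential) dot products into a work array,
   then sum the results across all processes with a single MPI_Allreduce().
   A 128-entry buffer on the stack avoids a malloc for the common case of a
   small number of vectors.
*/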
int VecMDot_MPI(int nv,Vec xin,const Vec y[],PetscScalar *z)
{
  PetscScalar awork[128],*work = awork;
  int         ierr;

  if (nv > 128) {
    PetscMalloc(nv*sizeof(PetscScalar),&work);
  }
  VecMDot_Seq(nv,xin,y,work);
  MPI_Allreduce(work,z,nv,MPIU_SCALAR,PetscSum_Op,xin->comm);
  if (nv > 128) {
    PetscFree(work);
  }
  return(0);
}
int VecMTDot_MPI(int nv,Vec xin,const Vec y[],PetscScalar *z)
{
  PetscScalar awork[128],*work = awork;
  int         ierr;

  if (nv > 128) {
    PetscMalloc(nv*sizeof(PetscScalar),&work);
  }
  VecMTDot_Seq(nv,xin,y,work);
  MPI_Allreduce(work,z,nv,MPIU_SCALAR,PetscSum_Op,xin->comm);
  if (nv > 128) {
    PetscFree(work);
  }
  return(0);
}
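/*
   Parallel norms: the 2-norm is formed by reducing the local sums of squares
   and taking the square root of the total; the 1-norm reduces the local sums
   with MPI_SUM and the infinity norm reduces the local maxima with MPI_MAX.
   NORM_1_AND_2 combines both norms in one MPI_Allreduce() of a length-2 buffer.
*/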
int VecNorm_MPI(Vec xin,NormType type,PetscReal *z)
{
  Vec_MPI     *x = (Vec_MPI*)xin->data;
  PetscReal   sum,work = 0.0;
  PetscScalar *xx = x->array;
  int         n = xin->n,ierr;

  if (type == NORM_2) {
#if defined(PETSC_HAVE_SLOW_NRM2)
#if defined(PETSC_USE_FORTRAN_KERNEL_NORM)
    fortrannormsqr_(xx,&n,&work);
#elif defined(PETSC_USE_UNROLLED_NORM)
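    /* Handle the n%4 leftover entries first (the switch cases deliberately
       fall through).  n is decremented by 4 only once here; the loop below
       then finishes with n at or below zero, so exactly n entries are summed. */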
    switch (n & 0x3) {
      case 3: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 2: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 1: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++; n -= 4;
    }
    while (n>0) {
      work += PetscRealPart(xx[0]*PetscConj(xx[0])+xx[1]*PetscConj(xx[1])+
                            xx[2]*PetscConj(xx[2])+xx[3]*PetscConj(xx[3]));
      xx += 4; n -= 4;
    }
#else
    {int i; for (i=0; i<n; i++) work += PetscRealPart(xx[i]*PetscConj(xx[i]));}
#endif
#else
    {int one = 1;
      work = BLnrm2_(&n,xx,&one);
      work *= work;
    }
#endif
    MPI_Allreduce(&work,&sum,1,MPIU_REAL,MPI_SUM,xin->comm);
    *z = sqrt(sum);
    PetscLogFlops(2*xin->n);
  } else if (type == NORM_1) {
    /* Find the local part */
    VecNorm_Seq(xin,NORM_1,&work);
    /* Find the global sum */
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_SUM,xin->comm);
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    VecNorm_Seq(xin,NORM_INFINITY,&work);
    /* Find the global max */
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MAX,xin->comm);
  } else if (type == NORM_1_AND_2) {
    PetscReal temp[2];
    VecNorm_Seq(xin,NORM_1,temp);
    VecNorm_Seq(xin,NORM_2,temp+1);
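    /* square the local 2-norm so the sums of squares can be added across
       processes; the square root is taken after the reduction */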
    temp[1] = temp[1]*temp[1];
    MPI_Allreduce(temp,z,2,MPIU_REAL,MPI_SUM,xin->comm);
    z[1] = sqrt(z[1]);
  }
  return(0);
}
/*
       These two functions are the MPI reduction operations used for max and min with index.
   The call below to MPI_Op_create() converts the functions Vec[Max,Min]_Local() into the
   MPI operators Vec[Max,Min]_Local_Op.
*/
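/*
   Each reduction operates on a pair of MPIU_REAL values: element 0 holds the
   local max (or min) and element 1 holds its global index, stored as a real
   so that both travel through a single MPI_Allreduce() of length 2.
*/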
MPI_Op VecMax_Local_Op = 0;
MPI_Op VecMin_Local_Op = 0;
EXTERN_C_BEGIN
void VecMax_Local(void *in,void *out,int *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal *)in,*xout = (PetscReal*)out;

  PetscFunctionBegin;  /* pairs with the PetscStackPop below */
  if (*datatype != MPIU_REAL) {
    (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  if (xin[0] > xout[0]) {
    xout[0] = xin[0];
    xout[1] = xin[1];
  }
  PetscStackPop;
  return; /* cannot return a value */
}
EXTERN_C_END
EXTERN_C_BEGIN
void VecMin_Local(void *in,void *out,int *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal *)in,*xout = (PetscReal*)out;

  PetscFunctionBegin;  /* pairs with the PetscStackPop below */
  if (*datatype != MPIU_REAL) {
    (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  if (xin[0] < xout[0]) {
    xout[0] = xin[0];
    xout[1] = xin[1];
  }
  PetscStackPop;
  return; /* cannot return a value */
}
EXTERN_C_END
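/*
   VecMax_MPI(): when no index is requested, a plain MPI_MAX reduction of the
   local maxima suffices; when the index is wanted, the (value, global index)
   pair is reduced with the user-defined VecMax_Local_Op, which is created
   lazily on first use.
*/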
int VecMax_MPI(Vec xin,int *idx,PetscReal *z)
{
  int       ierr;
  PetscReal work;

  /* Find the local max */
  VecMax_Seq(xin,idx,&work);

  /* Find the global max */
  if (!idx) {
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MAX,xin->comm);
  } else {
    PetscReal work2[2],z2[2];
    int       rstart;

    if (!VecMax_Local_Op) {
      MPI_Op_create(VecMax_Local,1,&VecMax_Local_Op);
    }
    VecGetOwnershipRange(xin,&rstart,PETSC_NULL);
    work2[0] = work;
    work2[1] = *idx + rstart;
    MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMax_Local_Op,xin->comm);
    *z   = z2[0];
    *idx = (int)z2[1];
  }
  return(0);
}
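/* VecMin_MPI() mirrors VecMax_MPI(), using MPI_MIN and VecMin_Local_Op. */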
int VecMin_MPI(Vec xin,int *idx,PetscReal *z)
{
  int       ierr;
  PetscReal work;

  /* Find the local min */
  VecMin_Seq(xin,idx,&work);

  /* Find the global min */
  if (!idx) {
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MIN,xin->comm);
  } else {
    PetscReal work2[2],z2[2];
    int       rstart;

    if (!VecMin_Local_Op) {
      MPI_Op_create(VecMin_Local,1,&VecMin_Local_Op);
    }
    VecGetOwnershipRange(xin,&rstart,PETSC_NULL);
    work2[0] = work;
    work2[1] = *idx + rstart;
    MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMin_Local_Op,xin->comm);
    *z   = z2[0];
    *idx = (int)z2[1];
  }
  return(0);
}