Actual source code: pvec2.c

/*$Id: pvec2.c,v 1.57 2001/09/11 16:32:01 bsmith Exp $*/

/*
     Code for some of the parallel vector primitives.
*/
 #include "src/vec/impls/mpi/pvecimpl.h"
 #include "src/inline/dot.h"
 #include "petscblaslapack.h"

#define do_not_use_ethernet
/*
     Simple chain-based allreduce for sums of PetscReal values: partial sums are
   accumulated up the chain of processes (tag 837) and the final result is sent
   back down (tag 838).  The type and op arguments are ignored; MPIU_REAL and
   addition are hard-wired.
*/
int Ethernet_Allreduce(PetscReal *in,PetscReal *out,int n,MPI_Datatype type,MPI_Op op,MPI_Comm comm)
{
  int        i,rank,size,ierr;
  MPI_Status status;

  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);

  /* receive the running sums from the previous process and add this process's values */
  if (rank) {
    MPI_Recv(out,n,MPIU_REAL,rank-1,837,comm,&status);
    for (i=0; i<n; i++) in[i] += out[i];
  }
  /* forward the partial sums to the next process in the chain */
  if (rank != size - 1) {
    MPI_Send(in,n,MPIU_REAL,rank+1,837,comm);
  }
  /* the last process now has the full sums; send the result back down the chain */
  if (rank == size-1) {
    for (i=0; i<n; i++) out[i] = in[i];
  } else {
    MPI_Recv(out,n,MPIU_REAL,rank+1,838,comm,&status);
  }
  if (rank) {
    MPI_Send(out,n,MPIU_REAL,rank-1,838,comm);
  }
  return 0;
}
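/*
   Usage sketch (illustrative only, not from the original source): how the routine
   above could be called to form an elementwise global sum.  The variable names are
   hypothetical and PetscReal arithmetic is assumed; every process ends up with the
   same sums.
*/
static int Ethernet_Allreduce_Example(MPI_Comm comm)
{
  PetscReal in[2],out[2];
  int       rank;

  MPI_Comm_rank(comm,&rank);
  in[0] = (PetscReal)rank;         /* each process contributes its rank ... */
  in[1] = 2.0*(PetscReal)rank;     /* ... and twice its rank                */
  /* note that in[] is overwritten with partial sums on intermediate processes */
  Ethernet_Allreduce(in,out,2,MPIU_REAL,MPI_SUM,comm);
  /* every process now holds out[0] = sum of ranks and out[1] = twice that sum */
  return 0;
}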


/* multiple dot products: local partial results followed by one global reduction */
int VecMDot_MPI(int nv,Vec xin,const Vec y[],PetscScalar *z)
{
  PetscScalar awork[128],*work = awork;
  int         ierr;

  /* use the stack buffer unless more than 128 results are requested */
  if (nv > 128) {
    PetscMalloc(nv*sizeof(PetscScalar),&work);
  }
  VecMDot_Seq(nv,xin,y,work);
  MPI_Allreduce(work,z,nv,MPIU_SCALAR,PetscSum_Op,xin->comm);
  if (nv > 128) {
    PetscFree(work);
  }
  return(0);
}

/* multiple transpose dot products: same pattern as VecMDot_MPI() */
int VecMTDot_MPI(int nv,Vec xin,const Vec y[],PetscScalar *z)
{
  PetscScalar awork[128],*work = awork;
  int         ierr;

  if (nv > 128) {
    PetscMalloc(nv*sizeof(PetscScalar),&work);
  }
  VecMTDot_Seq(nv,xin,y,work);
  MPI_Allreduce(work,z,nv,MPIU_SCALAR,PetscSum_Op,xin->comm);
  if (nv > 128) {
    PetscFree(work);
  }
  return(0);
}
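/*
   Usage sketch (illustrative only, not from the original source): both routines above
   follow the same two-step pattern, where a sequential kernel computes the local
   partial results and a single MPI_Allreduce of length nv combines them.  Below is the
   same pattern for one unconjugated dot product over raw local arrays, with
   hypothetical names.
*/
static PetscScalar LocalThenGlobalTDot(const PetscScalar *x,const PetscScalar *y,int nlocal,MPI_Comm comm)
{
  PetscScalar local = 0.0,global;
  int         i;

  for (i=0; i<nlocal; i++) local += x[i]*y[i];                  /* local partial product   */
  MPI_Allreduce(&local,&global,1,MPIU_SCALAR,PetscSum_Op,comm); /* combine across processes */
  return global;
}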

int VecNorm_MPI(Vec xin,NormType type,PetscReal *z)
{
  Vec_MPI      *x = (Vec_MPI*)xin->data;
  PetscReal    sum,work = 0.0;
  PetscScalar  *xx = x->array;
  int          n = xin->n,ierr;

  if (type == NORM_2) {
    /* sum the squares of the local entries, then combine across processes */
#if defined(PETSC_HAVE_SLOW_NRM2)
#if defined(PETSC_USE_FORTRAN_KERNEL_NORM)
    fortrannormsqr_(xx,&n,&work);
#elif defined(PETSC_USE_UNROLLED_NORM)
    /* handle the n mod 4 leftover entries first (intentional fall-through),
       then process four entries per iteration */
    switch (n & 0x3) {
      case 3: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 2: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 1: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++; n -= 4;
    }
    while (n>0) {
      work += PetscRealPart(xx[0]*PetscConj(xx[0])+xx[1]*PetscConj(xx[1])+
                        xx[2]*PetscConj(xx[2])+xx[3]*PetscConj(xx[3]));
      xx += 4; n -= 4;
    }
#else
    {int i; for (i=0; i<n; i++) work += PetscRealPart((xx[i])*(PetscConj(xx[i])));}
#endif
#else
    {int one = 1;
      work  = BLnrm2_(&n,xx,&one);
      work *= work;
    }
#endif
    MPI_Allreduce(&work,&sum,1,MPIU_REAL,MPI_SUM,xin->comm);
    *z = sqrt(sum);
    PetscLogFlops(2*xin->n);
  } else if (type == NORM_1) {
    /* Find the local part */
    VecNorm_Seq(xin,NORM_1,&work);
    /* Find the global sum */
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_SUM,xin->comm);
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    VecNorm_Seq(xin,NORM_INFINITY,&work);
    /* Find the global max */
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MAX,xin->comm);
  } else if (type == NORM_1_AND_2) {
    /* compute both norms with a single reduction: the 2-norm travels as a sum of squares */
    PetscReal temp[2];
    VecNorm_Seq(xin,NORM_1,temp);
    VecNorm_Seq(xin,NORM_2,temp+1);
    temp[1] = temp[1]*temp[1];
    MPI_Allreduce(temp,z,2,MPIU_REAL,MPI_SUM,xin->comm);
    z[1] = sqrt(z[1]);
  }
  return(0);
}
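/*
   Usage sketch (illustrative only, not from the original source): the NORM_2 branch
   above squares and sums the local entries, adds the partial sums across processes,
   and takes the square root once at the end.  The same computation for a raw array
   of reals, with hypothetical names:
*/
static PetscReal TwoNormOfDistributedArray(const PetscReal *v,int nlocal,MPI_Comm comm)
{
  PetscReal localsq = 0.0,globalsq;
  int       i;

  for (i=0; i<nlocal; i++) localsq += v[i]*v[i];              /* local sum of squares      */
  MPI_Allreduce(&localsq,&globalsq,1,MPIU_REAL,MPI_SUM,comm); /* global sum of squares     */
  return sqrt(globalsq);                                      /* 2-norm of the full vector */
}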

/*
       These two functions are the MPI reduction operations used for max and min with index.
   The calls below to MPI_Op_create() convert the functions Vec[Max,Min]_Local() to the
   MPI operators Vec[Max,Min]_Local_Op; a usage sketch follows the two functions.
*/
MPI_Op VecMax_Local_Op = 0;
MPI_Op VecMin_Local_Op = 0;

EXTERN_C_BEGIN
/* combine two (value,index) pairs, keeping the larger value and its index */
void VecMax_Local(void *in,void *out,int *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal *)in,*xout = (PetscReal*)out;

  PetscFunctionBegin;
  if (*datatype != MPIU_REAL) {
    (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  if (xin[0] > xout[0]) {
    xout[0] = xin[0];
    xout[1] = xin[1];
  }
  PetscStackPop;
  return; /* cannot return a value */
}
EXTERN_C_END

EXTERN_C_BEGIN
/* combine two (value,index) pairs, keeping the smaller value and its index */
void VecMin_Local(void *in,void *out,int *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal *)in,*xout = (PetscReal*)out;

  PetscFunctionBegin;
  if (*datatype != MPIU_REAL) {
    (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  if (xin[0] < xout[0]) {
    xout[0] = xin[0];
    xout[1] = xin[1];
  }
  PetscStackPop;
  return; /* cannot return a value */
}
EXTERN_C_END
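/*
   Usage sketch (illustrative only, not from the original source): how MPI_Op_create()
   turns VecMax_Local() into an operator that MPI_Allreduce can apply to a (value,index)
   pair, as described in the comment above.  The names below are hypothetical; the second
   slot of each pair carries the index, stored as a real.
*/
static int MaxWithIndexSketch(MPI_Comm comm,PetscReal localmax,int localidx,PetscReal *globalmax,int *globalidx)
{
  PetscReal in[2],out[2];

  if (!VecMax_Local_Op) {
    MPI_Op_create(VecMax_Local,1,&VecMax_Local_Op);  /* 1 indicates a commutative operation */
  }
  in[0] = localmax;               /* the value being maximized          */
  in[1] = (PetscReal)localidx;    /* its index, carried along as a real */
  MPI_Allreduce(in,out,2,MPIU_REAL,VecMax_Local_Op,comm);
  *globalmax = out[0];
  *globalidx = (int)out[1];
  return 0;
}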

int VecMax_MPI(Vec xin,int *idx,PetscReal *z)
{
  int       ierr;
  PetscReal work;

  /* Find the local max */
  VecMax_Seq(xin,idx,&work);

  /* Find the global max */
  if (!idx) {
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MAX,xin->comm);
  } else {
    PetscReal work2[2],z2[2];
    int       rstart;

    if (!VecMax_Local_Op) {
      MPI_Op_create(VecMax_Local,1,&VecMax_Local_Op);
    }

    /* pack the local max and its global index into a pair of reals for the reduction */
    VecGetOwnershipRange(xin,&rstart,PETSC_NULL);
    work2[0] = work;
    work2[1] = *idx + rstart;
    MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMax_Local_Op,xin->comm);
    *z   = z2[0];
    *idx = (int)z2[1];
  }
  return(0);
}

int VecMin_MPI(Vec xin,int *idx,PetscReal *z)
{
  int       ierr;
  PetscReal work;

  /* Find the local min */
  VecMin_Seq(xin,idx,&work);

  /* Find the global min */
  if (!idx) {
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MIN,xin->comm);
  } else {
    PetscReal work2[2],z2[2];
    int       rstart;

    if (!VecMin_Local_Op) {
      MPI_Op_create(VecMin_Local,1,&VecMin_Local_Op);
    }

    /* pack the local min and its global index into a pair of reals for the reduction */
    VecGetOwnershipRange(xin,&rstart,PETSC_NULL);
    work2[0] = work;
    work2[1] = *idx + rstart;
    MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMin_Local_Op,xin->comm);
    *z   = z2[0];
    *idx = (int)z2[1];
  }
  return(0);
}