Actual source code: pvec2.c

#define PETSCVEC_DLL
/*
     Code for some of the parallel vector primitives.
*/
#include "src/vec/impls/mpi/pvecimpl.h"
#include "src/inline/dot.h"
#include "petscblaslapack.h"

#define do_not_use_ethernet
PetscErrorCode Ethernet_Allreduce(PetscReal *in,PetscReal *out,PetscInt n,MPI_Datatype type,MPI_Op op,MPI_Comm comm)
{
  PetscMPIInt    rank,size;
  PetscInt       i;
  MPI_Status     status;

  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);

  /* pass partial sums up the chain: add this rank's values to what rank-1 sent */
  if (rank) {
    MPI_Recv(out,n,MPIU_REAL,rank-1,837,comm,&status);
    for (i=0; i<n; i++) in[i] += out[i];
  }
  if (rank != size-1) {
    MPI_Send(in,n,MPIU_REAL,rank+1,837,comm);
  }
  /* the last rank now holds the complete sums; send them back down the chain */
  if (rank == size-1) {
    for (i=0; i<n; i++) out[i] = in[i];
  } else {
    MPI_Recv(out,n,MPIU_REAL,rank+1,838,comm,&status);
  }
  if (rank) {
    MPI_Send(out,n,MPIU_REAL,rank-1,838,comm);
  }
  return 0;
}
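The routine above is a simple chain ("store and forward") reduction: each rank waits for the partial sums from rank-1, adds its own values, and forwards the result to rank+1; the last rank then sends the completed sums back down the chain, so every rank finishes with the full result. A minimal, hypothetical driver showing how it could be called in place of MPI_Allreduce (the array and the use of PETSC_COMM_WORLD are illustrative only, not taken from this file; note also that the type and op arguments are ignored -- the routine always sums MPIU_REAL values):

  PetscReal local[3] = {1.0,2.0,3.0},global[3];
  Ethernet_Allreduce(local,global,3,MPIU_REAL,MPI_SUM,PETSC_COMM_WORLD);
  /* every rank now holds the element-wise sums across all ranks in global[];
     on ranks other than 0, local[] has been overwritten with partial sums */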


PetscErrorCode VecMDot_MPI(PetscInt nv,Vec xin,const Vec y[],PetscScalar *z)
{
  PetscScalar awork[128],*work = awork;

  if (nv > 128) {
    PetscMalloc(nv*sizeof(PetscScalar),&work);
  }
  /* compute the local dot products, then sum the results across all processes */
  VecMDot_Seq(nv,xin,y,work);
  MPI_Allreduce(work,z,nv,MPIU_SCALAR,PetscSum_Op,xin->comm);
  if (nv > 128) {
    PetscFree(work);
  }
  return(0);
}

PetscErrorCode VecMTDot_MPI(PetscInt nv,Vec xin,const Vec y[],PetscScalar *z)
{
  PetscScalar awork[128],*work = awork;

  if (nv > 128) {
    PetscMalloc(nv*sizeof(PetscScalar),&work);
  }
  VecMTDot_Seq(nv,xin,y,work);
  MPI_Allreduce(work,z,nv,MPIU_SCALAR,PetscSum_Op,xin->comm);
  if (nv > 128) {
    PetscFree(work);
  }
  return(0);
}

PetscErrorCode VecNorm_MPI(Vec xin,NormType type,PetscReal *z)
{
  Vec_MPI        *x = (Vec_MPI*)xin->data;
  PetscReal      sum,work = 0.0;
  PetscScalar    *xx = x->array;
  PetscInt       n = xin->n;

  if (type == NORM_2 || type == NORM_FROBENIUS) {
    /* compute the square of the local 2-norm */
#if defined(PETSC_HAVE_SLOW_BLAS_NORM2)
#if defined(PETSC_USE_FORTRAN_KERNEL_NORM)
    fortrannormsqr_(xx,&n,&work);
#elif defined(PETSC_USE_UNROLLED_NORM)
    /* handle the leftover 1-3 entries first, then the rest in groups of 4 */
    switch (n & 0x3) {
      case 3: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 2: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 1: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++; n -= 4;
    }
    while (n>0) {
      work += PetscRealPart(xx[0]*PetscConj(xx[0])+xx[1]*PetscConj(xx[1])+
                            xx[2]*PetscConj(xx[2])+xx[3]*PetscConj(xx[3]));
      xx += 4; n -= 4;
    }
#else
    {PetscInt i; for (i=0; i<n; i++) work += PetscRealPart((xx[i])*(PetscConj(xx[i])));}
#endif
#else
    {PetscBLASInt bn = (PetscBLASInt)n, one = 1;
      work  = BLASnrm2_(&bn,xx,&one);
      work *= work;
    }
#endif
    /* sum the local squares and take the square root of the global total */
    MPI_Allreduce(&work,&sum,1,MPIU_REAL,MPI_SUM,xin->comm);
    *z = sqrt(sum);
    PetscLogFlops(2*xin->n);
  } else if (type == NORM_1) {
    /* Find the local part */
    VecNorm_Seq(xin,NORM_1,&work);
    /* Find the global sum */
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_SUM,xin->comm);
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    VecNorm_Seq(xin,NORM_INFINITY,&work);
    /* Find the global max */
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MAX,xin->comm);
  } else if (type == NORM_1_AND_2) {
    PetscReal temp[2];
    VecNorm_Seq(xin,NORM_1,temp);
    VecNorm_Seq(xin,NORM_2,temp+1);
    temp[1] = temp[1]*temp[1];
    MPI_Allreduce(temp,z,2,MPIU_REAL,MPI_SUM,xin->comm);
    z[1] = sqrt(z[1]);
  }
  return(0);
}
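Why the 2-norm branches square before reducing: the squared 2-norm is additive across processes, so

    ||x||_2 = sqrt( sum_p ||x_p||_2^2 )

where x_p is the piece of the vector owned by process p. That is why both the NORM_2 and NORM_1_AND_2 branches above compute the square of the local norm, MPI_Allreduce it with MPI_SUM, and take the square root of the global total.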

/*
       These two functions implement the MPI reduction operations used for max and min
   with index. The call below to MPI_Op_create() converts the functions Vec[Max,Min]_Local()
   into the MPI operators Vec[Max,Min]_Local_Op.
*/
MPI_Op VecMax_Local_Op = 0;
MPI_Op VecMin_Local_Op = 0;
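The MPI_Op_create() call referred to in the comment is not shown in this excerpt. As a sketch of what such a registration typically looks like (placement and the commutativity flag are assumptions here; the actual PETSc call may differ):

  /* register the user-defined reduction functions as MPI operators;
     the second argument (1) marks the operation as commutative */
  MPI_Op_create(VecMax_Local,1,&VecMax_Local_Op);
  MPI_Op_create(VecMin_Local,1,&VecMin_Local_Op);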

void PETSCVEC_DLLEXPORT VecMax_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
{
  /* each buffer holds a (value,index) pair stored as two PetscReals */
  PetscReal *xin = (PetscReal*)in,*xout = (PetscReal*)out;

  if (*datatype != MPIU_REAL) {
    (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  if (xin[0] > xout[0]) {
    xout[0] = xin[0];
    xout[1] = xin[1];
  }
  PetscFunctionReturnVoid(); /* cannot return a value */
}

void PETSCVEC_DLLEXPORT VecMin_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal*)in,*xout = (PetscReal*)out;

  if (*datatype != MPIU_REAL) {
    (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  if (xin[0] < xout[0]) {
    xout[0] = xin[0];
    xout[1] = xin[1];
  }
  PetscFunctionReturnVoid();
}

PetscErrorCode VecMax_MPI(Vec xin,PetscInt *idx,PetscReal *z)
{
  PetscReal      work;

  /* Find the local max */
  VecMax_Seq(xin,idx,&work);

  /* Find the global max */
  if (!idx) {
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MAX,xin->comm);
  } else {
    /* pack the local max and its global index into a pair of PetscReals so the
       custom reduction VecMax_Local_Op carries the index along with the value */
    PetscReal work2[2],z2[2];
    PetscInt  rstart;

    VecGetOwnershipRange(xin,&rstart,PETSC_NULL);
    work2[0] = work;
    work2[1] = *idx + rstart;
    MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMax_Local_Op,xin->comm);
    *z   = z2[0];
    *idx = (PetscInt)z2[1];
  }
  return(0);
}

PetscErrorCode VecMin_MPI(Vec xin,PetscInt *idx,PetscReal *z)
{
  PetscReal work;

  /* Find the local Min */
  VecMin_Seq(xin,idx,&work);

  /* Find the global Min */
  if (!idx) {
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MIN,xin->comm);
  } else {
    PetscReal work2[2],z2[2];
    PetscInt  rstart;

    VecGetOwnershipRange(xin,&rstart,PETSC_NULL);
    work2[0] = work;
    work2[1] = *idx + rstart;
    MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMin_Local_Op,xin->comm);
    *z   = z2[0];
    *idx = (PetscInt)z2[1];
  }
  return(0);
}