Actual source code: pvec2.c
#define PETSCVEC_DLL
/*
   Code for some of the parallel vector primitives.
*/
#include "src/vec/impls/mpi/pvecimpl.h"
#include "src/inline/dot.h"
#include "petscblaslapack.h"

#define do_not_use_ethernet
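/*
   Ethernet_Allreduce() mimics the MPI_Allreduce() calling sequence for arrays of
   PetscReal but uses a simple chain algorithm: partial sums travel up the chain
   of processes (tag 837) and the completed result travels back down (tag 838).
   The type and op arguments are unused, and the do_not_use_ethernet macro above
   is presumably what keeps this path disabled elsewhere in the file (not shown
   in this excerpt).
*/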
PetscErrorCode Ethernet_Allreduce(PetscReal *in,PetscReal *out,PetscInt n,MPI_Datatype type,MPI_Op op,MPI_Comm comm)
{
  PetscMPIInt rank,size;
  PetscInt    i;
  MPI_Status  status;

  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);
  if (rank) {
    MPI_Recv(out,n,MPIU_REAL,rank-1,837,comm,&status);
    for (i=0; i<n; i++) in[i] += out[i];
  }
  if (rank != size - 1) {
    MPI_Send(in,n,MPIU_REAL,rank+1,837,comm);
  }
  if (rank == size-1) {
    for (i=0; i<n; i++) out[i] = in[i];
  } else {
    MPI_Recv(out,n,MPIU_REAL,rank+1,838,comm,&status);
  }
  if (rank) {
    MPI_Send(out,n,MPIU_REAL,rank-1,838,comm);
  }
  return 0;
}
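/*
   VecMDot_MPI() computes the dot products of xin with the nv vectors y[]: the
   local dot products are formed by VecMDot_Seq() and then summed across the
   communicator with a single MPI_Allreduce(). A stack buffer is used when
   nv <= 128 to avoid a heap allocation.
*/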
PetscErrorCode VecMDot_MPI(PetscInt nv,Vec xin,const Vec y[],PetscScalar *z)
{
  PetscScalar awork[128],*work = awork;

  if (nv > 128) {
    PetscMalloc(nv*sizeof(PetscScalar),&work);
  }
  VecMDot_Seq(nv,xin,y,work);
  MPI_Allreduce(work,z,nv,MPIU_SCALAR,PetscSum_Op,xin->comm);
  if (nv > 128) {
    PetscFree(work);
  }
  return(0);
}
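/*
   VecMTDot_MPI() is the non-conjugated (transpose) counterpart of VecMDot_MPI();
   only the local kernel, VecMTDot_Seq(), differs.
*/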
PetscErrorCode VecMTDot_MPI(PetscInt nv,Vec xin,const Vec y[],PetscScalar *z)
{
  PetscScalar awork[128],*work = awork;

  if (nv > 128) {
    PetscMalloc(nv*sizeof(PetscScalar),&work);
  }
  VecMTDot_Seq(nv,xin,y,work);
  MPI_Allreduce(work,z,nv,MPIU_SCALAR,PetscSum_Op,xin->comm);
  if (nv > 128) {
    PetscFree(work);
  }
  return(0);
}
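/*
   VecNorm_MPI() computes the 2-, Frobenius, 1-, infinity, or combined 1-and-2
   norm: each process reduces its local entries (squared for the 2-norm), a
   single MPI_Allreduce() combines the local results, and a square root is
   taken afterwards where needed.
*/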
PetscErrorCode VecNorm_MPI(Vec xin,NormType type,PetscReal *z)
{
  Vec_MPI     *x = (Vec_MPI*)xin->data;
  PetscReal   sum,work = 0.0;
  PetscScalar *xx = x->array;
  PetscInt    n = xin->n;

  if (type == NORM_2 || type == NORM_FROBENIUS) {
#if defined(PETSC_HAVE_SLOW_BLAS_NORM2)
#if defined(PETSC_USE_FORTRAN_KERNEL_NORM)
    fortrannormsqr_(xx,&n,&work);
#elif defined(PETSC_USE_UNROLLED_NORM)
    /* handle the n mod 4 leftover entries first (cases fall through), then four entries per pass */
    switch (n & 0x3) {
      case 3: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 2: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 1: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++; n -= 4;
    }
    while (n>0) {
      work += PetscRealPart(xx[0]*PetscConj(xx[0])+xx[1]*PetscConj(xx[1])+
                            xx[2]*PetscConj(xx[2])+xx[3]*PetscConj(xx[3]));
      xx += 4; n -= 4;
    }
#else
    {PetscInt i; for (i=0; i<n; i++) work += PetscRealPart((xx[i])*(PetscConj(xx[i])));}
#endif
#else
    {PetscBLASInt bn = (PetscBLASInt)n, one = 1;
     work  = BLASnrm2_(&bn,xx,&one);
     work *= work;
    }
#endif
    MPI_Allreduce(&work,&sum,1,MPIU_REAL,MPI_SUM,xin->comm);
    *z = sqrt(sum);
    PetscLogFlops(2*xin->n);
  } else if (type == NORM_1) {
    /* Find the local part */
    VecNorm_Seq(xin,NORM_1,&work);
    /* Find the global sum */
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_SUM,xin->comm);
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    VecNorm_Seq(xin,NORM_INFINITY,&work);
    /* Find the global max */
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MAX,xin->comm);
  } else if (type == NORM_1_AND_2) {
    /* sum the local 1-norms and squared 2-norms, then take the square root of the latter */
    PetscReal temp[2];
    VecNorm_Seq(xin,NORM_1,temp);
    VecNorm_Seq(xin,NORM_2,temp+1);
    temp[1] = temp[1]*temp[1];
    MPI_Allreduce(temp,z,2,MPIU_REAL,MPI_SUM,xin->comm);
    z[1] = sqrt(z[1]);
  }
  return(0);
}
/*
   These two functions are the MPI reduction operations used for max and min with index.
   The call below to MPI_Op_create() converts the functions Vec[Max,Min]_Local() into the
   MPI operators Vec[Max,Min]_Local_Op.
*/
MPI_Op VecMax_Local_Op = 0;
MPI_Op VecMin_Local_Op = 0;
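/*
   The MPI_Op_create() call referred to above does not appear in this excerpt. A
   minimal sketch of the registration, assuming the operations are flagged as
   commutative (the exact call site and flag value are assumptions here):

       MPI_Op_create(VecMax_Local,1,&VecMax_Local_Op);
       MPI_Op_create(VecMin_Local,1,&VecMin_Local_Op);
*/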
void PETSCVEC_DLLEXPORT VecMax_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal*)in,*xout = (PetscReal*)out;

  if (*datatype != MPIU_REAL) {
    (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  /* entry 0 holds the value and entry 1 its global index; keep the larger value and its index */
  if (xin[0] > xout[0]) {
    xout[0] = xin[0];
    xout[1] = xin[1];
  }
  PetscFunctionReturnVoid(); /* cannot return a value */
}
void PETSCVEC_DLLEXPORT VecMin_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal*)in,*xout = (PetscReal*)out;

  if (*datatype != MPIU_REAL) {
    (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  if (xin[0] < xout[0]) {
    xout[0] = xin[0];
    xout[1] = xin[1];
  }
  PetscFunctionReturnVoid();
}
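/*
   VecMax_MPI() and VecMin_MPI() below find the global extreme value and, when
   idx is non-null, its global index: the local value and global index are packed
   into a pair of PetscReals and combined with the user-defined reduction
   operations declared above.
*/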
PetscErrorCode VecMax_MPI(Vec xin,PetscInt *idx,PetscReal *z)
{
  PetscReal work;

  /* Find the local max */
  VecMax_Seq(xin,idx,&work);

  /* Find the global max */
  if (!idx) {
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MAX,xin->comm);
  } else {
    PetscReal work2[2],z2[2];
    PetscInt  rstart;

    VecGetOwnershipRange(xin,&rstart,PETSC_NULL);
    work2[0] = work;
    work2[1] = *idx + rstart;
    MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMax_Local_Op,xin->comm);
    *z   = z2[0];
    *idx = (PetscInt)z2[1];
  }
  return(0);
}
PetscErrorCode VecMin_MPI(Vec xin,PetscInt *idx,PetscReal *z)
{
  PetscReal work;

  /* Find the local min */
  VecMin_Seq(xin,idx,&work);

  /* Find the global min */
  if (!idx) {
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MIN,xin->comm);
  } else {
    PetscReal work2[2],z2[2];
    PetscInt  rstart;

    VecGetOwnershipRange(xin,&rstart,PETSC_NULL);
    work2[0] = work;
    work2[1] = *idx + rstart;
    MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMin_Local_Op,xin->comm);
    *z   = z2[0];
    *idx = (PetscInt)z2[1];
  }
  return(0);
}