Actual source code: ex5s.c
/*$Id: ex5s.c,v 1.29 2001/08/07 21:31:17 bsmith Exp $*/

static char help[] = "2d Bratu problem in shared memory parallel with SNES.\n\
We solve the Bratu (SFI - solid fuel ignition) problem in a 2D rectangular\n\
domain, uses SHARED MEMORY to evaluate the user function.\n\
The command line options include:\n\
  -par <parameter>, where <parameter> indicates the problem's nonlinearity\n\
     problem SFI:  <parameter> = Bratu parameter (0 <= par <= 6.81)\n\
  -mx <xg>, where <xg> = number of grid points in the x-direction\n\
  -my <yg>, where <yg> = number of grid points in the y-direction\n\
  -use_fortran_function: use Fortran coded function, rather than C\n";

/*
     This code compiles ONLY on SGI systems
    ========================================
*/
/*T
   Concepts: SNES^parallel Bratu example
   Concepts: shared memory
   Processors: n
T*/

/*

     Programming model: Combination of
        1) MPI message passing for PETSc routines
        2) automatic loop parallelism (using shared memory) for the user
           provided function.

       While the user function is being evaluated all MPI processes except process
     0 block. Process zero spawns nt threads to evaluate the user function. Once
     the user function is complete, the worker threads are suspended and all the MPI
     processes continue.

     Other useful options:

       -snes_mf : use matrix free operator and no preconditioner
       -snes_mf_operator : use matrix free operator but compute Jacobian via
                           finite differences to form preconditioner

     Environment variable:

       setenv MPC_NUM_THREADS nt <- set number of threads processor 0 should
                                    use to evaluate user provided function

       Note: The number of MPI processes (set with the mpirun option -np) can
       be set completely independently from the number of threads process 0
       uses to evaluate the function (though usually one would make them the same).
*/
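
/*
   Example invocation (illustrative only; the executable name, process count,
   and thread count below are placeholders, not prescribed by this file):

       setenv MPC_NUM_THREADS 4
       mpirun -np 2 ex5s -mx 16 -my 16 -par 6.0 -snes_monitor

   Here the 2 MPI processes run the PETSc solver, while process 0 spawns
   4 threads to evaluate the user provided function.
*/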

/* ------------------------------------------------------------------------

    Solid Fuel Ignition (SFI) problem.  This problem is modeled by
    the partial differential equation

            -Laplacian u - lambda*exp(u) = 0,  0 < x,y < 1,

    with boundary conditions

             u = 0  for  x = 0, x = 1, y = 0, y = 1.

    A finite difference approximation with the usual 5-point stencil
    is used to discretize the boundary value problem to obtain a nonlinear
    system of equations.

    The uniprocessor version of this code is snes/examples/tutorials/ex4.c
    A parallel distributed memory version is snes/examples/tutorials/ex5.c and ex5f.F

  ------------------------------------------------------------------------- */
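
/*
    Concretely, at an interior grid point (i,j), with hx = 1/(mx-1),
    hy = 1/(my-1) and u = x[i + j*mx], FormFunction() below evaluates the
    residual

        f = (2u - u_west - u_east)*(hy/hx)
          + (2u - u_south - u_north)*(hx/hy)
          - hx*hy*lambda*exp(u)

    while boundary points simply get f = u, enforcing u = 0 on the boundary.
*/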

/*
   Include "petscsnes.h" so that we can use SNES solvers.  Note that this
   file automatically includes:
     petsc.h       - base PETSc routines   petscvec.h - vectors
     petscsys.h    - system routines       petscmat.h - matrices
     petscis.h     - index sets            petscksp.h - Krylov subspace methods
     petscviewer.h - viewers               petscpc.h  - preconditioners
     petscsles.h   - linear solvers
*/
#include "petscsnes.h"

/*
   User-defined application context - contains data needed by the
   application-provided call-back routine FormFunction().
*/
typedef struct {
  PetscReal param;          /* test problem parameter */
  int       mx,my;          /* discretization in x, y directions */
  int       rank;           /* processor rank */
} AppCtx;

/*
   User-defined routines
*/
extern int FormFunction(SNES,Vec,Vec,void*),FormInitialGuess(AppCtx*,Vec);
extern int FormFunctionFortran(SNES,Vec,Vec,void*);

/*
   The main program is written in C while the user provided function
   is given in both Fortran and C. The main program could also be written
   in Fortran; the ONE PROBLEM is that VecGetArray() cannot be called from
   Fortran on the SGI machines; thus the routine FormFunctionFortran() must
   be written in C.
*/
int main(int argc,char **argv)
{
  SNES          snes;                /* nonlinear solver */
  Vec           x,r;                 /* solution, residual vectors */
  AppCtx        user;                /* user-defined work context */
  int           its;                 /* iterations for convergence */
  int           N,ierr,rstart,rend,*colors,i,ii,ri,rj;
  int           (*fnc)(SNES,Vec,Vec,void*);
  PetscReal     bratu_lambda_max = 6.81,bratu_lambda_min = 0.;
  MatFDColoring fdcoloring;
  ISColoring    iscoloring;
  Mat           J;
  PetscScalar   zero = 0.0;
  PetscTruth    flg;

  PetscInitialize(&argc,&argv,(char *)0,help);
  MPI_Comm_rank(PETSC_COMM_WORLD,&user.rank);

  /*
     Initialize problem parameters
  */
  user.mx = 4; user.my = 4; user.param = 6.0;
  PetscOptionsGetInt(PETSC_NULL,"-mx",&user.mx,PETSC_NULL);
  PetscOptionsGetInt(PETSC_NULL,"-my",&user.my,PETSC_NULL);
  PetscOptionsGetReal(PETSC_NULL,"-par",&user.param,PETSC_NULL);
  if (user.param >= bratu_lambda_max || user.param <= bratu_lambda_min) {
    SETERRQ(1,"Lambda is out of range");
  }
  N = user.mx*user.my;

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Create nonlinear solver context
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  SNESCreate(PETSC_COMM_WORLD,&snes);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Create vector data structures; set function evaluation routine
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  /*
     The routine VecCreateShared() creates a parallel vector with each processor
     assigned its own segment, BUT, in addition, the first processor has access to the
     entire array. This is to allow the user's function to be based on loop level
     parallelism rather than MPI.
  */
  VecCreateShared(PETSC_COMM_WORLD,PETSC_DECIDE,N,&x);
  VecDuplicate(x,&r);

  PetscOptionsHasName(PETSC_NULL,"-use_fortran_function",&flg);
  if (flg) {
    fnc = FormFunctionFortran;
  } else {
    fnc = FormFunction;
  }

  /*
     Set function evaluation routine and vector
  */
  SNESSetFunction(snes,r,fnc,&user);

  /*
     Currently when using VecCreateShared() and using loop level parallelism
     to automatically parallelize the user function it makes no sense for the
     Jacobian to be computed via loop level parallelism, because all the threads
     would be simultaneously calling MatSetValues(), causing a bottleneck.

     Thus this example uses the PETSc Jacobian calculations via finite differencing
     to approximate the Jacobian.
  */

  /*
     Create a coloring of the grid: grid points that share a color may be
     perturbed simultaneously when the Jacobian is approximated by finite
     differences.
  */
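
  /*
     Why this coloring works: the 5-point stencil couples a grid point only to
     itself and its four immediate neighbors, so two columns of the Jacobian may
     share a color whenever the corresponding grid points never appear together
     in any stencil.  With the color 3*((i/user.mx) % 3) + (i % 3) used below,
     two points of equal color lie either in grid rows at least three apart or
     in the same row with columns at least three apart, so the resulting 9
     colors always form a valid (though not necessarily minimal) coloring for
     this stencil, independent of mx and my.
  */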
  VecGetOwnershipRange(r,&rstart,&rend);
  PetscMalloc((rend-rstart)*sizeof(int),&colors);
  for (i=rstart; i<rend; i++) {
    colors[i - rstart] = 3*((i/user.mx) % 3) + (i % 3);
  }
  ierr = ISColoringCreate(PETSC_COMM_WORLD,rend-rstart,colors,&iscoloring);
  PetscFree(colors);

  /*
     Create and set the nonzero pattern for the Jacobian: This is not done
     particularly efficiently. One should process the boundary nodes separately and
     then use a simple loop for the interior nodes.
       Note that for this code we use the "natural" numbering of the nodes on the
     grid (since that is what is good for the user provided function). In the
     DA examples we must use the DA numbering where each processor is assigned a
     chunk of data.
  */
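
  /*
     Note on the preallocation arguments of MatCreateMPIAIJ() below: 5 nonzeros
     per row are preallocated for the local ("diagonal") block, matching the
     5-point stencil, while the off-process ("off-diagonal") block gets no
     preallocation, so any stencil couplings to grid points owned by other
     processes are allocated on the fly during MatSetValues().  That is
     acceptable here since the nonzero pattern is built only once.
  */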
  MatCreateMPIAIJ(PETSC_COMM_WORLD,rend-rstart,rend-rstart,N,N,
                  5,0,0,0,&J);
  for (i=rstart; i<rend; i++) {
    rj = i % user.mx;            /* column in grid */
    ri = i / user.mx;            /* row in grid */
    if (ri != 0) {               /* first row does not have neighbor below */
      ii = i - user.mx;
      MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
    }
    if (ri != user.my - 1) {     /* last row does not have neighbor above */
      ii = i + user.mx;
      MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
    }
    if (rj != 0) {               /* first column does not have neighbor to left */
      ii = i - 1;
      MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
    }
    if (rj != user.mx - 1) {     /* last column does not have neighbor to right */
      ii = i + 1;
      MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
    }
    MatSetValues(J,1,&i,1,&i,&zero,INSERT_VALUES);
  }
  MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);

  /*
     Create the data structure that SNESDefaultComputeJacobianColor() uses
     to compute the actual Jacobians via finite differences.
  */
  MatFDColoringCreate(J,iscoloring,&fdcoloring);
  MatFDColoringSetFunction(fdcoloring,(int (*)(void))fnc,&user);
  MatFDColoringSetFromOptions(fdcoloring);

  /*
     Tell SNES to use the routine SNESDefaultComputeJacobianColor()
     to compute Jacobians.
  */
  SNESSetJacobian(snes,J,J,SNESDefaultComputeJacobianColor,fdcoloring);
  ISColoringDestroy(iscoloring);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Customize nonlinear solver; set runtime options
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  /*
     Set runtime options (e.g., -snes_monitor -snes_rtol <rtol> -ksp_type <type>)
  */
  SNESSetFromOptions(snes);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Evaluate initial guess; then solve nonlinear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  /*
     Note: The user should initialize the vector, x, with the initial guess
     for the nonlinear solver prior to calling SNESSolve().  In particular,
     to employ an initial guess of zero, the user should explicitly set
     this vector to zero by calling VecSet().
  */
  FormInitialGuess(&user,x);
  SNESSolve(snes,x,&its);
  PetscPrintf(PETSC_COMM_WORLD,"Number of Newton iterations = %d\n",its);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  VecDestroy(x);
  VecDestroy(r);
  SNESDestroy(snes);
  PetscFinalize();

  return 0;
}
/* ------------------------------------------------------------------- */

/*
   FormInitialGuess - Forms initial approximation.

   Input Parameters:
   user - user-defined application context
   X - vector

   Output Parameter:
   X - vector
*/
int FormInitialGuess(AppCtx *user,Vec X)
{
  int         i,j,row,mx,my,ierr;
  PetscReal   one = 1.0,lambda,temp1,temp,hx,hy,hxdhy,hydhx,sc;
  PetscScalar *x;

  /*
     Process 0 has to wait for all other processes to get here
     before proceeding to write in the shared vector
  */
  PetscBarrier((PetscObject)X);
  if (user->rank) {
    /*
       All the non-busy processors have to wait here for process 0 to finish
       evaluating the function; otherwise they will start using the vector values
       before they have been computed
    */
    PetscBarrier((PetscObject)X);
    return 0;
  }

  mx = user->mx;               my = user->my;               lambda = user->param;
  hx = one/(PetscReal)(mx-1);  hy = one/(PetscReal)(my-1);
  sc = hx*hy*lambda;           hxdhy = hx/hy;               hydhx = hy/hx;
  temp1 = lambda/(lambda + one);

  /*
     Get a pointer to vector data.
       - For default PETSc vectors, VecGetArray() returns a pointer to
         the data array.  Otherwise, the routine is implementation dependent.
       - You MUST call VecRestoreArray() when you no longer need access to
         the array.
  */
  VecGetArray(X,&x);

  /*
     Compute the initial guess over the entire grid (only process 0 executes
     this, using its access to the whole shared vector)
  */
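
  /*
     The guess computed below is x = (lambda/(lambda+1)) * sqrt(d), where d is
     the distance from the grid point to the nearest edge of the unit square;
     it vanishes on the boundary, consistent with the boundary conditions.
  */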
#pragma arl(4)
#pragma distinct (*x,*f)
#pragma no side effects (sqrt)
  for (j=0; j<my; j++) {
    temp = (PetscReal)(PetscMin(j,my-j-1))*hy;
    for (i=0; i<mx; i++) {
      row = i + j*mx;
      if (i == 0 || j == 0 || i == mx-1 || j == my-1) {
        x[row] = 0.0;
        continue;
      }
      x[row] = temp1*sqrt(PetscMin((PetscReal)(PetscMin(i,mx-i-1))*hx,temp));
    }
  }

  /*
     Restore vector
  */
  VecRestoreArray(X,&x);

  PetscBarrier((PetscObject)X);
  return 0;
}
/* ------------------------------------------------------------------- */
/*
   FormFunction - Evaluates nonlinear function, F(x).

   Input Parameters:
.  snes - the SNES context
.  X - input vector
.  ptr - optional user-defined context, as set by SNESSetFunction()

   Output Parameter:
.  F - function vector
*/
int FormFunction(SNES snes,Vec X,Vec F,void *ptr)
{
  AppCtx      *user = (AppCtx*)ptr;
  int         ierr,i,j,row,mx,my;
  PetscReal   two = 2.0,one = 1.0,lambda,hx,hy,hxdhy,hydhx,sc;
  PetscScalar u,uxx,uyy,*x,*f;

  /*
     Process 0 has to wait for all other processes to get here
     before proceeding to write in the shared vector
  */
  PetscBarrier((PetscObject)X);

  if (user->rank) {
    /*
       All the non-busy processors have to wait here for process 0 to finish
       evaluating the function; otherwise they will start using the vector values
       before they have been computed
    */
    PetscBarrier((PetscObject)X);
    return 0;
  }

  mx = user->mx;               my = user->my;               lambda = user->param;
  hx = one/(PetscReal)(mx-1);  hy = one/(PetscReal)(my-1);
  sc = hx*hy*lambda;           hxdhy = hx/hy;               hydhx = hy/hx;

  /*
     Get pointers to vector data
  */
  VecGetArray(X,&x);
  VecGetArray(F,&f);

  /*
     The distinct pragma below tells the SGI compiler that x and f contain no
     overlapping regions and thus it can use additional optimizations.
  */
#pragma arl(4)
#pragma distinct (*x,*f)
#pragma no side effects (exp)

  /*
     Compute function over the entire grid
  */
  for (j=0; j<my; j++) {
    for (i=0; i<mx; i++) {
      row = i + j*mx;
      if (i == 0 || j == 0 || i == mx-1 || j == my-1) {
        f[row] = x[row];
        continue;
      }
      u = x[row];
      uxx = (two*u - x[row-1] - x[row+1])*hydhx;
      uyy = (two*u - x[row-mx] - x[row+mx])*hxdhy;
      f[row] = uxx + uyy - sc*exp(u);
    }
  }

  /*
     Restore vectors
  */
  VecRestoreArray(X,&x);
  VecRestoreArray(F,&f);

  PetscLogFlops(11*(mx-2)*(my-2));
  PetscBarrier((PetscObject)X);
  return 0;
}

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define applicationfunctionfortran_ APPLICATIONFUNCTIONFORTRAN
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define applicationfunctionfortran_ applicationfunctionfortran
#endif
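
/*
   The macros above map the C name applicationfunctionfortran_ onto the
   compiler's Fortran name-mangling convention.  Based on the call made in
   FormFunctionFortran() below, the Fortran routine is expected to have an
   interface equivalent to

       subroutine ApplicationFunctionFortran(lambda,mx,my,x,f,ierr)

   i.e. it receives the Bratu parameter, the grid dimensions, and the raw
   arrays of X and F obtained with VecGetArray().
*/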

/* ------------------------------------------------------------------- */
/*
   FormFunctionFortran - Evaluates nonlinear function, F(x), in Fortran.
*/
int FormFunctionFortran(SNES snes,Vec X,Vec F,void *ptr)
{
  AppCtx      *user = (AppCtx*)ptr;
  int         ierr;
  PetscScalar *x,*f;

  /*
     Process 0 has to wait for all other processes to get here
     before proceeding to write in the shared vector
  */
  PetscBarrier((PetscObject)snes);
  if (!user->rank) {
    VecGetArray(X,&x);
    VecGetArray(F,&f);
    applicationfunctionfortran_(&user->param,&user->mx,&user->my,x,f,&ierr);
    VecRestoreArray(X,&x);
    VecRestoreArray(F,&f);
    PetscLogFlops(11*(user->mx-2)*(user->my-2));
  }
  /*
     All the non-busy processors have to wait here for process 0 to finish
     evaluating the function; otherwise they will start using the vector values
     before they have been computed
  */
  PetscBarrier((PetscObject)snes);
  return 0;
}