Actual source code: matptap.c
1: /*
2: Defines projective product routines where A is a SeqAIJ matrix
3: C = P^T * A * P
4: */
6: #include src/mat/impls/aij/seq/aij.h
7: #include src/mat/utils/freespace.h
9: int MatSeqAIJPtAP(Mat,Mat,Mat*);
10: int MatSeqAIJPtAPSymbolic(Mat,Mat,Mat*);
11: int MatSeqAIJPtAPNumeric(Mat,Mat,Mat);
13: static int MATSeqAIJ_PtAP = 0;
14: static int MATSeqAIJ_PtAPSymbolic = 0;
15: static int MATSeqAIJ_PtAPNumeric = 0;
17: /*
18: MatSeqAIJPtAP - Creates the SeqAIJ matrix product, C,
19: of SeqAIJ matrix A and matrix P:
20: C = P^T * A * P;
22: Note: C is assumed to be uncreated.
23: If this is not the case, Destroy C before calling this routine.
24: */
25: int MatSeqAIJPtAP(Mat A,Mat P,Mat *C) {
27: char funct[80];
31: PetscLogEventBegin(MATSeqAIJ_PtAP,A,P,0,0);
33: MatSeqAIJPtAPSymbolic(A,P,C);
35: /* Avoid additional error checking included in */
36: /* MatSeqAIJApplyPtAPNumeric(A,P,*C); */
38: /* Query A for ApplyPtAPNumeric implementation based on types of P */
39: PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_");
40: PetscStrcat(funct,P->type_name);
41: PetscTryMethod(A,funct,(Mat,Mat,Mat),(A,P,*C));
43: PetscLogEventEnd(MATSeqAIJ_PtAP,A,P,0,0);
45: return(0);
46: }
48: /*
49: MatSeqAIJPtAPSymbolic - Creates the (i,j) structure of the SeqAIJ matrix product, C,
50: of SeqAIJ matrix A and matrix P, according to:
51: C = P^T * A * P;
53: Note: C is assumed to be uncreated.
54: If this is not the case, Destroy C before calling this routine.
55: */
56: int MatSeqAIJPtAPSymbolic(Mat A,Mat P,Mat *C) {
58: char funct[80];
66: MatPreallocated(A);
67: if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
68: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
72: MatPreallocated(P);
73: if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
74: if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
76: if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
77: if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
79: /* Query A for ApplyPtAP implementation based on types of P */
80: PetscStrcpy(funct,"MatApplyPtAPSymbolic_seqaij_");
81: PetscStrcat(funct,P->type_name);
82: PetscTryMethod(A,funct,(Mat,Mat,Mat*),(A,P,C));
84: return(0);
85: }
87: EXTERN_C_BEGIN
88: int MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
89: int ierr;
90: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
91: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
92: int aishift=a->indexshift,pishift=p->indexshift;
93: int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
94: int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
95: int an=A->N,am=A->M,pn=P->N,pm=P->M;
96: int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
97: MatScalar *ca;
101: /* some error checking which could be moved into interface layer */
102: if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
103:
104: /* Start timer */
105: PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,P,0,0);
107: /* Get ij structure of P^T */
108: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
109: ptJ=ptj;
111: /* Allocate ci array, arrays for fill computation and */
112: /* free space for accumulating nonzero column info */
113: PetscMalloc((pn+1)*sizeof(int),&ci);
114: ci[0] = 0;
116: PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);
117: PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));
118: ptasparserow = ptadenserow + an;
119: denserow = ptasparserow + an;
120: sparserow = denserow + pn;
122: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
123: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
124: ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);
125: current_space = free_space;
127: /* Determine symbolic info for each row of C: */
128: for (i=0;i<pn;i++) {
129: ptnzi = pti[i+1] - pti[i];
130: ptanzi = 0;
131: /* Determine symbolic row of PtA: */
132: for (j=0;j<ptnzi;j++) {
133: arow = *ptJ++;
134: anzj = ai[arow+1] - ai[arow];
135: ajj = aj + ai[arow];
136: for (k=0;k<anzj;k++) {
137: if (!ptadenserow[ajj[k]]) {
138: ptadenserow[ajj[k]] = -1;
139: ptasparserow[ptanzi++] = ajj[k];
140: }
141: }
142: }
143: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
144: ptaj = ptasparserow;
145: cnzi = 0;
146: for (j=0;j<ptanzi;j++) {
147: prow = *ptaj++;
148: pnzj = pi[prow+1] - pi[prow];
149: pjj = pj + pi[prow];
150: for (k=0;k<pnzj;k++) {
151: if (!denserow[pjj[k]]) {
152: denserow[pjj[k]] = -1;
153: sparserow[cnzi++] = pjj[k];
154: }
155: }
156: }
158: /* sort sparserow */
159: PetscSortInt(cnzi,sparserow);
160:
161: /* If free space is not available, make more free space */
162: /* Double the amount of total space in the list */
163: if (current_space->local_remaining<cnzi) {
164: GetMoreSpace(current_space->total_array_size,¤t_space);
165: }
167: /* Copy data into free space, and zero out denserows */
168: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));
169: current_space->array += cnzi;
170: current_space->local_used += cnzi;
171: current_space->local_remaining -= cnzi;
172:
173: for (j=0;j<ptanzi;j++) {
174: ptadenserow[ptasparserow[j]] = 0;
175: }
176: for (j=0;j<cnzi;j++) {
177: denserow[sparserow[j]] = 0;
178: }
179: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
180: /* For now, we will recompute what is needed. */
181: ci[i+1] = ci[i] + cnzi;
182: }
183: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
184: /* Allocate space for cj, initialize cj, and */
185: /* destroy list of free space and other temporary array(s) */
186: PetscMalloc((ci[pn]+1)*sizeof(int),&cj);
187: MakeSpaceContiguous(&free_space,cj);
188: PetscFree(ptadenserow);
189:
190: /* Allocate space for ca */
191: PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);
192: PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));
193:
194: /* put together the new matrix */
195: MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);
197: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
198: /* Since these are PETSc arrays, change flags to free them as necessary. */
199: c = (Mat_SeqAIJ *)((*C)->data);
200: c->freedata = PETSC_TRUE;
201: c->nonew = 0;
203: /* Clean up. */
204: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
206: PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,P,0,0);
207: return(0);
208: }
209: EXTERN_C_END
211: #include src/mat/impls/maij/maij.h
212: EXTERN_C_BEGIN
213: int MatApplyPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) {
214: int ierr;
215: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
216: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
217: Mat P=pp->AIJ;
218: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
219: int aishift=a->indexshift,pishift=p->indexshift;
220: int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
221: int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
222: int an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
223: int i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
224: MatScalar *ca;
228: /* some error checking which could be moved into interface layer */
229: if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
230:
231: /* Start timer */
232: PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);
234: /* Get ij structure of P^T */
235: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
237: /* Allocate ci array, arrays for fill computation and */
238: /* free space for accumulating nonzero column info */
239: PetscMalloc((pn+1)*sizeof(int),&ci);
240: ci[0] = 0;
242: PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);
243: PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));
244: ptasparserow = ptadenserow + an;
245: denserow = ptasparserow + an;
246: sparserow = denserow + pn;
248: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
249: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
250: ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);
251: current_space = free_space;
253: /* Determine symbolic info for each row of C: */
254: for (i=0;i<pn/ppdof;i++) {
255: ptnzi = pti[i+1] - pti[i];
256: ptanzi = 0;
257: ptJ = ptj + pti[i];
258: for (dof=0;dof<ppdof;dof++) {
259: /* Determine symbolic row of PtA: */
260: for (j=0;j<ptnzi;j++) {
261: arow = ptJ[j] + dof;
262: anzj = ai[arow+1] - ai[arow];
263: ajj = aj + ai[arow];
264: for (k=0;k<anzj;k++) {
265: if (!ptadenserow[ajj[k]]) {
266: ptadenserow[ajj[k]] = -1;
267: ptasparserow[ptanzi++] = ajj[k];
268: }
269: }
270: }
271: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
272: ptaj = ptasparserow;
273: cnzi = 0;
274: for (j=0;j<ptanzi;j++) {
275: pdof = *ptaj%dof;
276: prow = (*ptaj++)/dof;
277: pnzj = pi[prow+1] - pi[prow];
278: pjj = pj + pi[prow];
279: for (k=0;k<pnzj;k++) {
280: if (!denserow[pjj[k]+pdof]) {
281: denserow[pjj[k]+pdof] = -1;
282: sparserow[cnzi++] = pjj[k]+pdof;
283: }
284: }
285: }
287: /* sort sparserow */
288: PetscSortInt(cnzi,sparserow);
289:
290: /* If free space is not available, make more free space */
291: /* Double the amount of total space in the list */
292: if (current_space->local_remaining<cnzi) {
293: GetMoreSpace(current_space->total_array_size,¤t_space);
294: }
296: /* Copy data into free space, and zero out denserows */
297: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));
298: current_space->array += cnzi;
299: current_space->local_used += cnzi;
300: current_space->local_remaining -= cnzi;
302: for (j=0;j<ptanzi;j++) {
303: ptadenserow[ptasparserow[j]] = 0;
304: }
305: for (j=0;j<cnzi;j++) {
306: denserow[sparserow[j]] = 0;
307: }
308: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
309: /* For now, we will recompute what is needed. */
310: ci[i+1+dof] = ci[i+dof] + cnzi;
311: }
312: }
313: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
314: /* Allocate space for cj, initialize cj, and */
315: /* destroy list of free space and other temporary array(s) */
316: PetscMalloc((ci[pn]+1)*sizeof(int),&cj);
317: MakeSpaceContiguous(&free_space,cj);
318: PetscFree(ptadenserow);
319:
320: /* Allocate space for ca */
321: PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);
322: PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));
323:
324: /* put together the new matrix */
325: MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);
327: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
328: /* Since these are PETSc arrays, change flags to free them as necessary. */
329: c = (Mat_SeqAIJ *)((*C)->data);
330: c->freedata = PETSC_TRUE;
331: c->nonew = 0;
333: /* Clean up. */
334: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
336: PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);
337: return(0);
338: }
339: EXTERN_C_END
341: /*
342: MatSeqAIJPtAPNumeric - Computes the SeqAIJ matrix product, C,
343: of SeqAIJ matrix A and matrix P, according to:
344: C = P^T * A * P
345: Note: C must have been created by calling MatSeqAIJApplyPtAPSymbolic.
346: */
347: int MatSeqAIJPtAPNumeric(Mat A,Mat P,Mat C) {
349: char funct[80];
355: MatPreallocated(A);
356: if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
357: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
361: MatPreallocated(P);
362: if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
363: if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
367: MatPreallocated(C);
368: if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
369: if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
371: if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
372: if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
373: if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
374: if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
376: /* Query A for ApplyPtAP implementation based on types of P */
377: PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_");
378: PetscStrcat(funct,P->type_name);
379: PetscTryMethod(A,funct,(Mat,Mat,Mat),(A,P,C));
381: return(0);
382: }
384: EXTERN_C_BEGIN
385: int MatApplyPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) {
386: int ierr,flops=0;
387: Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
388: Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data;
389: Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data;
390: int aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift;
391: int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
392: int *ci=c->i,*cj=c->j,*cjj;
393: int am=A->M,cn=C->N,cm=C->M;
394: int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
395: MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
399: /* Currently not for shifted matrices! */
400: if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
402: PetscLogEventBegin(MATSeqAIJ_PtAPNumeric,A,P,C,0);
404: /* Allocate temporary array for storage of one row of A*P */
405: PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);
406: PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));
408: apj = (int *)(apa + cn);
409: apjdense = apj + cn;
411: /* Clear old values in C */
412: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
414: for (i=0;i<am;i++) {
415: /* Form sparse row of A*P */
416: anzi = ai[i+1] - ai[i];
417: apnzj = 0;
418: for (j=0;j<anzi;j++) {
419: prow = *aj++;
420: pnzj = pi[prow+1] - pi[prow];
421: pjj = pj + pi[prow];
422: paj = pa + pi[prow];
423: for (k=0;k<pnzj;k++) {
424: if (!apjdense[pjj[k]]) {
425: apjdense[pjj[k]] = -1;
426: apj[apnzj++] = pjj[k];
427: }
428: apa[pjj[k]] += (*aa)*paj[k];
429: }
430: flops += 2*pnzj;
431: aa++;
432: }
434: /* Sort the j index array for quick sparse axpy. */
435: PetscSortInt(apnzj,apj);
437: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
438: pnzi = pi[i+1] - pi[i];
439: for (j=0;j<pnzi;j++) {
440: nextap = 0;
441: crow = *pJ++;
442: cjj = cj + ci[crow];
443: caj = ca + ci[crow];
444: /* Perform sparse axpy operation. Note cjj includes apj. */
445: for (k=0;nextap<apnzj;k++) {
446: if (cjj[k]==apj[nextap]) {
447: caj[k] += (*pA)*apa[apj[nextap++]];
448: }
449: }
450: flops += 2*apnzj;
451: pA++;
452: }
454: /* Zero the current row info for A*P */
455: for (j=0;j<apnzj;j++) {
456: apa[apj[j]] = 0.;
457: apjdense[apj[j]] = 0;
458: }
459: }
461: /* Assemble the final matrix and clean up */
462: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
463: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
464: PetscFree(apa);
465: PetscLogFlops(flops);
466: PetscLogEventEnd(MATSeqAIJ_PtAPNumeric,A,P,C,0);
468: return(0);
469: }
470: EXTERN_C_END
472: int RegisterApplyPtAPRoutines_Private(Mat A) {
477: if (!MATSeqAIJ_PtAP) {
478: PetscLogEventRegister(&MATSeqAIJ_PtAP,"MatSeqAIJApplyPtAP",MAT_COOKIE);
479: }
481: if (!MATSeqAIJ_PtAPSymbolic) {
482: PetscLogEventRegister(&MATSeqAIJ_PtAPSymbolic,"MatSeqAIJApplyPtAPSymbolic",MAT_COOKIE);
483: }
484: PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPSymbolic_seqaij_seqaij",
485: "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ",
486: MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ);
488: if (!MATSeqAIJ_PtAPNumeric) {
489: PetscLogEventRegister(&MATSeqAIJ_PtAPNumeric,"MatSeqAIJApplyPtAPNumeric",MAT_COOKIE);
490: }
491: PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPNumeric_seqaij_seqaij",
492: "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ",
493: MatApplyPtAPNumeric_SeqAIJ_SeqAIJ);
494: return(0);
495: }