/*$Id: matmatmult.c,v 1.15 2001/09/07 20:04:44 buschelm Exp $*/
/*
  matmatmult.c

  Defines matrix-matrix product routines for pairs of SeqAIJ matrices
          C = A * B
          C = P * A * P^T
*/
#include "src/mat/impls/aij/seq/aij.h"
#include "src/mat/utils/freespace.h"
/*
  Log event handles for the routines in this file.  Each one starts at 0 and
  is filled in by PetscLogEventRegister() the first time the corresponding
  routine runs (the routines test for a zero handle before registering).
*/
static int logkey_matmatmult            = 0;
static int logkey_matmatmult_symbolic   = 0;
static int logkey_matmatmult_numeric    = 0;

static int logkey_matapplypapt          = 0;
static int logkey_matapplypapt_symbolic = 0;
static int logkey_matapplypapt_numeric  = 0;
19: /*
20: MatMatMult_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices
21: C = A * B;
23: Note: C is assumed to be uncreated.
24: If this is not the case, Destroy C before calling this routine.
25: */
26: int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C)
27: {
28: int ierr;
29: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
30: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
31: int aishift=a->indexshift,bishift=b->indexshift;
32: int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
33: int *ci,*cj,*denserow,*sparserow;
34: int an=A->N,am=A->M,bn=B->N,bm=B->M;
35: int i,j,k,anzi,brow,bnzj,cnzi;
36: MatScalar *ca;
39: /* some error checking which could be moved into interface layer */
40: if (aishift || bishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
41: if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm);
42:
43: /* Set up timers */
44: if (!logkey_matmatmult_symbolic) {
45: PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);
46: }
47: PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);
49: /* Set up */
50: /* Allocate ci array, arrays for fill computation and */
51: /* free space for accumulating nonzero column info */
52: PetscMalloc(((am+1)+1)*sizeof(int),&ci);
53: ci[0] = 0;
55: PetscMalloc((2*bn+1)*sizeof(int),&denserow);
56: PetscMemzero(denserow,(2*bn+1)*sizeof(int));
57: sparserow = denserow + bn;
59: /* Initial FreeSpace size is nnz(B)=bi[bm] */
60: ierr = GetMoreSpace(bi[bm],&free_space);
61: current_space = free_space;
63: /* Determine symbolic info for each row of the product: */
64: for (i=0;i<am;i++) {
65: anzi = ai[i+1] - ai[i];
66: cnzi = 0;
67: for (j=0;j<anzi;j++) {
68: brow = *aj++;
69: bnzj = bi[brow+1] - bi[brow];
70: bjj = bj + bi[brow];
71: for (k=0;k<bnzj;k++) {
72: /* If column is not marked, mark it in compressed and uncompressed locations. */
73: /* For simplicity, leave uncompressed row unsorted until finished with row, */
74: /* and increment nonzero count for this row. */
75: if (!denserow[bjj[k]]) {
76: denserow[bjj[k]] = -1;
77: sparserow[cnzi++] = bjj[k];
78: }
79: }
80: }
82: /* sort sparserow */
83: PetscSortInt(cnzi,sparserow);
85: /* If free space is not available, make more free space */
86: /* Double the amount of total space in the list */
87: if (current_space->local_remaining<cnzi) {
88: GetMoreSpace(current_space->total_array_size,¤t_space);
89: }
91: /* Copy data into free space, and zero out denserow */
92: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));
93: current_space->array += cnzi;
94: current_space->local_used += cnzi;
95: current_space->local_remaining -= cnzi;
96: for (j=0;j<cnzi;j++) {
97: denserow[sparserow[j]] = 0;
98: }
99: ci[i+1] = ci[i] + cnzi;
100: }
102: /* Column indices are in the list of free space */
103: /* Allocate space for cj, initialize cj, and */
104: /* destroy list of free space and other temporary array(s) */
105: PetscMalloc((ci[am]+1)*sizeof(int),&cj);
106: MakeSpaceContiguous(&free_space,cj);
107: PetscFree(denserow);
108:
109: /* Allocate space for ca */
110: PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);
111: PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));
112:
113: /* put together the new matrix */
114: MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);
116: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
117: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
118: c = (Mat_SeqAIJ *)((*C)->data);
119: c->freedata = PETSC_TRUE;
120: c->nonew = 0;
122: PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);
123: return(0);
124: }
126: /*
127: MatMatMult_Numeric_SeqAIJ_SeqAIJ - Forms the numeric product of two SeqAIJ matrices
128: C=A*B;
129: Note: C must have been created by calling MatMatMult_Symbolic_SeqAIJ_SeqAIJ.
130: */
131: int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
132: {
133: int ierr,flops=0;
134: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
135: Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
136: Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
137: int aishift=a->indexshift,bishift=b->indexshift,cishift=c->indexshift;
138: int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
139: int an=A->N,am=A->M,bn=B->N,bm=B->M,cn=C->N,cm=C->M;
140: int i,j,k,anzi,bnzi,cnzi,brow;
141: MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
145: /* This error checking should be unnecessary if the symbolic was performed */
146: if (aishift || bishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
147: if (am!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",am,cm);
148: if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm);
149: if (bn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",bn,cn);
151: /* Set up timers */
152: if (!logkey_matmatmult_numeric) {
153: PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);
154: }
155: PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);
157: /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
158: PetscMalloc((cn+1)*sizeof(MatScalar),&temp);
159: PetscMemzero(temp,cn*sizeof(MatScalar));
160: /* Traverse A row-wise. */
161: /* Build the ith row in C by summing over nonzero columns in A, */
162: /* the rows of B corresponding to nonzeros of A. */
163: for (i=0;i<am;i++) {
164: anzi = ai[i+1] - ai[i];
165: for (j=0;j<anzi;j++) {
166: brow = *aj++;
167: bnzi = bi[brow+1] - bi[brow];
168: bjj = bj + bi[brow];
169: baj = ba + bi[brow];
170: for (k=0;k<bnzi;k++) {
171: temp[bjj[k]] += (*aa)*baj[k];
172: }
173: flops += 2*bnzi;
174: aa++;
175: }
176: /* Store row back into C, and re-zero temp */
177: cnzi = ci[i+1] - ci[i];
178: for (j=0;j<cnzi;j++) {
179: ca[j] = temp[cj[j]];
180: temp[cj[j]] = 0.0;
181: }
182: ca += cnzi;
183: cj += cnzi;
184: }
185: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
186: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
187:
188: /* Free temp */
189: PetscFree(temp);
190: PetscLogFlops(flops);
191: PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);
192: return(0);
193: }
195: int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) {
199: if (!logkey_matmatmult) {
200: PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);
201: }
202: PetscLogEventBegin(logkey_matmatmult,A,B,0,0);
203: MatMatMult_Symbolic_SeqAIJ_SeqAIJ(A,B,C);
204: MatMatMult_Numeric_SeqAIJ_SeqAIJ(A,B,*C);
205: PetscLogEventEnd(logkey_matmatmult,A,B,0,0);
206: return(0);
207: }
210: /*
211: MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices
212: C = P * A * P^T;
214: Note: C is assumed to be uncreated.
215: If this is not the case, Destroy C before calling this routine.
216: */
217: int MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
218: /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */
219: /* and MatMatMult_SeqAIJ_SeqAIJ_Symbolic. Perhaps they could be merged nicely. */
220: int ierr;
221: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
222: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
223: int aishift=a->indexshift,pishift=p->indexshift;
224: int *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj;
225: int *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow;
226: int an=A->N,am=A->M,pn=P->N,pm=P->M;
227: int i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi;
228: MatScalar *ca;
232: /* some error checking which could be moved into interface layer */
233: if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
234: if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am);
235: if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an);
237: /* Set up timers */
238: if (!logkey_matapplypapt_symbolic) {
239: PetscLogEventRegister(&logkey_matapplypapt_symbolic,"MatApplyPAPt_Symbolic",MAT_COOKIE);
240: }
241: PetscLogEventBegin(logkey_matapplypapt_symbolic,A,P,0,0);
243: /* Create ij structure of P^T */
244: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
246: /* Allocate ci array, arrays for fill computation and */
247: /* free space for accumulating nonzero column info */
248: PetscMalloc(((pm+1)*1)*sizeof(int),&ci);
249: ci[0] = 0;
251: PetscMalloc((2*an+2*pm+1)*sizeof(int),&padenserow);
252: PetscMemzero(padenserow,(2*an+2*pm+1)*sizeof(int));
253: pasparserow = padenserow + an;
254: denserow = pasparserow + an;
255: sparserow = denserow + pm;
257: /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */
258: /* This should be reasonable if sparsity of PAPt is similar to that of A. */
259: ierr = GetMoreSpace((ai[am]/pn)*pm,&free_space);
260: current_space = free_space;
262: /* Determine fill for each row of C: */
263: for (i=0;i<pm;i++) {
264: pnzi = pi[i+1] - pi[i];
265: panzi = 0;
266: /* Get symbolic sparse row of PA: */
267: for (j=0;j<pnzi;j++) {
268: arow = *pj++;
269: anzj = ai[arow+1] - ai[arow];
270: ajj = aj + ai[arow];
271: for (k=0;k<anzj;k++) {
272: if (!padenserow[ajj[k]]) {
273: padenserow[ajj[k]] = -1;
274: pasparserow[panzi++] = ajj[k];
275: }
276: }
277: }
278: /* Using symbolic row of PA, determine symbolic row of C: */
279: paj = pasparserow;
280: cnzi = 0;
281: for (j=0;j<panzi;j++) {
282: ptrow = *paj++;
283: ptnzj = pti[ptrow+1] - pti[ptrow];
284: ptjj = ptj + pti[ptrow];
285: for (k=0;k<ptnzj;k++) {
286: if (!denserow[ptjj[k]]) {
287: denserow[ptjj[k]] = -1;
288: sparserow[cnzi++] = ptjj[k];
289: }
290: }
291: }
293: /* sort sparse representation */
294: PetscSortInt(cnzi,sparserow);
296: /* If free space is not available, make more free space */
297: /* Double the amount of total space in the list */
298: if (current_space->local_remaining<cnzi) {
299: GetMoreSpace(current_space->total_array_size,¤t_space);
300: }
302: /* Copy data into free space, and zero out dense row */
303: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));
304: current_space->array += cnzi;
305: current_space->local_used += cnzi;
306: current_space->local_remaining -= cnzi;
308: for (j=0;j<panzi;j++) {
309: padenserow[pasparserow[j]] = 0;
310: }
311: for (j=0;j<cnzi;j++) {
312: denserow[sparserow[j]] = 0;
313: }
314: ci[i+1] = ci[i] + cnzi;
315: }
316: /* column indices are in the list of free space */
317: /* Allocate space for cj, initialize cj, and */
318: /* destroy list of free space and other temporary array(s) */
319: PetscMalloc((ci[pm]+1)*sizeof(int),&cj);
320: MakeSpaceContiguous(&free_space,cj);
321: PetscFree(padenserow);
322:
323: /* Allocate space for ca */
324: PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);
325: PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));
326:
327: /* put together the new matrix */
328: MatCreateSeqAIJWithArrays(A->comm,pm,pm,ci,cj,ca,C);
330: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
331: /* Since these are PETSc arrays, change flags to free them as necessary. */
332: c = (Mat_SeqAIJ *)((*C)->data);
333: c->freedata = PETSC_TRUE;
334: c->nonew = 0;
336: /* Clean up. */
337: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
339: PetscLogEventEnd(logkey_matapplypapt_symbolic,A,P,0,0);
340: return(0);
341: }
343: /*
344: MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices
345: C = P * A * P^T;
346: Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ.
347: */
348: int MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) {
349: int ierr,flops=0;
350: Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
351: Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data;
352: Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data;
353: int aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift;
354: int *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj;
355: int *ci=c->i,*cj=c->j;
356: int an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M;
357: int i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi;
358: MatScalar *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum;
362: /* This error checking should be unnecessary if the symbolic was performed */
363: if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
364: if (pm!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,cm);
365: if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am);
366: if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an);
367: if (pm!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm, cn);
369: /* Set up timers */
370: if (!logkey_matapplypapt_numeric) {
371: PetscLogEventRegister(&logkey_matapplypapt_numeric,"MatApplyPAPt_Numeric",MAT_COOKIE);
372: }
373: PetscLogEventBegin(logkey_matapplypapt_numeric,A,P,C,0);
375: PetscMalloc(an*(sizeof(MatScalar)+2*sizeof(int)),&paa);
376: PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(int)));
377: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
379: paj = (int *)(paa + an);
380: pajdense = paj + an;
382: for (i=0;i<pm;i++) {
383: /* Form sparse row of P*A */
384: pnzi = pi[i+1] - pi[i];
385: panzj = 0;
386: for (j=0;j<pnzi;j++) {
387: arow = *pj++;
388: anzj = ai[arow+1] - ai[arow];
389: ajj = aj + ai[arow];
390: aaj = aa + ai[arow];
391: for (k=0;k<anzj;k++) {
392: if (!pajdense[ajj[k]]) {
393: pajdense[ajj[k]] = -1;
394: paj[panzj++] = ajj[k];
395: }
396: paa[ajj[k]] += (*pa)*aaj[k];
397: }
398: flops += 2*anzj;
399: pa++;
400: }
402: /* Sort the j index array for quick sparse axpy. */
403: PetscSortInt(panzj,paj);
405: /* Compute P*A*P^T using sparse inner products. */
406: /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */
407: cnzi = ci[i+1] - ci[i];
408: for (j=0;j<cnzi;j++) {
409: /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */
410: ptcol = *cj++;
411: ptnzj = pi[ptcol+1] - pi[ptcol];
412: ptj = pjj + pi[ptcol];
413: ptaj = pta + pi[ptcol];
414: sum = 0.;
415: k1 = 0;
416: k2 = 0;
417: while ((k1<panzj) && (k2<ptnzj)) {
418: if (paj[k1]==ptj[k2]) {
419: sum += paa[paj[k1++]]*ptaj[k2++];
420: } else if (paj[k1] < ptj[k2]) {
421: k1++;
422: } else /* if (paj[k1] > ptj[k2]) */ {
423: k2++;
424: }
425: }
426: *ca++ = sum;
427: }
429: /* Zero the current row info for P*A */
430: for (j=0;j<panzj;j++) {
431: paa[paj[j]] = 0.;
432: pajdense[paj[j]] = 0;
433: }
434: }
436: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
437: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
438: PetscLogFlops(flops);
439: PetscLogEventEnd(logkey_matapplypapt_numeric,A,P,C,0);
440: return(0);
441: }
442:
443: int MatApplyPAPt_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
447: if (!logkey_matapplypapt) {
448: PetscLogEventRegister(&logkey_matapplypapt,"MatApplyPAPt",MAT_COOKIE);
449: }
450: PetscLogEventBegin(logkey_matapplypapt,A,P,0,0);
451: MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(A,P,C);
452: MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(A,P,*C);
453: PetscLogEventEnd(logkey_matapplypapt,A,P,0,0);
454: return(0);
455: }