Actual source code: fdda.c
2: #include <petsc/private/dmdaimpl.h>
3: #include <petscmat.h>
4: #include <petscbt.h>
6: extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
7: extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
8: extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
9: extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
11: /*
12: For ghost i that may be negative or greater than the upper bound this
13: maps it into the 0:m-1 range using periodicity
14: */
15: #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
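/* For example, with m = 10: SetInRange(-2,10) = 8 and SetInRange(11,10) = 1, while indices already in 0..m-1 are returned unchanged (this assumes ghost indices lie within one period of the domain) */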
17: static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
18: {
19: PetscInt i,j,nz,*fill;
21: if (!dfill) return 0;
23: /* count number of nonzeros */
24: nz = 0;
25: for (i=0; i<w; i++) {
26: for (j=0; j<w; j++) {
27: if (dfill[w*i+j]) nz++;
28: }
29: }
30: PetscMalloc1(nz + w + 1,&fill);
31: /* construct modified CSR storage of nonzero structure */
32: /* fill[0 -- w] marks starts of each row of column indices (and end of last row)
33: so fill[1] - fill[0] gives number of nonzeros in first row etc */
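/* e.g. (illustrative) for w = 3 and dfill = {1,0,0, 1,1,0, 0,1,1} this produces fill = {4,5,7,9, 0, 0,1, 1,2} */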
34: nz = w + 1;
35: for (i=0; i<w; i++) {
36: fill[i] = nz;
37: for (j=0; j<w; j++) {
38: if (dfill[w*i+j]) {
39: fill[nz] = j;
40: nz++;
41: }
42: }
43: }
44: fill[w] = nz;
46: *rfill = fill;
47: return 0;
48: }
50: static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
51: {
52: PetscInt nz;
54: if (!dfillsparse) return 0;
56: /* Determine number of non-zeros */
57: nz = (dfillsparse[w] - w - 1);
59: /* Allocate space for our copy of the given sparse matrix representation. */
60: PetscMalloc1(nz + w + 1,rfill);
61: PetscArraycpy(*rfill,dfillsparse,nz+w+1);
62: return 0;
63: }
65: static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
66: {
67: PetscInt i,k,cnt = 1;
70: /* ofillcols tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
71: columns to the left with any nonzeros in them plus 1 */
72: PetscCalloc1(dd->w,&dd->ofillcols);
73: for (i=0; i<dd->w; i++) {
74: for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
75: }
76: for (i=0; i<dd->w; i++) {
77: if (dd->ofillcols[i]) {
78: dd->ofillcols[i] = cnt++;
79: }
80: }
81: return 0;
82: }
84: /*@
85: DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
86: of the matrix returned by DMCreateMatrix().
88: Logically Collective on da
90: Input Parameters:
91: + da - the distributed array
92: . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
93: - ofill - the fill pattern in the off-diagonal blocks
95: Level: developer
97: Notes:
98: This only makes sense when you are doing multicomponent problems but using the
99: MPIAIJ matrix format
101: The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
102: representing coupling and 0 entries for missing coupling. For example
103: $    dfill[9] = {1, 0, 0,
104: $                1, 1, 0,
105: $                0, 1, 1}
106: means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
107: itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
108: diagonal block).
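   An illustrative call for such a 3 component problem, coupling only like components between
   neighboring grid points in the off-diagonal blocks, could then be
$    ofill[9] = {1, 0, 0,
$                0, 1, 0,
$                0, 0, 1}
$    DMDASetBlockFills(da,dfill,ofill);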
110: DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
111: can be represented in the dfill, ofill format
113: Contributed by Glenn Hammond
115: .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
117: @*/
118: PetscErrorCode DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
119: {
120: DM_DA *dd = (DM_DA*)da->data;
122: /* save the given dfill and ofill information */
123: DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);
124: DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);
126: /* count nonzeros in ofill columns */
127: DMDASetBlockFills_Private2(dd);
129: return 0;
130: }
132: /*@
133: DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
134: of the matrix returned by DMCreateMatrix(), using sparse representations
135: of fill patterns.
137: Logically Collective on da
139: Input Parameters:
140: + da - the distributed array
141: . dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
142: - ofill - the sparse fill pattern in the off-diagonal blocks
144: Level: developer
146: Notes:
147: This only makes sense when you are doing multicomponent problems but using the MPIAIJ matrix format
149: The format for dfill and ofill is a sparse representation of a
150: dof-by-dof matrix with 1 entries representing coupling and 0 entries
151: for missing coupling. The sparse representation is a 1 dimensional
152: array of length nz + dof + 1, where nz is the number of non-zeros in
153: the matrix. The first dof entries in the array give the
154: starting array indices of each row's items in the rest of the array,
155: the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
156: and the remaining nz items give the column indices of each of
157: the 1s within the logical 2D matrix. Each row's items within
158: the array are the column indices of the 1s within that row
159: of the 2D matrix. PETSc developers may recognize that this is the
160: same format as that computed by the DMDASetBlockFills_Private()
161: function from a dense 2D matrix representation.
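   For example (illustrative), the dense 3 by 3 dfill pattern shown in DMDASetBlockFills() corresponds to
   the sparse representation
$    dfillsparse[9] = {4, 5, 7, 9, 0, 0, 1, 1, 2}
   where rows 0, 1 and 2 start at array locations 4, 5 and 7, entry 3 holds the total length 9, and the
   remaining entries are the column indices 0; 0, 1; and 1, 2.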
163: DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
164: can be represented in the dfill, ofill format
166: Contributed by Philip C. Roth
168: .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
170: @*/
171: PetscErrorCode DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
172: {
173: DM_DA *dd = (DM_DA*)da->data;
175: /* save the given dfill and ofill information */
176: DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);
177: DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);
179: /* count nonzeros in ofill columns */
180: DMDASetBlockFills_Private2(dd);
182: return 0;
183: }
185: PetscErrorCode DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
186: {
187: PetscInt dim,m,n,p,nc;
188: DMBoundaryType bx,by,bz;
189: MPI_Comm comm;
190: PetscMPIInt size;
191: PetscBool isBAIJ;
192: DM_DA *dd = (DM_DA*)da->data;
194: /*
195: m
196: ------------------------------------------------------
197: | |
198: | |
199: | ---------------------- |
200: | | | |
201: n | yn | | |
202: | | | |
203: | .--------------------- |
204: | (xs,ys) xn |
205: | . |
206: | (gxs,gys) |
207: | |
208: -----------------------------------------------------
209: */
211: /*
212: nc - number of components per grid point
213: col - number of colors needed in one direction for single component problem
215: */
216: DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);
218: PetscObjectGetComm((PetscObject)da,&comm);
219: MPI_Comm_size(comm,&size);
220: if (ctype == IS_COLORING_LOCAL) {
221: if (size == 1) {
222: ctype = IS_COLORING_GLOBAL;
223: } else if (dim > 1) {
224: if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
225: SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
226: }
227: }
228: }
230: /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
231: matrices is for the blocks, not the individual matrix elements */
232: PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);
233: if (!isBAIJ) PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);
234: if (!isBAIJ) PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);
235: if (isBAIJ) {
236: dd->w = 1;
237: dd->xs = dd->xs/nc;
238: dd->xe = dd->xe/nc;
239: dd->Xs = dd->Xs/nc;
240: dd->Xe = dd->Xe/nc;
241: }
243: /*
244: We do not provide a getcoloring function in the DMDA operations because
245: the basic DMDA does not know about matrices. We think of DMDA as being
246: more low-level than matrices.
247: */
248: if (dim == 1) {
249: DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);
250: } else if (dim == 2) {
251: DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);
252: } else if (dim == 3) {
253: DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);
254: } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send mail to petsc-maint@mcs.anl.gov for code",dim);
255: if (isBAIJ) {
256: dd->w = nc;
257: dd->xs = dd->xs*nc;
258: dd->xe = dd->xe*nc;
259: dd->Xs = dd->Xs*nc;
260: dd->Xe = dd->Xe*nc;
261: }
262: return 0;
263: }
265: /* ---------------------------------------------------------------------------------*/
267: PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
268: {
269: PetscInt xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
270: PetscInt ncolors;
271: MPI_Comm comm;
272: DMBoundaryType bx,by;
273: DMDAStencilType st;
274: ISColoringValue *colors;
275: DM_DA *dd = (DM_DA*)da->data;
277: /*
278: nc - number of components per grid point
279: col - number of colors needed in one direction for single component problem
281: */
282: DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);
283: col = 2*s + 1;
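/* e.g. with stencil width s = 1 this gives col = 3, so the colors assigned below repeat on a 3x3 tile of grid points and ncolors = 9*nc */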
284: DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
285: DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
286: PetscObjectGetComm((PetscObject)da,&comm);
288: /* special case as taught to us by Paul Hovland */
289: if (st == DMDA_STENCIL_STAR && s == 1) {
290: DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);
291: } else {
292: if (ctype == IS_COLORING_GLOBAL) {
293: if (!dd->localcoloring) {
294: PetscMalloc1(nc*nx*ny,&colors);
295: ii = 0;
296: for (j=ys; j<ys+ny; j++) {
297: for (i=xs; i<xs+nx; i++) {
298: for (k=0; k<nc; k++) {
299: colors[ii++] = k + nc*((i % col) + col*(j % col));
300: }
301: }
302: }
303: ncolors = nc + nc*(col-1 + col*(col-1));
304: ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);
305: }
306: *coloring = dd->localcoloring;
307: } else if (ctype == IS_COLORING_LOCAL) {
308: if (!dd->ghostedcoloring) {
309: PetscMalloc1(nc*gnx*gny,&colors);
310: ii = 0;
311: for (j=gys; j<gys+gny; j++) {
312: for (i=gxs; i<gxs+gnx; i++) {
313: for (k=0; k<nc; k++) {
314: /* the complicated stuff is to handle periodic boundaries */
315: colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
316: }
317: }
318: }
319: ncolors = nc + nc*(col - 1 + col*(col-1));
320: ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
321: /* PetscIntView(ncolors,(PetscInt*)colors,0); */
323: ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
324: }
325: *coloring = dd->ghostedcoloring;
326: } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
327: }
328: ISColoringReference(*coloring);
329: return 0;
330: }
332: /* ---------------------------------------------------------------------------------*/
334: PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
335: {
336: PetscInt xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
337: PetscInt ncolors;
338: MPI_Comm comm;
339: DMBoundaryType bx,by,bz;
340: DMDAStencilType st;
341: ISColoringValue *colors;
342: DM_DA *dd = (DM_DA*)da->data;
344: /*
345: nc - number of components per grid point
346: col - number of colors needed in one direction for single component problem
348: */
349: DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
350: col = 2*s + 1;
351: DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
352: DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
353: PetscObjectGetComm((PetscObject)da,&comm);
355: /* create the coloring */
356: if (ctype == IS_COLORING_GLOBAL) {
357: if (!dd->localcoloring) {
358: PetscMalloc1(nc*nx*ny*nz,&colors);
359: ii = 0;
360: for (k=zs; k<zs+nz; k++) {
361: for (j=ys; j<ys+ny; j++) {
362: for (i=xs; i<xs+nx; i++) {
363: for (l=0; l<nc; l++) {
364: colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
365: }
366: }
367: }
368: }
369: ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
370: ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);
371: }
372: *coloring = dd->localcoloring;
373: } else if (ctype == IS_COLORING_LOCAL) {
374: if (!dd->ghostedcoloring) {
375: PetscMalloc1(nc*gnx*gny*gnz,&colors);
376: ii = 0;
377: for (k=gzs; k<gzs+gnz; k++) {
378: for (j=gys; j<gys+gny; j++) {
379: for (i=gxs; i<gxs+gnx; i++) {
380: for (l=0; l<nc; l++) {
381: /* the complicated stuff is to handle periodic boundaries */
382: colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
383: }
384: }
385: }
386: }
387: ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
388: ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
389: ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
390: }
391: *coloring = dd->ghostedcoloring;
392: } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
393: ISColoringReference(*coloring);
394: return 0;
395: }
397: /* ---------------------------------------------------------------------------------*/
399: PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
400: {
401: PetscInt xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
402: PetscInt ncolors;
403: MPI_Comm comm;
404: DMBoundaryType bx;
405: ISColoringValue *colors;
406: DM_DA *dd = (DM_DA*)da->data;
408: /*
409: nc - number of components per grid point
410: col - number of colors needed in one direction for single component problem
412: */
413: DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);
414: col = 2*s + 1;
415: DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);
416: DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);
417: PetscObjectGetComm((PetscObject)da,&comm);
419: /* create the coloring */
420: if (ctype == IS_COLORING_GLOBAL) {
421: if (!dd->localcoloring) {
422: PetscMalloc1(nc*nx,&colors);
423: if (dd->ofillcols) {
424: PetscInt tc = 0;
425: for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
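/* tc counts the components that couple into the off-diagonal blocks; only those need extra colors at the 2*s neighboring stencil offsets, giving the ncolors = nc + 2*s*tc used below */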
426: i1 = 0;
427: for (i=xs; i<xs+nx; i++) {
428: for (l=0; l<nc; l++) {
429: if (dd->ofillcols[l] && (i % col)) {
430: colors[i1++] = nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
431: } else {
432: colors[i1++] = l;
433: }
434: }
435: }
436: ncolors = nc + 2*s*tc;
437: } else {
438: i1 = 0;
439: for (i=xs; i<xs+nx; i++) {
440: for (l=0; l<nc; l++) {
441: colors[i1++] = l + nc*(i % col);
442: }
443: }
444: ncolors = nc + nc*(col-1);
445: }
446: ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);
447: }
448: *coloring = dd->localcoloring;
449: } else if (ctype == IS_COLORING_LOCAL) {
450: if (!dd->ghostedcoloring) {
451: PetscMalloc1(nc*gnx,&colors);
452: i1 = 0;
453: for (i=gxs; i<gxs+gnx; i++) {
454: for (l=0; l<nc; l++) {
455: /* the complicated stuff is to handle periodic boundaries */
456: colors[i1++] = l + nc*(SetInRange(i,m) % col);
457: }
458: }
459: ncolors = nc + nc*(col-1);
460: ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
461: ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
462: }
463: *coloring = dd->ghostedcoloring;
464: } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
465: ISColoringReference(*coloring);
466: return 0;
467: }
469: PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
470: {
471: PetscInt xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
472: PetscInt ncolors;
473: MPI_Comm comm;
474: DMBoundaryType bx,by;
475: ISColoringValue *colors;
476: DM_DA *dd = (DM_DA*)da->data;
478: /*
479: nc - number of components per grid point
480: col - number of colors needed in one direction for single component problem
482: */
483: DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);
484: DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
485: DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
486: PetscObjectGetComm((PetscObject)da,&comm);
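/* For the width-1 star (5-point) stencil, the classic coloring (3*j + i) mod 5 gives any two grid points whose Jacobian columns share a row different colors, so 5*nc colors suffice instead of the 9*nc of the general scheme */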
487: /* create the coloring */
488: if (ctype == IS_COLORING_GLOBAL) {
489: if (!dd->localcoloring) {
490: PetscMalloc1(nc*nx*ny,&colors);
491: ii = 0;
492: for (j=ys; j<ys+ny; j++) {
493: for (i=xs; i<xs+nx; i++) {
494: for (k=0; k<nc; k++) {
495: colors[ii++] = k + nc*((3*j+i) % 5);
496: }
497: }
498: }
499: ncolors = 5*nc;
500: ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);
501: }
502: *coloring = dd->localcoloring;
503: } else if (ctype == IS_COLORING_LOCAL) {
504: if (!dd->ghostedcoloring) {
505: PetscMalloc1(nc*gnx*gny,&colors);
506: ii = 0;
507: for (j=gys; j<gys+gny; j++) {
508: for (i=gxs; i<gxs+gnx; i++) {
509: for (k=0; k<nc; k++) {
510: colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
511: }
512: }
513: }
514: ncolors = 5*nc;
515: ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
516: ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
517: }
518: *coloring = dd->ghostedcoloring;
519: } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
520: return 0;
521: }
523: /* =========================================================================== */
524: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
525: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
526: extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat);
527: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
528: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
529: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
530: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
531: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
532: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
533: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
534: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
535: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
536: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
537: extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
539: /*@C
540: MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
542: Logically Collective on mat
544: Input Parameters:
545: + mat - the matrix
546: - da - the da
548: Level: intermediate
550: @*/
551: PetscErrorCode MatSetupDM(Mat mat,DM da)
552: {
555: PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));
556: return 0;
557: }
559: PetscErrorCode MatView_MPI_DA(Mat A,PetscViewer viewer)
560: {
561: DM da;
562: const char *prefix;
563: Mat Anatural;
564: AO ao;
565: PetscInt rstart,rend,*petsc,i;
566: IS is;
567: MPI_Comm comm;
568: PetscViewerFormat format;
570: /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
571: PetscViewerGetFormat(viewer,&format);
572: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) return 0;
574: PetscObjectGetComm((PetscObject)A,&comm);
575: MatGetDM(A, &da);
578: DMDAGetAO(da,&ao);
579: MatGetOwnershipRange(A,&rstart,&rend);
580: PetscMalloc1(rend-rstart,&petsc);
581: for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
582: AOApplicationToPetsc(ao,rend-rstart,petsc);
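/* petsc[] now holds the PETSc (distributed) indices corresponding to this process's contiguous range in the natural (application) ordering, so the submatrix extracted below is A permuted into the natural ordering */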
583: ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);
585: /* call viewer on natural ordering */
586: MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);
587: ISDestroy(&is);
588: PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);
589: PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);
590: PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);
591: ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
592: MatView(Anatural,viewer);
593: ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
594: MatDestroy(&Anatural);
595: return 0;
596: }
598: PetscErrorCode MatLoad_MPI_DA(Mat A,PetscViewer viewer)
599: {
600: DM da;
601: Mat Anatural,Aapp;
602: AO ao;
603: PetscInt rstart,rend,*app,i,m,n,M,N;
604: IS is;
605: MPI_Comm comm;
607: PetscObjectGetComm((PetscObject)A,&comm);
608: MatGetDM(A, &da);
611: /* Load the matrix in natural ordering */
612: MatCreate(PetscObjectComm((PetscObject)A),&Anatural);
613: MatSetType(Anatural,((PetscObject)A)->type_name);
614: MatGetSize(A,&M,&N);
615: MatGetLocalSize(A,&m,&n);
616: MatSetSizes(Anatural,m,n,M,N);
617: MatLoad(Anatural,viewer);
619: /* Map natural ordering to application ordering and create IS */
620: DMDAGetAO(da,&ao);
621: MatGetOwnershipRange(Anatural,&rstart,&rend);
622: PetscMalloc1(rend-rstart,&app);
623: for (i=rstart; i<rend; i++) app[i-rstart] = i;
624: AOPetscToApplication(ao,rend-rstart,app);
625: ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);
627: /* Do permutation and replace header */
628: MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);
629: MatHeaderReplace(A,&Aapp);
630: ISDestroy(&is);
631: MatDestroy(&Anatural);
632: return 0;
633: }
635: PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
636: {
637: PetscInt dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
638: Mat A;
639: MPI_Comm comm;
640: MatType Atype;
641: void (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
642: MatType mtype;
643: PetscMPIInt size;
644: DM_DA *dd = (DM_DA*)da->data;
646: MatInitializePackage();
647: mtype = da->mattype;
649: /*
650: m
651: ------------------------------------------------------
652: | |
653: | |
654: | ---------------------- |
655: | | | |
656: n | ny | | |
657: | | | |
658: | .--------------------- |
659: | (xs,ys) nx |
660: | . |
661: | (gxs,gys) |
662: | |
663: -----------------------------------------------------
664: */
666: /*
667: nc - number of components per grid point
668: col - number of colors needed in one direction for single component problem
670: */
671: M = dd->M;
672: N = dd->N;
673: P = dd->P;
674: dim = da->dim;
675: dof = dd->w;
676: /* DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL); */
677: DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);
678: PetscObjectGetComm((PetscObject)da,&comm);
679: MatCreate(comm,&A);
680: MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);
681: MatSetType(A,mtype);
682: MatSetFromOptions(A);
683: if (dof*nx*ny*nz < da->bind_below) {
684: MatSetBindingPropagates(A,PETSC_TRUE);
685: MatBindToCPU(A,PETSC_TRUE);
686: }
687: MatSetDM(A,da);
688: if (da->structure_only) {
689: MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);
690: }
691: MatGetType(A,&Atype);
692: /*
693: We do not provide a getmatrix function in the DMDA operations because
694: the basic DMDA does not know about matrices. We think of DMDA as being
695: more low-level than matrices. This is kind of cheating because sometimes
696: we think of DMDA as higher level than matrices.
698: We could switch based on Atype (or mtype), but we do not since the
699: specialized setting routines depend only on the particular preallocation
700: details of the matrix, not the type itself.
701: */
702: PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);
703: if (!aij) {
704: PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);
705: }
706: if (!aij) {
707: PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);
708: if (!baij) {
709: PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);
710: }
711: if (!baij) {
712: PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);
713: if (!sbaij) {
714: PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);
715: }
716: if (!sbaij) {
717: PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);
718: if (!sell) {
719: PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);
720: }
721: }
722: if (!sell) {
723: PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);
724: }
725: }
726: }
727: if (aij) {
728: if (dim == 1) {
729: if (dd->ofill) {
730: DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);
731: } else {
732: DMBoundaryType bx;
733: PetscMPIInt size;
734: DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL);
735: MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);
736: if (size == 1 && bx == DM_BOUNDARY_NONE) {
737: DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A);
738: } else {
739: DMCreateMatrix_DA_1d_MPIAIJ(da,A);
740: }
741: }
742: } else if (dim == 2) {
743: if (dd->ofill) {
744: DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);
745: } else {
746: DMCreateMatrix_DA_2d_MPIAIJ(da,A);
747: }
748: } else if (dim == 3) {
749: if (dd->ofill) {
750: DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);
751: } else {
752: DMCreateMatrix_DA_3d_MPIAIJ(da,A);
753: }
754: }
755: } else if (baij) {
756: if (dim == 2) {
757: DMCreateMatrix_DA_2d_MPIBAIJ(da,A);
758: } else if (dim == 3) {
759: DMCreateMatrix_DA_3d_MPIBAIJ(da,A);
760: } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for matrix type %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",Atype,dim);
761: } else if (sbaij) {
762: if (dim == 2) {
763: DMCreateMatrix_DA_2d_MPISBAIJ(da,A);
764: } else if (dim == 3) {
765: DMCreateMatrix_DA_3d_MPISBAIJ(da,A);
766: } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for matrix type %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",Atype,dim);
767: } else if (sell) {
768: if (dim == 2) {
769: DMCreateMatrix_DA_2d_MPISELL(da,A);
770: } else if (dim == 3) {
771: DMCreateMatrix_DA_3d_MPISELL(da,A);
772: } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for matrix type %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",Atype,dim);
773: } else if (is) {
774: DMCreateMatrix_DA_IS(da,A);
775: } else {
776: ISLocalToGlobalMapping ltog;
778: MatSetBlockSize(A,dof);
779: MatSetUp(A);
780: DMGetLocalToGlobalMapping(da,<og);
781: MatSetLocalToGlobalMapping(A,ltog,ltog);
782: }
783: DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);
784: MatSetStencil(A,dim,dims,starts,dof);
785: MatSetDM(A,da);
786: MPI_Comm_size(comm,&size);
787: if (size > 1) {
788: /* change viewer to display matrix in natural ordering */
789: MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);
790: MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);
791: }
792: *J = A;
793: return 0;
794: }
796: /* ---------------------------------------------------------------------------------*/
797: PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
799: PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
800: {
801: DM_DA *da = (DM_DA*)dm->data;
802: Mat lJ,P;
803: ISLocalToGlobalMapping ltog;
804: IS is;
805: PetscBT bt;
806: const PetscInt *e_loc,*idx;
807: PetscInt i,nel,nen,nv,dof,*gidx,n,N;
809: /* The l2g map of the DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted ones).
810: We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
811: dof = da->w;
812: MatSetBlockSize(J,dof);
813: DMGetLocalToGlobalMapping(dm,<og);
815: /* flag local element indices in local DMDA numbering */
816: ISLocalToGlobalMappingGetSize(ltog,&nv);
817: PetscBTCreate(nv/dof,&bt);
818: DMDAGetElements(dm,&nel,&nen,&e_loc); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
819: for (i=0;i<nel*nen;i++) PetscBTSet(bt,e_loc[i]);
821: /* filter out (set to -1) the global indices not used by the local elements */
822: PetscMalloc1(nv/dof,&gidx);
823: ISLocalToGlobalMappingGetBlockIndices(ltog,&idx);
824: PetscArraycpy(gidx,idx,nv/dof);
825: ISLocalToGlobalMappingRestoreBlockIndices(ltog,&idx);
826: for (i=0;i<nv/dof;i++) if (!PetscBTLookup(bt,i)) gidx[i] = -1;
827: PetscBTDestroy(&bt);
828: ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nv/dof,gidx,PETSC_OWN_POINTER,&is);
829: ISLocalToGlobalMappingCreateIS(is,<og);
830: MatSetLocalToGlobalMapping(J,ltog,ltog);
831: ISLocalToGlobalMappingDestroy(<og);
832: ISDestroy(&is);
834: /* Preallocation */
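/* the nonzero pattern is first inserted (values ignored) into a MATPREALLOCATOR matrix P and then transferred to the local matrix lJ by MatPreallocatorPreallocate() */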
835: if (dm->prealloc_skip) {
836: MatSetUp(J);
837: } else {
838: MatISGetLocalMat(J,&lJ);
839: MatGetLocalToGlobalMapping(lJ,<og,NULL);
840: MatCreate(PetscObjectComm((PetscObject)lJ),&P);
841: MatSetType(P,MATPREALLOCATOR);
842: MatSetLocalToGlobalMapping(P,ltog,ltog);
843: MatGetSize(lJ,&N,NULL);
844: MatGetLocalSize(lJ,&n,NULL);
845: MatSetSizes(P,n,n,N,N);
846: MatSetBlockSize(P,dof);
847: MatSetUp(P);
848: for (i=0;i<nel;i++) {
849: MatSetValuesBlockedLocal(P,nen,e_loc+i*nen,nen,e_loc+i*nen,NULL,INSERT_VALUES);
850: }
851: MatPreallocatorPreallocate(P,(PetscBool)!da->prealloc_only,lJ);
852: MatISRestoreLocalMat(J,&lJ);
853: DMDARestoreElements(dm,&nel,&nen,&e_loc);
854: MatDestroy(&P);
856: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
857: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
858: }
859: return 0;
860: }
862: PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
863: {
864: PetscErrorCode ierr;
865: PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
866: PetscInt lstart,lend,pstart,pend,*dnz,*onz;
867: MPI_Comm comm;
868: PetscScalar *values;
869: DMBoundaryType bx,by;
870: ISLocalToGlobalMapping ltog;
871: DMDAStencilType st;
873: /*
874: nc - number of components per grid point
875: col - number of colors needed in one direction for single component problem
877: */
878: DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);
879: col = 2*s + 1;
880: DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
881: DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
882: PetscObjectGetComm((PetscObject)da,&comm);
884: PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);
885: DMGetLocalToGlobalMapping(da,<og);
887: MatSetBlockSize(J,nc);
888: /* determine the matrix preallocation information */
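/* dnz[i] and onz[i] accumulate, for each locally owned row, the number of nonzeros falling in the diagonal and off-diagonal blocks respectively; they are passed to the *SetPreallocation routines below */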
889: MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
890: for (i=xs; i<xs+nx; i++) {
892: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
893: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
895: for (j=ys; j<ys+ny; j++) {
896: slot = i - gxs + gnx*(j - gys);
898: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
899: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
901: cnt = 0;
902: for (k=0; k<nc; k++) {
903: for (l=lstart; l<lend+1; l++) {
904: for (p=pstart; p<pend+1; p++) {
905: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
906: cols[cnt++] = k + nc*(slot + gnx*l + p);
907: }
908: }
909: }
910: rows[k] = k + nc*(slot);
911: }
912: MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
913: }
914: }
915: MatSetBlockSize(J,nc);
916: MatSeqSELLSetPreallocation(J,0,dnz);
917: MatMPISELLSetPreallocation(J,0,dnz,0,onz);
918: MatPreallocateFinalize(dnz,onz);
920: MatSetLocalToGlobalMapping(J,ltog,ltog);
922: /*
923: For each node in the grid we get its neighbors in the local (on-process, ghost-point-inclusive)
924: ordering; MatSetValuesLocal() then maps those indices to the global
925: PETSc ordering.
926: */
927: if (!da->prealloc_only) {
928: PetscCalloc1(col*col*nc*nc,&values);
929: for (i=xs; i<xs+nx; i++) {
931: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
932: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
934: for (j=ys; j<ys+ny; j++) {
935: slot = i - gxs + gnx*(j - gys);
937: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
938: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
940: cnt = 0;
941: for (k=0; k<nc; k++) {
942: for (l=lstart; l<lend+1; l++) {
943: for (p=pstart; p<pend+1; p++) {
944: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
945: cols[cnt++] = k + nc*(slot + gnx*l + p);
946: }
947: }
948: }
949: rows[k] = k + nc*(slot);
950: }
951: MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);
952: }
953: }
954: PetscFree(values);
955: /* do not copy values to GPU since they are all zero and not yet needed there */
956: MatBindToCPU(J,PETSC_TRUE);
957: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
958: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
959: MatBindToCPU(J,PETSC_FALSE);
960: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
961: }
962: PetscFree2(rows,cols);
963: return 0;
964: }
966: PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
967: {
968: PetscErrorCode ierr;
969: PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
970: PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
971: PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
972: MPI_Comm comm;
973: PetscScalar *values;
974: DMBoundaryType bx,by,bz;
975: ISLocalToGlobalMapping ltog;
976: DMDAStencilType st;
978: /*
979: nc - number of components per grid point
980: col - number of colors needed in one direction for single component problem
982: */
983: DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
984: col = 2*s + 1;
985: DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
986: DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
987: PetscObjectGetComm((PetscObject)da,&comm);
989: PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);
990: DMGetLocalToGlobalMapping(da,<og);
992: MatSetBlockSize(J,nc);
993: /* determine the matrix preallocation information */
994: MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
995: for (i=xs; i<xs+nx; i++) {
996: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
997: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
998: for (j=ys; j<ys+ny; j++) {
999: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1000: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1001: for (k=zs; k<zs+nz; k++) {
1002: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1003: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));
1005: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1007: cnt = 0;
1008: for (l=0; l<nc; l++) {
1009: for (ii=istart; ii<iend+1; ii++) {
1010: for (jj=jstart; jj<jend+1; jj++) {
1011: for (kk=kstart; kk<kend+1; kk++) {
1012: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1013: cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1014: }
1015: }
1016: }
1017: }
1018: rows[l] = l + nc*(slot);
1019: }
1020: MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1021: }
1022: }
1023: }
1024: MatSetBlockSize(J,nc);
1025: MatSeqSELLSetPreallocation(J,0,dnz);
1026: MatMPISELLSetPreallocation(J,0,dnz,0,onz);
1027: MatPreallocateFinalize(dnz,onz);
1028: MatSetLocalToGlobalMapping(J,ltog,ltog);
1030: /*
1031: For each node in the grid we get its neighbors in the local (on-process, ghost-point-inclusive)
1032: ordering; MatSetValuesLocal() then maps those indices to the global
1033: PETSc ordering.
1034: */
1035: if (!da->prealloc_only) {
1036: PetscCalloc1(col*col*col*nc*nc*nc,&values);
1037: for (i=xs; i<xs+nx; i++) {
1038: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1039: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1040: for (j=ys; j<ys+ny; j++) {
1041: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1042: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1043: for (k=zs; k<zs+nz; k++) {
1044: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1045: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));
1047: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1049: cnt = 0;
1050: for (l=0; l<nc; l++) {
1051: for (ii=istart; ii<iend+1; ii++) {
1052: for (jj=jstart; jj<jend+1; jj++) {
1053: for (kk=kstart; kk<kend+1; kk++) {
1054: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1055: cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1056: }
1057: }
1058: }
1059: }
1060: rows[l] = l + nc*(slot);
1061: }
1062: MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);
1063: }
1064: }
1065: }
1066: PetscFree(values);
1067: /* do not copy values to GPU since they are all zero and not yet needed there */
1068: MatBindToCPU(J,PETSC_TRUE);
1069: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1070: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1071: MatBindToCPU(J,PETSC_FALSE);
1072: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1073: }
1074: PetscFree2(rows,cols);
1075: return 0;
1076: }
1078: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
1079: {
1080: PetscErrorCode ierr;
1081: PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1082: PetscInt lstart,lend,pstart,pend,*dnz,*onz;
1083: MPI_Comm comm;
1084: DMBoundaryType bx,by;
1085: ISLocalToGlobalMapping ltog,mltog;
1086: DMDAStencilType st;
1087: PetscBool removedups = PETSC_FALSE,alreadyboundtocpu = PETSC_TRUE;
1089: /*
1090: nc - number of components per grid point
1091: col - number of colors needed in one direction for single component problem
1093: */
1094: DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);
1095: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1096: MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);
1097: }
1098: col = 2*s + 1;
1099: /*
1100: With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1101: because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1102: */
1103: if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1104: if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
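/* e.g. (illustrative) a single process in x (M == 1) with a periodic x boundary, m = 2 grid points and stencil width s = 1 reaches the same column from both directions */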
1105: DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
1106: DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
1107: PetscObjectGetComm((PetscObject)da,&comm);
1109: PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);
1110: DMGetLocalToGlobalMapping(da,<og);
1112: MatSetBlockSize(J,nc);
1113: /* determine the matrix preallocation information */
1114: MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
1115: for (i=xs; i<xs+nx; i++) {
1116: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1117: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1119: for (j=ys; j<ys+ny; j++) {
1120: slot = i - gxs + gnx*(j - gys);
1122: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1123: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1125: cnt = 0;
1126: for (k=0; k<nc; k++) {
1127: for (l=lstart; l<lend+1; l++) {
1128: for (p=pstart; p<pend+1; p++) {
1129: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1130: cols[cnt++] = k + nc*(slot + gnx*l + p);
1131: }
1132: }
1133: }
1134: rows[k] = k + nc*(slot);
1135: }
1136: if (removedups) {
1137: MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1138: } else {
1139: MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1140: }
1141: }
1142: }
1143: MatSetBlockSize(J,nc);
1144: MatSeqAIJSetPreallocation(J,0,dnz);
1145: MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
1146: MatPreallocateFinalize(dnz,onz);
1147: MatGetLocalToGlobalMapping(J,&mltog,NULL);
1148: if (!mltog) {
1149: MatSetLocalToGlobalMapping(J,ltog,ltog);
1150: }
1152: /*
1153: For each node in the grid we get its neighbors in the local (on-process, ghost-point-inclusive)
1154: ordering; MatSetValuesLocal() then maps those indices to the global
1155: PETSc ordering.
1156: */
1157: if (!da->prealloc_only) {
1158: for (i=xs; i<xs+nx; i++) {
1160: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1161: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1163: for (j=ys; j<ys+ny; j++) {
1164: slot = i - gxs + gnx*(j - gys);
1166: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1167: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1169: cnt = 0;
1170: for (l=lstart; l<lend+1; l++) {
1171: for (p=pstart; p<pend+1; p++) {
1172: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1173: cols[cnt++] = nc*(slot + gnx*l + p);
1174: for (k=1; k<nc; k++) {
1175: cols[cnt] = 1 + cols[cnt-1];cnt++;
1176: }
1177: }
1178: }
1179: }
1180: for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
1181: MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1182: }
1183: }
1184: /* do not copy values to GPU since they are all zero and not yet needed there */
1185: MatBoundToCPU(J,&alreadyboundtocpu);
1186: MatBindToCPU(J,PETSC_TRUE);
1187: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1188: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1189: if (!alreadyboundtocpu) MatBindToCPU(J,PETSC_FALSE);
1190: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1191: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1192: MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1193: }
1194: }
1195: PetscFree2(rows,cols);
1196: return 0;
1197: }
1199: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1200: {
1201: PetscErrorCode ierr;
1202: PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1203: PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1204: PetscInt lstart,lend,pstart,pend,*dnz,*onz;
1205: DM_DA *dd = (DM_DA*)da->data;
1206: PetscInt ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1207: MPI_Comm comm;
1208: DMBoundaryType bx,by;
1209: ISLocalToGlobalMapping ltog;
1210: DMDAStencilType st;
1211: PetscBool removedups = PETSC_FALSE;
1213: /*
1214: nc - number of components per grid point
1215: col - number of colors needed in one direction for single component problem
1217: */
1218: DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);
1219: col = 2*s + 1;
1220: /*
1221: With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1222: because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1223: */
1224: if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1225: if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1226: DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
1227: DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
1228: PetscObjectGetComm((PetscObject)da,&comm);
1230: PetscMalloc1(col*col*nc,&cols);
1231: DMGetLocalToGlobalMapping(da,<og);
1233: MatSetBlockSize(J,nc);
1234: /* determine the matrix preallocation information */
1235: MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
1236: for (i=xs; i<xs+nx; i++) {
1238: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1239: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1241: for (j=ys; j<ys+ny; j++) {
1242: slot = i - gxs + gnx*(j - gys);
1244: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1245: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1247: for (k=0; k<nc; k++) {
1248: cnt = 0;
1249: for (l=lstart; l<lend+1; l++) {
1250: for (p=pstart; p<pend+1; p++) {
1251: if (l || p) {
1252: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1253: for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1254: }
1255: } else {
1256: if (dfill) {
1257: for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1258: } else {
1259: for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1260: }
1261: }
1262: }
1263: }
1264: row = k + nc*(slot);
1265: maxcnt = PetscMax(maxcnt,cnt);
1266: if (removedups) {
1267: MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);
1268: } else {
1269: MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);
1270: }
1271: }
1272: }
1273: }
1274: MatSeqAIJSetPreallocation(J,0,dnz);
1275: MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
1276: MatPreallocateFinalize(dnz,onz);
1277: MatSetLocalToGlobalMapping(J,ltog,ltog);
1279: /*
1280: For each node in the grid we get its neighbors in the local (on-process, ghost-point-inclusive)
1281: ordering; MatSetValuesLocal() then maps those indices to the global
1282: PETSc ordering.
1283: */
1284: if (!da->prealloc_only) {
1285: for (i=xs; i<xs+nx; i++) {
1287: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1288: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1290: for (j=ys; j<ys+ny; j++) {
1291: slot = i - gxs + gnx*(j - gys);
1293: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1294: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1296: for (k=0; k<nc; k++) {
1297: cnt = 0;
1298: for (l=lstart; l<lend+1; l++) {
1299: for (p=pstart; p<pend+1; p++) {
1300: if (l || p) {
1301: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1302: for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1303: }
1304: } else {
1305: if (dfill) {
1306: for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1307: } else {
1308: for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1309: }
1310: }
1311: }
1312: }
1313: row = k + nc*(slot);
1314: MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1315: }
1316: }
1317: }
1318: /* do not copy values to GPU since they are all zero and not yet needed there */
1319: MatBindToCPU(J,PETSC_TRUE);
1320: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1321: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1322: MatBindToCPU(J,PETSC_FALSE);
1323: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1324: }
1325: PetscFree(cols);
1326: return 0;
1327: }
1329: /* ---------------------------------------------------------------------------------*/
1331: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1332: {
1333: PetscErrorCode ierr;
1334: PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1335: PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1336: PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1337: MPI_Comm comm;
1338: DMBoundaryType bx,by,bz;
1339: ISLocalToGlobalMapping ltog,mltog;
1340: DMDAStencilType st;
1341: PetscBool removedups = PETSC_FALSE;
1343: /*
1344: nc - number of components per grid point
1345: col - number of colors needed in one direction for single component problem
1347: */
1348: DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
1349: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1350: MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);
1351: }
1352: col = 2*s + 1;
1354: /*
1355: With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1356: because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1357: */
1358: if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1359: if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1360: if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1362: DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
1363: DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
1364: PetscObjectGetComm((PetscObject)da,&comm);
1366: PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);
1367: DMGetLocalToGlobalMapping(da,<og);
1369: MatSetBlockSize(J,nc);
1370: /* determine the matrix preallocation information */
1371: MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
1372: for (i=xs; i<xs+nx; i++) {
1373: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1374: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1375: for (j=ys; j<ys+ny; j++) {
1376: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1377: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1378: for (k=zs; k<zs+nz; k++) {
1379: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1380: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));
1382: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1384: cnt = 0;
1385: for (l=0; l<nc; l++) {
1386: for (ii=istart; ii<iend+1; ii++) {
1387: for (jj=jstart; jj<jend+1; jj++) {
1388: for (kk=kstart; kk<kend+1; kk++) {
1389: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1390: cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1391: }
1392: }
1393: }
1394: }
1395: rows[l] = l + nc*(slot);
1396: }
1397: if (removedups) {
1398: MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1399: } else {
1400: MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1401: }
1402: }
1403: }
1404: }
1405: MatSetBlockSize(J,nc);
1406: MatSeqAIJSetPreallocation(J,0,dnz);
1407: MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
1408: MatPreallocateFinalize(dnz,onz);
1409: MatGetLocalToGlobalMapping(J,&mltog,NULL);
1410: if (!mltog) {
1411: MatSetLocalToGlobalMapping(J,ltog,ltog);
1412: }
1414: /*
1415: For each node in the grid we get its neighbors in the local (on-process, ghost-point-inclusive)
1416: ordering; MatSetValuesLocal() then maps those indices to the global
1417: PETSc ordering.
1418: */
1419: if (!da->prealloc_only) {
1420: for (i=xs; i<xs+nx; i++) {
1421: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1422: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1423: for (j=ys; j<ys+ny; j++) {
1424: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1425: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1426: for (k=zs; k<zs+nz; k++) {
1427: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1428: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));
1430: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1432: cnt = 0;
1433: for (kk=kstart; kk<kend+1; kk++) {
1434: for (jj=jstart; jj<jend+1; jj++) {
1435: for (ii=istart; ii<iend+1; ii++) {
1436: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1437: cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
1438: for (l=1; l<nc; l++) {
1439: cols[cnt] = 1 + cols[cnt-1];cnt++;
1440: }
1441: }
1442: }
1443: }
1444: }
1445: rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1446: MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1447: }
1448: }
1449: }
1450: /* do not copy values to GPU since they are all zero and not yet needed there */
1451: MatBindToCPU(J,PETSC_TRUE);
1452: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1453: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1454: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1455: MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1456: }
1457: MatBindToCPU(J,PETSC_FALSE);
1458: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1459: }
1460: PetscFree2(rows,cols);
1461: return 0;
1462: }
1464: /* ---------------------------------------------------------------------------------*/
1466: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1467: {
1468: DM_DA *dd = (DM_DA*)da->data;
1469: PetscInt xs,nx,i,j,gxs,gnx,row,k,l;
1470: PetscInt m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1471: PetscInt *ofill = dd->ofill,*dfill = dd->dfill;
1472: DMBoundaryType bx;
1473: ISLocalToGlobalMapping ltog;
1474: PetscMPIInt rank,size;
1476: MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);
1477: MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);
1479: /*
1480: nc - number of components per grid point
1482: */
1483: DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);
1485: DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);
1486: DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);
1488: MatSetBlockSize(J,nc);
1489: PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);
1491: /*
1492: Note: the counts could be smaller for the first and last process when the boundary is not periodic;
1493: the preallocation counts below assume dfill is provided (a NULL dfill is not handled here)
1494: */
1495: cnt = 0;
1496: /* coupling with process to the left */
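/* a local node at offset i < s has (s - i) left stencil neighbors on the previous process (counted in ocols) and i left plus s right neighbors on this process, hence the (s + i) factor in cols */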
1497: for (i=0; i<s; i++) {
1498: for (j=0; j<nc; j++) {
1499: ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1500: cols[cnt] = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1501: if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1502: if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1503: else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1504: }
1505: maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1506: cnt++;
1507: }
1508: }
1509: for (i=s; i<nx-s; i++) {
1510: for (j=0; j<nc; j++) {
1511: cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1512: maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1513: cnt++;
1514: }
1515: }
1516: /* coupling with process to the right */
1517: for (i=nx-s; i<nx; i++) {
1518: for (j=0; j<nc; j++) {
1519: ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1520: cols[cnt] = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1521: if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1522: if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1523: else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1524: }
1525: maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1526: cnt++;
1527: }
1528: }
1530: MatSeqAIJSetPreallocation(J,0,cols);
1531: MatMPIAIJSetPreallocation(J,0,cols,0,ocols);
1532: PetscFree2(cols,ocols);
1534: DMGetLocalToGlobalMapping(da,<og);
1535: MatSetLocalToGlobalMapping(J,ltog,ltog);
1537: /*
1538: For each node in the grid the global column indices of its stencil neighbors are
1539: computed directly and the (zero) values are inserted with MatSetValues() in the global
1540: PETSc ordering.
1541: */
1542: if (!da->prealloc_only) {
1543: PetscMalloc1(maxcnt,&cols);
1544: row = xs*nc;
1545: /* coupling with process to the left */
1546: for (i=xs; i<xs+s; i++) {
1547: for (j=0; j<nc; j++) {
1548: cnt = 0;
1549: if (rank) {
1550: for (l=0; l<s; l++) {
1551: for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1552: }
1553: }
1554: if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1555: for (l=0; l<s; l++) {
1556: for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
1557: }
1558: }
1559: if (dfill) {
1560: for (k=dfill[j]; k<dfill[j+1]; k++) {
1561: cols[cnt++] = i*nc + dfill[k];
1562: }
1563: } else {
1564: for (k=0; k<nc; k++) {
1565: cols[cnt++] = i*nc + k;
1566: }
1567: }
1568: for (l=0; l<s; l++) {
1569: for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1570: }
1571: MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1572: row++;
1573: }
1574: }
1575: for (i=xs+s; i<xs+nx-s; i++) {
1576: for (j=0; j<nc; j++) {
1577: cnt = 0;
1578: for (l=0; l<s; l++) {
1579: for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1580: }
1581: if (dfill) {
1582: for (k=dfill[j]; k<dfill[j+1]; k++) {
1583: cols[cnt++] = i*nc + dfill[k];
1584: }
1585: } else {
1586: for (k=0; k<nc; k++) {
1587: cols[cnt++] = i*nc + k;
1588: }
1589: }
1590: for (l=0; l<s; l++) {
1591: for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1592: }
1593: MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1594: row++;
1595: }
1596: }
1597: /* coupling with process to the right */
1598: for (i=xs+nx-s; i<xs+nx; i++) {
1599: for (j=0; j<nc; j++) {
1600: cnt = 0;
1601: for (l=0; l<s; l++) {
1602: for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1603: }
1604: if (dfill) {
1605: for (k=dfill[j]; k<dfill[j+1]; k++) {
1606: cols[cnt++] = i*nc + dfill[k];
1607: }
1608: } else {
1609: for (k=0; k<nc; k++) {
1610: cols[cnt++] = i*nc + k;
1611: }
1612: }
1613: if (rank < size-1) {
1614: for (l=0; l<s; l++) {
1615: for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1616: }
1617: }
1618: if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1619: for (l=0; l<s; l++) {
1620: for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
1621: }
1622: }
1623: MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1624: row++;
1625: }
1626: }
1627: PetscFree(cols);
1628: /* do not copy values to GPU since they are all zero and not yet needed there */
1629: MatBindToCPU(J,PETSC_TRUE);
1630: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1631: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1632: MatBindToCPU(J,PETSC_FALSE);
1633: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1634: }
1635: return 0;
1636: }
1638: /* ---------------------------------------------------------------------------------*/
1640: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1641: {
1642: PetscInt xs,nx,i,i1,slot,gxs,gnx;
1643: PetscInt m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1644: PetscInt istart,iend;
1645: DMBoundaryType bx;
1646: ISLocalToGlobalMapping ltog,mltog;
1648: /*
1649: nc - number of components per grid point
1650: col - number of colors needed in one direction for single component problem
1652: */
1653: DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);
1654: if (bx == DM_BOUNDARY_NONE) {
1655: MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);
1656: }
1657: col = 2*s + 1;
1659: DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);
1660: DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);
1662: MatSetBlockSize(J,nc);
1663: MatSeqAIJSetPreallocation(J,col*nc,NULL);
1664: MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);
1666: DMGetLocalToGlobalMapping(da,&ltog);
1667: MatGetLocalToGlobalMapping(J,&mltog,NULL);
1668: if (!mltog) {
1669: MatSetLocalToGlobalMapping(J,ltog,ltog);
1670: }
1672: /*
1673: For each node in the grid: we get the neighbors in the local (on-processor) ordering, which
1674: includes the ghost points; MatSetValuesLocal() then maps those indices to the global
1675: PETSc ordering.
1676: */
1677: if (!da->prealloc_only) {
1678: PetscMalloc2(nc,&rows,col*nc*nc,&cols);
1679: for (i=xs; i<xs+nx; i++) {
1680: istart = PetscMax(-s,gxs - i);
1681: iend = PetscMin(s,gxs + gnx - i - 1);
1682: slot = i - gxs;
1684: cnt = 0;
1685: for (i1=istart; i1<iend+1; i1++) {
1686: cols[cnt++] = nc*(slot + i1);
1687: for (l=1; l<nc; l++) {
1688: cols[cnt] = 1 + cols[cnt-1];cnt++;
1689: }
1690: }
1691: rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1692: MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1693: }
1694: /* do not copy values to GPU since they are all zero and not yet needed there */
1695: MatBindToCPU(J,PETSC_TRUE);
1696: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1697: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1698: if (bx == DM_BOUNDARY_NONE) {
1699: MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1700: }
1701: MatBindToCPU(J,PETSC_FALSE);
1702: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1703: PetscFree2(rows,cols);
1704: }
1705: return 0;
1706: }
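/*
   Editor's sketch (not part of fdda.c): the usual path into DMCreateMatrix_DA_1d_MPIAIJ above.
   A 1d DMDA with the default AIJ matrix type obtains its preallocated Jacobian from
   DMCreateMatrix(); the grid size, dof, and stencil width below are illustrative assumptions.
*/
static PetscErrorCode ExampleCreate1dAIJ(MPI_Comm comm, Mat *J)
{
  DM da;

  DMDACreate1d(comm, DM_BOUNDARY_NONE, 128, 2, 1, NULL, &da); /* 128 points, 2 components, stencil width 1 */
  DMSetUp(da);
  DMCreateMatrix(da, J);                                      /* preallocated and assembled with zeros as above */
  DMDestroy(&da);
  return 0;
}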
1708: /* ---------------------------------------------------------------------------------*/
1710: PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J)
1711: {
1712: PetscInt xs,nx,i,i1,slot,gxs,gnx;
1713: PetscInt m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1714: PetscInt istart,iend;
1715: DMBoundaryType bx;
1716: ISLocalToGlobalMapping ltog,mltog;
1718: /*
1719: nc - number of components per grid point
1720: col - number of colors needed in one direction for single component problem
1721: */
1722: DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);
1723: col = 2*s + 1;
1725: DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);
1726: DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);
1728: MatSetBlockSize(J,nc);
1729: MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc);
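/*
   Editor's note: this variant hands Seq AIJ a single total count of nx*nc*col*nc nonzeros rather
   than per-row counts; rows are then inserted below one grid point at a time with their full
   column lists (see the MAT_SORTED_FULL handling in this routine).
*/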
1731: DMGetLocalToGlobalMapping(da,&ltog);
1732: MatGetLocalToGlobalMapping(J,&mltog,NULL);
1733: if (!mltog) {
1734: MatSetLocalToGlobalMapping(J,ltog,ltog);
1735: }
1737: /*
1738: For each node in the grid: we get the neighbors in the local (on-processor) ordering, which
1739: includes the ghost points; MatSetValuesLocal() then maps those indices to the global
1740: PETSc ordering.
1741: */
1742: if (!da->prealloc_only) {
1743: PetscMalloc2(nc,&rows,col*nc*nc,&cols);
1744: for (i=xs; i<xs+nx; i++) {
1745: istart = PetscMax(-s,gxs - i);
1746: iend = PetscMin(s,gxs + gnx - i - 1);
1747: slot = i - gxs;
1749: cnt = 0;
1750: for (i1=istart; i1<iend+1; i1++) {
1751: cols[cnt++] = nc*(slot + i1);
1752: for (l=1; l<nc; l++) {
1753: cols[cnt] = 1 + cols[cnt-1];cnt++;
1754: }
1755: }
1756: rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1757: MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1758: }
1759: /* do not copy values to GPU since they are all zero and not yet needed there */
1760: MatBindToCPU(J,PETSC_TRUE);
1761: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1762: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1763: if (bx == DM_BOUNDARY_NONE) {
1764: MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1765: }
1766: MatBindToCPU(J,PETSC_FALSE);
1767: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1768: PetscFree2(rows,cols);
1769: }
1770: MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1771: return 0;
1772: }
1774: PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1775: {
1776: PetscErrorCode ierr;
1777: PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1778: PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1779: PetscInt istart,iend,jstart,jend,ii,jj;
1780: MPI_Comm comm;
1781: PetscScalar *values;
1782: DMBoundaryType bx,by;
1783: DMDAStencilType st;
1784: ISLocalToGlobalMapping ltog;
1786: /*
1787: nc - number of components per grid point
1788: col - number of colors needed in one direction for single component problem
1789: */
1790: DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);
1791: col = 2*s + 1;
1793: DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
1794: DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
1795: PetscObjectGetComm((PetscObject)da,&comm);
1797: PetscMalloc1(col*col*nc*nc,&cols);
1799: DMGetLocalToGlobalMapping(da,&ltog);
1801: /* determine the matrix preallocation information */
1802: MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);
1803: for (i=xs; i<xs+nx; i++) {
1804: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1805: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1806: for (j=ys; j<ys+ny; j++) {
1807: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1808: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1809: slot = i - gxs + gnx*(j - gys);
1811: /* Find block columns in block row */
1812: cnt = 0;
1813: for (ii=istart; ii<iend+1; ii++) {
1814: for (jj=jstart; jj<jend+1; jj++) {
1815: if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1816: cols[cnt++] = slot + ii + gnx*jj;
1817: }
1818: }
1819: }
1820: MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);
1821: }
1822: }
1823: MatSeqBAIJSetPreallocation(J,nc,0,dnz);
1824: MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);
1825: MatPreallocateFinalize(dnz,onz);
1827: MatSetLocalToGlobalMapping(J,ltog,ltog);
1829: /*
1830: For each node in the grid: we get the neighbors in the local (on-processor) ordering, which
1831: includes the ghost points; MatSetValuesBlockedLocal() then maps those indices to the global
1832: PETSc ordering.
1833: */
1834: if (!da->prealloc_only) {
1835: PetscCalloc1(col*col*nc*nc,&values);
1836: for (i=xs; i<xs+nx; i++) {
1837: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1838: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1839: for (j=ys; j<ys+ny; j++) {
1840: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1841: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1842: slot = i - gxs + gnx*(j - gys);
1843: cnt = 0;
1844: for (ii=istart; ii<iend+1; ii++) {
1845: for (jj=jstart; jj<jend+1; jj++) {
1846: if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1847: cols[cnt++] = slot + ii + gnx*jj;
1848: }
1849: }
1850: }
1851: MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);
1852: }
1853: }
1854: PetscFree(values);
1855: /* do not copy values to GPU since they are all zero and not yet needed there */
1856: MatBindToCPU(J,PETSC_TRUE);
1857: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1858: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1859: MatBindToCPU(J,PETSC_FALSE);
1860: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1861: }
1862: PetscFree(cols);
1863: return 0;
1864: }
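/*
   Editor's sketch (not part of fdda.c): requesting a blocked matrix format so that
   DMCreateMatrix() dispatches to the BAIJ (or SBAIJ) routines in this file instead of the AIJ
   ones.  "da" is assumed to be an existing 2d or 3d DMDA created elsewhere.
*/
static PetscErrorCode ExampleCreateBlocked(DM da, Mat *J)
{
  DMSetMatType(da, MATBAIJ);  /* or MATSBAIJ for the symmetric routines below */
  DMCreateMatrix(da, J);
  return 0;
}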
1866: PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1867: {
1868: PetscErrorCode ierr;
1869: PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1870: PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1871: PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1872: MPI_Comm comm;
1873: PetscScalar *values;
1874: DMBoundaryType bx,by,bz;
1875: DMDAStencilType st;
1876: ISLocalToGlobalMapping ltog;
1878: /*
1879: nc - number of components per grid point
1880: col - number of colors needed in one direction for single component problem
1882: */
1883: DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);
1884: col = 2*s + 1;
1886: DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
1887: DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
1888: PetscObjectGetComm((PetscObject)da,&comm);
1890: PetscMalloc1(col*col*col,&cols);
1892: DMGetLocalToGlobalMapping(da,&ltog);
1894: /* determine the matrix preallocation information */
1895: MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);
1896: for (i=xs; i<xs+nx; i++) {
1897: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1898: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1899: for (j=ys; j<ys+ny; j++) {
1900: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1901: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1902: for (k=zs; k<zs+nz; k++) {
1903: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1904: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));
1906: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1908: /* Find block columns in block row */
1909: cnt = 0;
1910: for (ii=istart; ii<iend+1; ii++) {
1911: for (jj=jstart; jj<jend+1; jj++) {
1912: for (kk=kstart; kk<kend+1; kk++) {
1913: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1914: cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1915: }
1916: }
1917: }
1918: }
1919: MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);
1920: }
1921: }
1922: }
1923: MatSeqBAIJSetPreallocation(J,nc,0,dnz);
1924: MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);
1925: MatPreallocateFinalize(dnz,onz);
1927: MatSetLocalToGlobalMapping(J,ltog,ltog);
1929: /*
1930: For each node in the grid: we get the neighbors in the local (on-processor) ordering, which
1931: includes the ghost points; MatSetValuesBlockedLocal() then maps those indices to the global
1932: PETSc ordering.
1933: */
1934: if (!da->prealloc_only) {
1935: PetscCalloc1(col*col*col*nc*nc,&values);
1936: for (i=xs; i<xs+nx; i++) {
1937: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1938: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1939: for (j=ys; j<ys+ny; j++) {
1940: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1941: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1942: for (k=zs; k<zs+nz; k++) {
1943: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1944: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));
1946: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1948: cnt = 0;
1949: for (ii=istart; ii<iend+1; ii++) {
1950: for (jj=jstart; jj<jend+1; jj++) {
1951: for (kk=kstart; kk<kend+1; kk++) {
1952: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1953: cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1954: }
1955: }
1956: }
1957: }
1958: MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);
1959: }
1960: }
1961: }
1962: PetscFree(values);
1963: /* do not copy values to GPU since they are all zero and not yet needed there */
1964: MatBindToCPU(J,PETSC_TRUE);
1965: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1966: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1967: MatBindToCPU(J,PETSC_FALSE);
1968: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1969: }
1970: PetscFree(cols);
1971: return 0;
1972: }
1974: /*
1975: This helper is for SBAIJ preallocation; it discards the lower-triangular values, which are difficult to
1976: identify in the local ordering with a periodic domain.
1977: */
1978: static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1979: {
1980: PetscInt i,n;
1982: ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);
1983: ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);
1984: for (i=0,n=0; i<*cnt; i++) {
1985: if (col[i] >= *row) col[n++] = col[i];
1986: }
1987: *cnt = n;
1988: return 0;
1989: }
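/*
   Worked example (editor's illustration): if the block row maps to global block index 7 and the
   candidate block columns map to {5, 6, 7, 8, 9}, the filter above keeps only {7, 8, 9}, so each
   block is referenced exactly once, in the upper triangle, as the SBAIJ preallocation and
   assembly below require.
*/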
1991: PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1992: {
1993: PetscErrorCode ierr;
1994: PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1995: PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1996: PetscInt istart,iend,jstart,jend,ii,jj;
1997: MPI_Comm comm;
1998: PetscScalar *values;
1999: DMBoundaryType bx,by;
2000: DMDAStencilType st;
2001: ISLocalToGlobalMapping ltog;
2003: /*
2004: nc - number of components per grid point
2005: col - number of colors needed in one direction for single component problem
2006: */
2007: DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);
2008: col = 2*s + 1;
2010: DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
2011: DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
2012: PetscObjectGetComm((PetscObject)da,&comm);
2014: PetscMalloc1(col*col*nc*nc,&cols);
2016: DMGetLocalToGlobalMapping(da,&ltog);
2018: /* determine the matrix preallocation information */
2019: MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);
2020: for (i=xs; i<xs+nx; i++) {
2021: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2022: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
2023: for (j=ys; j<ys+ny; j++) {
2024: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2025: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
2026: slot = i - gxs + gnx*(j - gys);
2028: /* Find block columns in block row */
2029: cnt = 0;
2030: for (ii=istart; ii<iend+1; ii++) {
2031: for (jj=jstart; jj<jend+1; jj++) {
2032: if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2033: cols[cnt++] = slot + ii + gnx*jj;
2034: }
2035: }
2036: }
2037: L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2038: MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);
2039: }
2040: }
2041: MatSeqSBAIJSetPreallocation(J,nc,0,dnz);
2042: MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);
2043: MatPreallocateFinalize(dnz,onz);
2045: MatSetLocalToGlobalMapping(J,ltog,ltog);
2047: /*
2048: For each node in the grid: we get the neighbors in the local (on-processor) ordering, which
2049: includes the ghost points; L2GFilterUpperTriangular() maps them to the global PETSc ordering
2050: and drops the lower triangle before MatSetValuesBlocked() inserts the entries.
2051: */
2052: if (!da->prealloc_only) {
2053: PetscCalloc1(col*col*nc*nc,&values);
2054: for (i=xs; i<xs+nx; i++) {
2055: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2056: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
2057: for (j=ys; j<ys+ny; j++) {
2058: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2059: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
2060: slot = i - gxs + gnx*(j - gys);
2062: /* Find block columns in block row */
2063: cnt = 0;
2064: for (ii=istart; ii<iend+1; ii++) {
2065: for (jj=jstart; jj<jend+1; jj++) {
2066: if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2067: cols[cnt++] = slot + ii + gnx*jj;
2068: }
2069: }
2070: }
2071: L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2072: MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);
2073: }
2074: }
2075: PetscFree(values);
2076: /* do not copy values to GPU since they are all zero and not yet needed there */
2077: MatBindToCPU(J,PETSC_TRUE);
2078: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2079: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2080: MatBindToCPU(J,PETSC_FALSE);
2081: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2082: }
2083: PetscFree(cols);
2084: return 0;
2085: }
2087: PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2088: {
2089: PetscErrorCode ierr;
2090: PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2091: PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2092: PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2093: MPI_Comm comm;
2094: PetscScalar *values;
2095: DMBoundaryType bx,by,bz;
2096: DMDAStencilType st;
2097: ISLocalToGlobalMapping ltog;
2099: /*
2100: nc - number of components per grid point
2101: col - number of colors needed in one direction for single component problem
2102: */
2103: DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);
2104: col = 2*s + 1;
2106: DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
2107: DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
2108: PetscObjectGetComm((PetscObject)da,&comm);
2110: /* create the matrix */
2111: PetscMalloc1(col*col*col,&cols);
2113: DMGetLocalToGlobalMapping(da,&ltog);
2115: /* determine the matrix preallocation information */
2116: MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);
2117: for (i=xs; i<xs+nx; i++) {
2118: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2119: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
2120: for (j=ys; j<ys+ny; j++) {
2121: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2122: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
2123: for (k=zs; k<zs+nz; k++) {
2124: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2125: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));
2127: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2129: /* Find block columns in block row */
2130: cnt = 0;
2131: for (ii=istart; ii<iend+1; ii++) {
2132: for (jj=jstart; jj<jend+1; jj++) {
2133: for (kk=kstart; kk<kend+1; kk++) {
2134: if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2135: cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2136: }
2137: }
2138: }
2139: }
2140: L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2141: MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);
2142: }
2143: }
2144: }
2145: MatSeqSBAIJSetPreallocation(J,nc,0,dnz);
2146: MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);
2147: MatPreallocateFinalize(dnz,onz);
2149: MatSetLocalToGlobalMapping(J,ltog,ltog);
2151: /*
2152: For each node in the grid: we get the neighbors in the local (on-processor) ordering, which
2153: includes the ghost points; L2GFilterUpperTriangular() maps them to the global PETSc ordering
2154: and drops the lower triangle before MatSetValuesBlocked() inserts the entries.
2155: */
2156: if (!da->prealloc_only) {
2157: PetscCalloc1(col*col*col*nc*nc,&values);
2158: for (i=xs; i<xs+nx; i++) {
2159: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2160: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
2161: for (j=ys; j<ys+ny; j++) {
2162: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2163: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
2164: for (k=zs; k<zs+nz; k++) {
2165: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2166: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));
2168: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2170: cnt = 0;
2171: for (ii=istart; ii<iend+1; ii++) {
2172: for (jj=jstart; jj<jend+1; jj++) {
2173: for (kk=kstart; kk<kend+1; kk++) {
2174: if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2175: cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2176: }
2177: }
2178: }
2179: }
2180: L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2181: MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);
2182: }
2183: }
2184: }
2185: PetscFree(values);
2186: /* do not copy values to GPU since they are all zero and not yet needed there */
2187: MatBindToCPU(J,PETSC_TRUE);
2188: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2189: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2190: MatBindToCPU(J,PETSC_FALSE);
2191: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2192: }
2193: PetscFree(cols);
2194: return 0;
2195: }
2197: /* ---------------------------------------------------------------------------------*/
2199: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2200: {
2201: PetscErrorCode ierr;
2202: PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2203: PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2204: PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2205: DM_DA *dd = (DM_DA*)da->data;
2206: PetscInt ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2207: MPI_Comm comm;
2208: PetscScalar *values;
2209: DMBoundaryType bx,by,bz;
2210: ISLocalToGlobalMapping ltog;
2211: DMDAStencilType st;
2212: PetscBool removedups = PETSC_FALSE;
2214: /*
2215: nc - number of components per grid point
2216: col - number of colors needed in one direction for single component problem
2218: */
2219: DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
2220: col = 2*s + 1;
2221: if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2222: by 2*stencil_width + 1\n");
2223: if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2224: by 2*stencil_width + 1\n");
2225: if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2226: by 2*stencil_width + 1\n");
2228: /*
2229: With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2230: because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2231: */
2232: if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2233: if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2234: if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
2236: DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
2237: DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
2238: PetscObjectGetComm((PetscObject)da,&comm);
2240: PetscMalloc1(col*col*col*nc,&cols);
2241: DMGetLocalToGlobalMapping(da,&ltog);
2243: /* determine the matrix preallocation information */
2244: MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
2246: MatSetBlockSize(J,nc);
2247: for (i=xs; i<xs+nx; i++) {
2248: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2249: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
2250: for (j=ys; j<ys+ny; j++) {
2251: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2252: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
2253: for (k=zs; k<zs+nz; k++) {
2254: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2255: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));
2257: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2259: for (l=0; l<nc; l++) {
2260: cnt = 0;
2261: for (ii=istart; ii<iend+1; ii++) {
2262: for (jj=jstart; jj<jend+1; jj++) {
2263: for (kk=kstart; kk<kend+1; kk++) {
2264: if (ii || jj || kk) {
2265: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2266: for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2267: }
2268: } else {
2269: if (dfill) {
2270: for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2271: } else {
2272: for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2273: }
2274: }
2275: }
2276: }
2277: }
2278: row = l + nc*(slot);
2279: maxcnt = PetscMax(maxcnt,cnt);
2280: if (removedups) {
2281: MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);
2282: } else {
2283: MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);
2284: }
2285: }
2286: }
2287: }
2288: }
2289: MatSeqAIJSetPreallocation(J,0,dnz);
2290: MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
2291: MatPreallocateFinalize(dnz,onz);
2292: MatSetLocalToGlobalMapping(J,ltog,ltog);
2294: /*
2295: For each node in the grid: we get the neighbors in the local (on-processor) ordering, which
2296: includes the ghost points; MatSetValuesLocal() then maps those indices to the global
2297: PETSc ordering.
2298: */
2299: if (!da->prealloc_only) {
2300: PetscCalloc1(maxcnt,&values);
2301: for (i=xs; i<xs+nx; i++) {
2302: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2303: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
2304: for (j=ys; j<ys+ny; j++) {
2305: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2306: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
2307: for (k=zs; k<zs+nz; k++) {
2308: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2309: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));
2311: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2313: for (l=0; l<nc; l++) {
2314: cnt = 0;
2315: for (ii=istart; ii<iend+1; ii++) {
2316: for (jj=jstart; jj<jend+1; jj++) {
2317: for (kk=kstart; kk<kend+1; kk++) {
2318: if (ii || jj || kk) {
2319: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2320: for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2321: }
2322: } else {
2323: if (dfill) {
2324: for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2325: } else {
2326: for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2327: }
2328: }
2329: }
2330: }
2331: }
2332: row = l + nc*(slot);
2333: MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);
2334: }
2335: }
2336: }
2337: }
2338: PetscFree(values);
2339: /* do not copy values to GPU since they are all zero and not yet needed there */
2340: MatBindToCPU(J,PETSC_TRUE);
2341: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2342: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2343: MatBindToCPU(J,PETSC_FALSE);
2344: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2345: }
2346: PetscFree(cols);
2347: return 0;
2348: }
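/*
   Editor's sketch (not part of fdda.c): a 3d star-stencil DMDA with a block fill pattern, for
   which DMCreateMatrix() ends up in DMCreateMatrix_DA_3d_MPIAIJ_Fill above.  Grid sizes, dof,
   and the fill arrays are illustrative assumptions only.
*/
static PetscErrorCode ExampleCreate3dAIJFill(MPI_Comm comm, Mat *J)
{
  DM       da;
  PetscInt dfill[4] = {1, 1,   /* both components coupled within a grid point          */
                       1, 1};
  PetscInt ofill[4] = {1, 0,   /* only self-coupling kept for neighboring grid points  */
                       0, 1};

  DMDACreate3d(comm, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_STAR,
               16, 16, 16, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, 2, 1, NULL, NULL, NULL, &da);
  DMSetUp(da);
  DMDASetBlockFills(da, dfill, ofill);
  DMCreateMatrix(da, J);
  DMDestroy(&da);
  return 0;
}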