Actual source code: ex52.c
2: static char help[] = "Solves a linear system in parallel with KSP. Modified from ex2.c \n\
3: Illustrates how to use the external packages MUMPS, SuperLU and STRUMPACK \n\
4: Input parameters include:\n\
5: -random_exact_sol : use a random exact solution vector\n\
6: -view_exact_sol : write exact solution vector to stdout\n\
7: -m <mesh_x> : number of mesh points in x-direction\n\
8: -n <mesh_y> : number of mesh points in y-direction\n\n";
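/*
   Example invocations (a sketch only; the MPI launcher, process count, executable
   name, and available external packages depend on your PETSc installation):
     mpiexec -n 2 ./ex52 -use_mumps_lu -m 50 -n 50 -print_mumps_memory
     mpiexec -n 2 ./ex52 -use_superlu_lu
     ./ex52 -use_strumpack_ilu -view_exact_sol
   All of the -use_* options are defined in main() below.
*/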
10: #include <petscksp.h>
12: #if defined(PETSC_HAVE_MUMPS)
13: /* Subroutine contributed by Varun Hiremath */
14: PetscErrorCode printMumpsMemoryInfo(Mat F)
15: {
16: PetscInt maxMem, sumMem;
19: MatMumpsGetInfog(F,16,&maxMem);
20: MatMumpsGetInfog(F,17,&sumMem);
21: PetscPrintf(PETSC_COMM_WORLD, "\n MUMPS INFOG(16) :: Max memory in MB = %D", maxMem);
22: PetscPrintf(PETSC_COMM_WORLD, "\n MUMPS INFOG(17) :: Sum memory in MB = %D \n", sumMem);
23: return 0;
24: }
25: #endif
27: int main(int argc,char **args)
28: {
29: Vec x,b,u; /* approx solution, RHS, exact solution */
30: Mat A,F;
31: KSP ksp; /* linear solver context */
32: PC pc;
33: PetscRandom rctx; /* random number generator context */
34: PetscReal norm; /* norm of solution error */
35: PetscInt i,j,Ii,J,Istart,Iend,m = 8,n = 7,its;
36: PetscBool flg=PETSC_FALSE,flg_ilu=PETSC_FALSE,flg_ch=PETSC_FALSE;
37: #if defined(PETSC_HAVE_MUMPS)
38: PetscBool flg_mumps=PETSC_FALSE,flg_mumps_ch=PETSC_FALSE;
39: #endif
40: #if defined(PETSC_HAVE_SUPERLU) || defined(PETSC_HAVE_SUPERLU_DIST)
41: PetscBool flg_superlu=PETSC_FALSE;
42: #endif
43: #if defined(PETSC_HAVE_STRUMPACK)
44: PetscBool flg_strumpack=PETSC_FALSE;
45: #endif
46: PetscScalar v;
47: PetscMPIInt rank,size;
48: #if defined(PETSC_USE_LOG)
49: PetscLogStage stage;
50: #endif
52: PetscInitialize(&argc,&args,(char*)0,help);
53: MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
54: MPI_Comm_size(PETSC_COMM_WORLD,&size);
55: PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);
56: PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);
57: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
58: Compute the matrix and right-hand-side vector that define
59: the linear system, Ax = b.
60: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
61: MatCreate(PETSC_COMM_WORLD,&A);
62: MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);
63: MatSetFromOptions(A);
64: MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);
65: MatSeqAIJSetPreallocation(A,5,NULL);
66: MatSetUp(A);
68: /*
69: Currently, all PETSc parallel matrix formats are partitioned by
70: contiguous chunks of rows across the processors. Determine which
71: rows of the matrix are locally owned.
72: */
73: MatGetOwnershipRange(A,&Istart,&Iend);
75: /*
76: Set matrix elements for the 2-D, five-point stencil in parallel.
77: - Each processor needs to insert only elements that it owns
78: locally (but any non-local elements will be sent to the
79: appropriate processor during matrix assembly).
80: - Always specify global rows and columns of matrix entries.
82: Note: this uses the less common natural ordering that orders first
83: all the unknowns for x = h then for x = 2h etc; Hence you see J = Ii +- n
84: instead of J = Ii +- m as you might expect. The more standard ordering
85: would first do all variables for y = h, then y = 2h etc.
87: */
88: PetscLogStageRegister("Assembly", &stage);
89: PetscLogStagePush(stage);
90: for (Ii=Istart; Ii<Iend; Ii++) {
91: v = -1.0; i = Ii/n; j = Ii - i*n;
92: if (i>0) {J = Ii - n; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
93: if (i<m-1) {J = Ii + n; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
94: if (j>0) {J = Ii - 1; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
95: if (j<n-1) {J = Ii + 1; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
96: v = 4.0; MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);
97: }
99: /*
100: Assemble matrix, using the 2-step process:
101: MatAssemblyBegin(), MatAssemblyEnd()
102: Computations can be done while messages are in transit
103: by placing code between these two statements.
104: */
105: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
106: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
107: PetscLogStagePop();
109: /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
110: MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
112: /*
113: Create parallel vectors.
114: - We form 1 vector from scratch and then duplicate as needed.
115: - When using VecCreate(), VecSetSizes() and VecSetFromOptions()
116: in this example, we specify only the
117: vector's global dimension; the parallel partitioning is determined
118: at runtime.
119: - When solving a linear system, the vectors and matrices MUST
120: be partitioned accordingly. PETSc automatically generates
121: appropriately partitioned matrices and vectors when MatCreate()
122: and VecCreate() are used with the same communicator.
123: - The user can alternatively specify the local vector and matrix
124: dimensions when more sophisticated partitioning is needed
125: (replacing the PETSC_DECIDE argument in the VecSetSizes() statement
126: below).
127: */
128: VecCreate(PETSC_COMM_WORLD,&u);
129: VecSetSizes(u,PETSC_DECIDE,m*n);
130: VecSetFromOptions(u);
131: VecDuplicate(u,&b);
132: VecDuplicate(b,&x);
134: /*
135: Set exact solution; then compute right-hand-side vector.
136: By default we use an exact solution vector with all
137: elements equal to 1.0; alternatively, the runtime option
138: -random_exact_sol forms a solution vector with random components.
139: */
140: PetscOptionsGetBool(NULL,NULL,"-random_exact_sol",&flg,NULL);
141: if (flg) {
142: PetscRandomCreate(PETSC_COMM_WORLD,&rctx);
143: PetscRandomSetFromOptions(rctx);
144: VecSetRandom(u,rctx);
145: PetscRandomDestroy(&rctx);
146: } else {
147: VecSet(u,1.0);
148: }
149: MatMult(A,u,b);
151: /*
152: View the exact solution vector if desired
153: */
154: flg = PETSC_FALSE;
155: PetscOptionsGetBool(NULL,NULL,"-view_exact_sol",&flg,NULL);
156: if (flg) VecView(u,PETSC_VIEWER_STDOUT_WORLD);
158: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
159: Create the linear solver and set various options
160: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
162: /*
163: Create linear solver context
164: */
165: KSPCreate(PETSC_COMM_WORLD,&ksp);
166: KSPSetOperators(ksp,A,A);
168: /*
169: Example of how to use external package MUMPS
170: Note: runtime options
171: '-ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_7 2 -mat_mumps_icntl_24 1 -mat_mumps_cntl_3 1.e-6 -mat_mumps_icntl_33 1'
172: are equivalent to these procedural calls
173: */
174: #if defined(PETSC_HAVE_MUMPS)
175: flg_mumps = PETSC_FALSE;
176: flg_mumps_ch = PETSC_FALSE;
177: PetscOptionsGetBool(NULL,NULL,"-use_mumps_lu",&flg_mumps,NULL);
178: PetscOptionsGetBool(NULL,NULL,"-use_mumps_ch",&flg_mumps_ch,NULL);
179: if (flg_mumps || flg_mumps_ch) {
180: KSPSetType(ksp,KSPPREONLY);
181: PetscInt ival,icntl;
182: PetscReal val;
183: KSPGetPC(ksp,&pc);
184: if (flg_mumps) {
185: PCSetType(pc,PCLU);
186: } else if (flg_mumps_ch) {
187: MatSetOption(A,MAT_SPD,PETSC_TRUE); /* set MUMPS id%SYM=1 */
188: PCSetType(pc,PCCHOLESKY);
189: }
190: PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);
191: PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
192: PCFactorGetMatrix(pc,&F);
194: if (flg_mumps) {
195: /* Get memory estimates from MUMPS' MatLUFactorSymbolic(), e.g. INFOG(16), INFOG(17).
196: Since the symbolic factorization is performed here, the KSPSetUp() below will not repeat it */
197: MatFactorInfo info;
198: MatLUFactorSymbolic(F,A,NULL,NULL,&info);
199: flg = PETSC_FALSE;
200: PetscOptionsGetBool(NULL,NULL,"-print_mumps_memory",&flg,NULL);
201: if (flg) {
202: printMumpsMemoryInfo(F);
203: }
204: }
206: /* sequential ordering */
207: icntl = 7; ival = 2;
208: MatMumpsSetIcntl(F,icntl,ival);
210: /* threshold for row pivot detection */
211: MatMumpsSetIcntl(F,24,1);
212: icntl = 3; val = 1.e-6;
213: MatMumpsSetCntl(F,icntl,val);
215: /* compute determinant of A */
216: MatMumpsSetIcntl(F,33,1);
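/* The determinant requested here via ICNTL(33) is read back after KSPSetUp()
   below with MatMumpsGetRinfog(F,12,...), MatMumpsGetRinfog(F,13,...) and
   MatMumpsGetInfog(F,34,...) */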
217: }
218: #endif
220: /*
221: Example of how to use external package SuperLU
222: Note: runtime options
223: '-ksp_type preonly -pc_type ilu -pc_factor_mat_solver_type superlu -mat_superlu_ilu_droptol 1.e-8'
224: are equivalent to these procedural calls
225: */
226: #if defined(PETSC_HAVE_SUPERLU) || defined(PETSC_HAVE_SUPERLU_DIST)
227: flg_ilu = PETSC_FALSE;
228: flg_superlu = PETSC_FALSE;
229: PetscOptionsGetBool(NULL,NULL,"-use_superlu_lu",&flg_superlu,NULL);
230: PetscOptionsGetBool(NULL,NULL,"-use_superlu_ilu",&flg_ilu,NULL);
231: if (flg_superlu || flg_ilu) {
232: KSPSetType(ksp,KSPPREONLY);
233: KSPGetPC(ksp,&pc);
234: if (flg_superlu) {
235: PCSetType(pc,PCLU);
236: } else if (flg_ilu) {
237: PCSetType(pc,PCILU);
238: }
239: if (size == 1) {
240: #if !defined(PETSC_HAVE_SUPERLU)
241: SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This test requires SUPERLU");
242: #else
243: PCFactorSetMatSolverType(pc,MATSOLVERSUPERLU);
244: #endif
245: } else {
246: #if !defined(PETSC_HAVE_SUPERLU_DIST)
247: SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This test requires SUPERLU_DIST");
248: #else
249: PCFactorSetMatSolverType(pc,MATSOLVERSUPERLU_DIST);
250: #endif
251: }
252: PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
253: PCFactorGetMatrix(pc,&F);
254: #if defined(PETSC_HAVE_SUPERLU)
255: if (size == 1) {
256: MatSuperluSetILUDropTol(F,1.e-8);
257: }
258: #endif
259: }
260: #endif
262: /*
263: Example of how to use external package STRUMPACK
264: Note: runtime options
265: '-ksp_type preonly -pc_type lu/ilu \
266: -pc_factor_mat_solver_type strumpack \
267: -mat_strumpack_reordering METIS \
268: -mat_strumpack_colperm 0 \
269: -mat_strumpack_hss_rel_tol 1.e-3 \
270: -mat_strumpack_hss_min_sep_size 15 \
271: -mat_strumpack_max_rank 100 \
272: -mat_strumpack_leaf_size 4'
273: are equivalent to these procedural calls
275: We refer to the STRUMPACK-sparse manual, section 5, for more info on
276: how to tune the preconditioner.
277: */
278: #if defined(PETSC_HAVE_STRUMPACK)
279: flg_ilu = PETSC_FALSE;
280: flg_strumpack = PETSC_FALSE;
281: PetscOptionsGetBool(NULL,NULL,"-use_strumpack_lu",&flg_strumpack,NULL);
282: PetscOptionsGetBool(NULL,NULL,"-use_strumpack_ilu",&flg_ilu,NULL);
283: if (flg_strumpack || flg_ilu) {
284: KSPSetType(ksp,KSPPREONLY);
285: KSPGetPC(ksp,&pc);
286: if (flg_strumpack) {
287: PCSetType(pc,PCLU);
288: } else if (flg_ilu) {
289: PCSetType(pc,PCILU);
290: }
291: #if !defined(PETSC_HAVE_STRUMPACK)
292: SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This test requires STRUMPACK");
293: #endif
294: PCFactorSetMatSolverType(pc,MATSOLVERSTRUMPACK);
295: PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
296: PCFactorGetMatrix(pc,&F);
297: #if defined(PETSC_HAVE_STRUMPACK)
298: /* Set the fill-reducing reordering. */
299: MatSTRUMPACKSetReordering(F,MAT_STRUMPACK_METIS);
300: /* Since this is a simple discretization, the diagonal is always */
301: /* nonzero, and there is no need for the extra MC64 permutation. */
302: MatSTRUMPACKSetColPerm(F,PETSC_FALSE);
303: /* The compression tolerance used when doing low-rank compression */
304: /* in the preconditioner. This is problem specific! */
305: MatSTRUMPACKSetHSSRelTol(F,1.e-3);
306: /* Set minimum matrix size for HSS compression to 15 in order to */
307: /* demonstrate preconditioner on small problems. For performance */
308: /* a value of say 500 is better. */
309: MatSTRUMPACKSetHSSMinSepSize(F,15);
310: /* You can further limit the fill in the preconditioner by */
311: /* setting a maximum rank */
312: MatSTRUMPACKSetHSSMaxRank(F,100);
313: /* Set the size of the diagonal blocks (the leafs) in the HSS */
314: /* approximation. The default value should be better for real */
315: /* problems. This is mostly for illustration on a small problem. */
316: MatSTRUMPACKSetHSSLeafSize(F,4);
317: #endif
318: }
319: #endif
321: /*
322: Example of how to use procedural calls that are equivalent to
323: '-ksp_type preonly -pc_type lu/ilu -pc_factor_mat_solver_type petsc'
324: */
325: flg = PETSC_FALSE;
326: flg_ilu = PETSC_FALSE;
327: flg_ch = PETSC_FALSE;
328: PetscOptionsGetBool(NULL,NULL,"-use_petsc_lu",&flg,NULL);
329: PetscOptionsGetBool(NULL,NULL,"-use_petsc_ilu",&flg_ilu,NULL);
330: PetscOptionsGetBool(NULL,NULL,"-use_petsc_ch",&flg_ch,NULL);
331: if (flg || flg_ilu || flg_ch) {
332: Vec diag;
334: KSPSetType(ksp,KSPPREONLY);
335: KSPGetPC(ksp,&pc);
336: if (flg) {
337: PCSetType(pc,PCLU);
338: } else if (flg_ilu) {
339: PCSetType(pc,PCILU);
340: } else if (flg_ch) {
341: PCSetType(pc,PCCHOLESKY);
342: }
343: PCFactorSetMatSolverType(pc,MATSOLVERPETSC);
344: PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
345: PCFactorGetMatrix(pc,&F);
347: /* Test MatGetDiagonal() */
348: KSPSetUp(ksp);
349: VecDuplicate(x,&diag);
350: MatGetDiagonal(F,diag);
351: /* VecView(diag,PETSC_VIEWER_STDOUT_WORLD); */
352: VecDestroy(&diag);
353: }
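/* KSPSetFromOptions() processes any -ksp_ and -pc_ runtime options, which can
   override the procedural settings made above */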
355: KSPSetFromOptions(ksp);
357: /* Get info from matrix factors */
358: KSPSetUp(ksp);
360: #if defined(PETSC_HAVE_MUMPS)
361: if (flg_mumps || flg_mumps_ch) {
362: PetscInt icntl,infog34;
363: PetscReal cntl,rinfo12,rinfo13;
364: icntl = 3;
365: MatMumpsGetCntl(F,icntl,&cntl);
368: /* retrieve the determinant computed by MUMPS */
368: if (rank == 0) {
369: MatMumpsGetInfog(F,34,&infog34);
370: MatMumpsGetRinfog(F,12,&rinfo12);
371: MatMumpsGetRinfog(F,13,&rinfo13);
372: PetscPrintf(PETSC_COMM_SELF," Mumps row pivot threshold = %g\n",cntl);
373: PetscPrintf(PETSC_COMM_SELF," Mumps determinant = (%g, %g) * 2^%D \n",(double)rinfo12,(double)rinfo13,infog34);
374: }
375: }
376: #endif
378: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
379: Solve the linear system
380: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
381: KSPSolve(ksp,b,x);
383: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
384: Check solution and clean up
385: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
386: VecAXPY(x,-1.0,u);
387: VecNorm(x,NORM_2,&norm);
388: KSPGetIterationNumber(ksp,&its);
390: /*
391: Print convergence information. PetscPrintf() produces a single
392: print statement from all processes that share a communicator.
393: An alternative is PetscFPrintf(), which prints to a file.
394: */
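/*
   As a sketch, the same message could instead be written to an open file stream
   "fd" (a FILE* obtained from, e.g., PetscFOpen()) with
     PetscFPrintf(PETSC_COMM_WORLD,fd,"Norm of error %g iterations %D\n",(double)norm,its);
*/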
395: if (norm < 1.e-12) {
396: PetscPrintf(PETSC_COMM_WORLD,"Norm of error < 1.e-12 iterations %D\n",its);
397: } else {
398: PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g iterations %D\n",(double)norm,its);
399: }
401: /*
402: Free work space. All PETSc objects should be destroyed when they
403: are no longer needed.
404: */
405: KSPDestroy(&ksp);
406: VecDestroy(&u); VecDestroy(&x);
407: VecDestroy(&b); MatDestroy(&A);
409: /*
410: Always call PetscFinalize() before exiting a program. This routine
411: - finalizes the PETSc libraries as well as MPI
412: - provides summary and diagnostic information if certain runtime
413: options are chosen (e.g., -log_view).
414: */
415: PetscFinalize();
416: return 0;
417: }
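/*
   Note: error checking has been omitted above for brevity. In application code each
   PETSc call is normally checked, e.g. with the current convention
     PetscCall(KSPSolve(ksp,b,x));
   or, in older PETSc releases,
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
*/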
419: /*TEST
421: test:
422: args: -use_petsc_lu
423: output_file: output/ex52_2.out
425: test:
426: suffix: mumps
427: nsize: 3
428: requires: mumps
429: args: -use_mumps_lu
430: output_file: output/ex52_1.out
432: test:
433: suffix: mumps_2
434: nsize: 3
435: requires: mumps
436: args: -use_mumps_ch
437: output_file: output/ex52_1.out
439: test:
440: suffix: mumps_3
441: nsize: 3
442: requires: mumps
443: args: -use_mumps_ch -mat_type sbaij
444: output_file: output/ex52_1.out
446: test:
447: suffix: mumps_4
448: nsize: 3
449: requires: mumps !complex !single
450: args: -use_mumps_lu -m 50 -n 50 -print_mumps_memory
451: output_file: output/ex52_4.out
453: test:
454: suffix: mumps_omp_2
455: nsize: 4
456: requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
457: args: -use_mumps_lu -mat_mumps_use_omp_threads 2
458: output_file: output/ex52_1.out
460: test:
461: suffix: mumps_omp_3
462: nsize: 4
463: requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
464: args: -use_mumps_ch -mat_mumps_use_omp_threads 3
465: # Ignore the warning since we are intentionally testing the imbalanced case
466: filter: grep -v "Warning: number of OpenMP threads"
467: output_file: output/ex52_1.out
469: test:
470: suffix: mumps_omp_4
471: nsize: 4
472: requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
473: # let petsc guess a proper number for threads
474: args: -use_mumps_ch -mat_type sbaij -mat_mumps_use_omp_threads
475: output_file: output/ex52_1.out
477: test:
478: suffix: strumpack
479: requires: strumpack
480: args: -use_strumpack_lu
481: output_file: output/ex52_3.out
483: test:
484: suffix: strumpack_2
485: nsize: 2
486: requires: strumpack
487: args: -use_strumpack_lu
488: output_file: output/ex52_3.out
490: test:
491: suffix: strumpack_ilu
492: requires: strumpack
493: args: -use_strumpack_ilu
494: output_file: output/ex52_3.out
496: test:
497: suffix: strumpack_ilu_2
498: nsize: 2
499: requires: strumpack
500: args: -use_strumpack_ilu
501: output_file: output/ex52_3.out
503: test:
504: suffix: superlu
505: requires: superlu superlu_dist
506: args: -use_superlu_lu
507: output_file: output/ex52_2.out
509: test:
510: suffix: superlu_dist
511: nsize: 2
512: requires: superlu superlu_dist
513: args: -use_superlu_lu
514: output_file: output/ex52_2.out
516: test:
517: suffix: superlu_ilu
518: requires: superlu superlu_dist
519: args: -use_superlu_ilu
520: output_file: output/ex52_2.out
522: TEST*/