Actual source code: bddcscalingbasic.c
1: #include <../src/ksp/pc/impls/bddc/bddc.h>
2: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3: #include <petscblaslapack.h>
4: #include <../src/mat/impls/dense/seq/dense.h>
6: /* prototypes for deluxe functions */
7: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
8: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
9: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
10: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC);
11: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);
13: static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X)
14: {
15: Mat_SeqDense *mat = (Mat_SeqDense*)A->data;
16: const PetscScalar *b;
17: PetscScalar *x;
18: PetscInt n;
19: PetscBLASInt nrhs,info,m;
20: PetscBool flg;
22: PetscBLASIntCast(A->rmap->n,&m);
23: PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);
25: PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);
28: MatGetSize(B,NULL,&n);
29: PetscBLASIntCast(n,&nrhs);
30: MatDenseGetArrayRead(B,&b);
31: MatDenseGetArray(X,&x);
32: PetscArraycpy(x,b,m*nrhs);
33: MatDenseRestoreArrayRead(B,&b);
35: if (A->factortype == MAT_FACTOR_LU) {
36: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("T",&m,&nrhs,mat->v,&mat->lda,mat->pivots,x,&m,&info));
38: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only LU factor supported");
40: MatDenseRestoreArray(X,&x);
41: PetscLogFlops(nrhs*(2.0*m*m - m));
42: return 0;
43: }
45: static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
46: {
47: PC_IS* pcis = (PC_IS*)pc->data;
48: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
50: /* Apply partition of unity */
51: VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);
52: VecSet(global_vector,0.0);
53: VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);
54: VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);
55: return 0;
56: }
58: static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
59: {
60: PC_IS* pcis=(PC_IS*)pc->data;
61: PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
62: PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
64: VecSet(pcbddc->work_scaling,0.0);
65: VecSet(y,0.0);
66: if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
67: PetscInt i;
68: const PetscScalar *array_x,*array_D;
69: PetscScalar *array;
70: VecGetArrayRead(x,&array_x);
71: VecGetArrayRead(pcis->D,&array_D);
72: VecGetArray(pcbddc->work_scaling,&array);
73: for (i=0;i<deluxe_ctx->n_simple;i++) {
74: array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]];
75: }
76: VecRestoreArray(pcbddc->work_scaling,&array);
77: VecRestoreArrayRead(pcis->D,&array_D);
78: VecRestoreArrayRead(x,&array_x);
79: }
80: /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
81: if (deluxe_ctx->seq_mat) {
82: PetscInt i;
83: for (i=0;i<deluxe_ctx->seq_n;i++) {
84: if (deluxe_ctx->change) {
85: VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
86: VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
87: if (deluxe_ctx->change_with_qr) {
88: Mat change;
90: KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
91: MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
92: } else {
93: KSPSolve(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
94: }
95: } else {
96: VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
97: VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
98: }
99: MatMultTranspose(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
100: if (deluxe_ctx->seq_mat_inv_sum[i]) {
101: PetscScalar *x;
103: VecGetArray(deluxe_ctx->seq_work2[i],&x);
104: VecPlaceArray(deluxe_ctx->seq_work1[i],x);
105: VecRestoreArray(deluxe_ctx->seq_work2[i],&x);
106: MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
107: VecResetArray(deluxe_ctx->seq_work1[i]);
108: }
109: if (deluxe_ctx->change) {
110: Mat change;
112: KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
113: MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
114: VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
115: VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
116: } else {
117: VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
118: VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
119: }
120: }
121: }
122: /* put local boundary part in global vector */
123: VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);
124: VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);
125: return 0;
126: }
128: PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
129: {
130: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
136: PetscUseMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));
137: return 0;
138: }
140: static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
141: {
142: PC_IS *pcis = (PC_IS*)pc->data;
144: VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);
145: VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);
146: /* Apply partition of unity */
147: VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);
148: return 0;
149: }
151: static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
152: {
153: PC_IS* pcis=(PC_IS*)pc->data;
154: PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
155: PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
157: /* get local boundary part of global vector */
158: VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);
159: VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);
160: if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
161: PetscInt i;
162: PetscScalar *array_y;
163: const PetscScalar *array_D;
164: VecGetArray(y,&array_y);
165: VecGetArrayRead(pcis->D,&array_D);
166: for (i=0;i<deluxe_ctx->n_simple;i++) {
167: array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
168: }
169: VecRestoreArrayRead(pcis->D,&array_D);
170: VecRestoreArray(y,&array_y);
171: }
172: /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
173: if (deluxe_ctx->seq_mat) {
174: PetscInt i;
175: for (i=0;i<deluxe_ctx->seq_n;i++) {
176: if (deluxe_ctx->change) {
177: Mat change;
179: VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
180: VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
181: KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
182: MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
183: } else {
184: VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
185: VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
186: }
187: if (deluxe_ctx->seq_mat_inv_sum[i]) {
188: PetscScalar *x;
190: VecGetArray(deluxe_ctx->seq_work1[i],&x);
191: VecPlaceArray(deluxe_ctx->seq_work2[i],x);
192: VecRestoreArray(deluxe_ctx->seq_work1[i],&x);
193: MatSolve(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
194: VecResetArray(deluxe_ctx->seq_work2[i]);
195: }
196: MatMult(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
197: if (deluxe_ctx->change) {
198: if (deluxe_ctx->change_with_qr) {
199: Mat change;
201: KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
202: MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
203: } else {
204: KSPSolveTranspose(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
205: }
206: VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);
207: VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);
208: } else {
209: VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);
210: VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);
211: }
212: }
213: }
214: return 0;
215: }
217: PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
218: {
219: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
225: PetscUseMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));
226: return 0;
227: }
229: PetscErrorCode PCBDDCScalingSetUp(PC pc)
230: {
231: PC_IS* pcis=(PC_IS*)pc->data;
232: PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
235: PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level],pc,0,0,0);
236: /* create work vector for the operator */
237: VecDestroy(&pcbddc->work_scaling);
238: VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);
239: /* always rebuild pcis->D */
240: if (pcis->use_stiffness_scaling) {
241: PetscScalar *a;
242: PetscInt i,n;
244: MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);
245: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);
246: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);
247: VecAbs(pcis->D);
248: VecGetLocalSize(pcis->D,&n);
249: VecGetArray(pcis->D,&a);
250: for (i=0;i<n;i++) if (PetscAbsScalar(a[i])<PETSC_SMALL) a[i] = 1.0;
251: VecRestoreArray(pcis->D,&a);
252: }
253: VecSet(pcis->vec1_global,0.0);
254: VecScatterBegin(pcis->global_to_B,pcis->D,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
255: VecScatterEnd(pcis->global_to_B,pcis->D,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
256: VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
257: VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
258: VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);
259: /* now setup */
260: if (pcbddc->use_deluxe_scaling) {
261: if (!pcbddc->deluxe_ctx) {
262: PCBDDCScalingCreate_Deluxe(pc);
263: }
264: PCBDDCScalingSetUp_Deluxe(pc);
265: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);
266: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);
267: } else {
268: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);
269: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);
270: }
271: PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level],pc,0,0,0);
273: /* test */
274: if (pcbddc->dbg_flag) {
275: Mat B0_B = NULL;
276: Vec B0_Bv = NULL, B0_Bv2 = NULL;
277: Vec vec2_global;
278: PetscViewer viewer = pcbddc->dbg_viewer;
279: PetscReal error;
281: /* extension -> from local to parallel */
282: VecSet(pcis->vec1_global,0.0);
283: VecSetRandom(pcis->vec1_B,NULL);
284: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
285: VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
286: VecDuplicate(pcis->vec1_global,&vec2_global);
287: VecCopy(pcis->vec1_global,vec2_global);
288: VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
289: VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
290: if (pcbddc->benign_n) {
291: IS is_dummy;
293: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
294: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
295: ISDestroy(&is_dummy);
296: MatCreateVecs(B0_B,NULL,&B0_Bv);
297: VecDuplicate(B0_Bv,&B0_Bv2);
298: MatMult(B0_B,pcis->vec1_B,B0_Bv);
299: }
300: PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);
301: if (pcbddc->benign_saddle_point) {
302: PetscReal errorl = 0.;
303: VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
304: VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
305: if (pcbddc->benign_n) {
306: MatMult(B0_B,pcis->vec1_B,B0_Bv2);
307: VecAXPY(B0_Bv,-1.0,B0_Bv2);
308: VecNorm(B0_Bv,NORM_INFINITY,&errorl);
309: }
310: MPI_Allreduce(&errorl,&error,1,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)pc));
311: PetscViewerASCIIPrintf(viewer,"Error benign extension %1.14e\n",error);
312: }
313: VecAXPY(pcis->vec1_global,-1.0,vec2_global);
314: VecNorm(pcis->vec1_global,NORM_INFINITY,&error);
315: PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);
316: VecDestroy(&vec2_global);
318: /* restriction -> from parallel to local */
319: VecSet(pcis->vec1_global,0.0);
320: VecSetRandom(pcis->vec1_B,NULL);
321: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
322: VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
323: PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);
324: VecScale(pcis->vec1_B,-1.0);
325: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
326: VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
327: VecNorm(pcis->vec1_global,NORM_INFINITY,&error);
328: PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);
329: MatDestroy(&B0_B);
330: VecDestroy(&B0_Bv);
331: VecDestroy(&B0_Bv2);
332: }
333: return 0;
334: }
336: PetscErrorCode PCBDDCScalingDestroy(PC pc)
337: {
338: PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
340: if (pcbddc->deluxe_ctx) {
341: PCBDDCScalingDestroy_Deluxe(pc);
342: }
343: VecDestroy(&pcbddc->work_scaling);
344: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);
345: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);
346: return 0;
347: }
349: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
350: {
351: PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
352: PCBDDCDeluxeScaling deluxe_ctx;
354: PetscNew(&deluxe_ctx);
355: pcbddc->deluxe_ctx = deluxe_ctx;
356: return 0;
357: }
359: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
360: {
361: PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
363: PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);
364: PetscFree(pcbddc->deluxe_ctx);
365: return 0;
366: }
368: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
369: {
370: PetscInt i;
372: PetscFree(deluxe_ctx->idx_simple_B);
373: deluxe_ctx->n_simple = 0;
374: for (i=0;i<deluxe_ctx->seq_n;i++) {
375: VecScatterDestroy(&deluxe_ctx->seq_scctx[i]);
376: VecDestroy(&deluxe_ctx->seq_work1[i]);
377: VecDestroy(&deluxe_ctx->seq_work2[i]);
378: MatDestroy(&deluxe_ctx->seq_mat[i]);
379: MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
380: }
381: PetscFree5(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2,deluxe_ctx->seq_mat,deluxe_ctx->seq_mat_inv_sum);
382: PetscFree(deluxe_ctx->workspace);
383: deluxe_ctx->seq_n = 0;
384: return 0;
385: }
387: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
388: {
389: PC_IS *pcis=(PC_IS*)pc->data;
390: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
391: PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
392: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
394: /* reset data structures if the topology has changed */
395: if (pcbddc->recompute_topography) {
396: PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);
397: }
399: /* Compute data structures to solve sequential problems */
400: PCBDDCScalingSetUp_Deluxe_Private(pc);
402: /* diagonal scaling on interface dofs not contained in cc */
403: if (sub_schurs->is_vertices || sub_schurs->is_dir) {
404: PetscInt n_com,n_dir;
405: n_com = 0;
406: if (sub_schurs->is_vertices) {
407: ISGetLocalSize(sub_schurs->is_vertices,&n_com);
408: }
409: n_dir = 0;
410: if (sub_schurs->is_dir) {
411: ISGetLocalSize(sub_schurs->is_dir,&n_dir);
412: }
413: if (!deluxe_ctx->n_simple) {
414: deluxe_ctx->n_simple = n_dir + n_com;
415: PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);
416: if (sub_schurs->is_vertices) {
417: PetscInt nmap;
418: const PetscInt *idxs;
420: ISGetIndices(sub_schurs->is_vertices,&idxs);
421: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_com,idxs,&nmap,deluxe_ctx->idx_simple_B);
423: ISRestoreIndices(sub_schurs->is_vertices,&idxs);
424: }
425: if (sub_schurs->is_dir) {
426: PetscInt nmap;
427: const PetscInt *idxs;
429: ISGetIndices(sub_schurs->is_dir,&idxs);
430: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_dir,idxs,&nmap,deluxe_ctx->idx_simple_B+n_com);
432: ISRestoreIndices(sub_schurs->is_dir,&idxs);
433: }
434: PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B);
435: } else {
437: }
438: } else {
439: deluxe_ctx->n_simple = 0;
440: deluxe_ctx->idx_simple_B = NULL;
441: }
442: return 0;
443: }
445: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
446: {
447: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
448: PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
449: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
450: PetscScalar *matdata,*matdata2;
451: PetscInt i,max_subset_size,cum,cum2;
452: const PetscInt *idxs;
453: PetscBool newsetup = PETSC_FALSE;
456: if (!sub_schurs->n_subs) return 0;
458: /* Allocate arrays for subproblems */
459: if (!deluxe_ctx->seq_n) {
460: deluxe_ctx->seq_n = sub_schurs->n_subs;
461: PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum);
462: newsetup = PETSC_TRUE;
465: /* the change of basis is just a reference to sub_schurs->change (if any) */
466: deluxe_ctx->change = sub_schurs->change;
467: deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;
469: /* Create objects for deluxe */
470: max_subset_size = 0;
471: for (i=0;i<sub_schurs->n_subs;i++) {
472: PetscInt subset_size;
473: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
474: max_subset_size = PetscMax(subset_size,max_subset_size);
475: }
476: if (newsetup) {
477: PetscMalloc1(2*max_subset_size,&deluxe_ctx->workspace);
478: }
479: cum = cum2 = 0;
480: ISGetIndices(sub_schurs->is_Ej_all,&idxs);
481: MatSeqAIJGetArray(sub_schurs->S_Ej_all,&matdata);
482: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&matdata2);
483: for (i=0;i<deluxe_ctx->seq_n;i++) {
484: PetscInt subset_size;
486: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
487: if (newsetup) {
488: IS sub;
489: /* work vectors */
490: VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[i]);
491: VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace+subset_size,&deluxe_ctx->seq_work2[i]);
493: /* scatters */
494: ISCreateGeneral(PETSC_COMM_SELF,subset_size,idxs+cum,PETSC_COPY_VALUES,&sub);
495: VecScatterCreate(pcbddc->work_scaling,sub,deluxe_ctx->seq_work1[i],NULL,&deluxe_ctx->seq_scctx[i]);
496: ISDestroy(&sub);
497: }
499: /* S_E_j */
500: MatDestroy(&deluxe_ctx->seq_mat[i]);
501: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata+cum2,&deluxe_ctx->seq_mat[i]);
503: /* \sum_k S^k_E_j */
504: MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
505: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata2+cum2,&deluxe_ctx->seq_mat_inv_sum[i]);
506: MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_SPD,sub_schurs->is_posdef);
507: MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_HERMITIAN,sub_schurs->is_hermitian);
508: if (sub_schurs->is_hermitian) {
509: MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL);
510: } else {
511: MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL,NULL);
512: }
513: if (pcbddc->deluxe_singlemat) {
514: Mat X,Y;
515: if (!sub_schurs->is_hermitian) {
516: MatTranspose(deluxe_ctx->seq_mat[i],MAT_INITIAL_MATRIX,&X);
517: } else {
518: PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]);
519: X = deluxe_ctx->seq_mat[i];
520: }
521: MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&Y);
522: if (!sub_schurs->is_hermitian) {
523: PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i],X,Y);
524: } else {
525: MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i],X,Y);
526: }
528: MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
529: MatDestroy(&deluxe_ctx->seq_mat[i]);
530: MatDestroy(&X);
531: if (deluxe_ctx->change) {
532: Mat C,CY;
534: KSPGetOperators(deluxe_ctx->change[i],&C,NULL);
535: MatMatMult(C,Y,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&CY);
536: MatMatTransposeMult(CY,C,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);
537: MatDestroy(&CY);
538: MatProductClear(Y); /* clear internal matproduct structure of Y since CY is destroyed */
539: }
540: MatTranspose(Y,MAT_INPLACE_MATRIX,&Y);
541: deluxe_ctx->seq_mat[i] = Y;
542: }
543: cum += subset_size;
544: cum2 += subset_size*subset_size;
545: }
546: ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);
547: MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&matdata);
548: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&matdata2);
549: if (pcbddc->deluxe_singlemat) {
550: deluxe_ctx->change = NULL;
551: deluxe_ctx->change_with_qr = PETSC_FALSE;
552: }
554: if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
555: for (i=0;i<deluxe_ctx->seq_n;i++) {
556: if (newsetup) {
557: PC pc;
559: KSPGetPC(deluxe_ctx->change[i],&pc);
560: PCSetType(pc,PCLU);
561: KSPSetFromOptions(deluxe_ctx->change[i]);
562: }
563: KSPSetUp(deluxe_ctx->change[i]);
564: }
565: }
566: return 0;
567: }