Actual source code: bddcscalingbasic.c

  1: #include <../src/ksp/pc/impls/bddc/bddc.h>
  2: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  3: #include <petscblaslapack.h>
  4: #include <../src/mat/impls/dense/seq/dense.h>

  6: /* prototypes for deluxe functions */
  7: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
  8: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
  9: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
 10: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC);
 11: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);

 13: static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X)
 14: {
 15:   Mat_SeqDense      *mat = (Mat_SeqDense*)A->data;
 16:   const PetscScalar *b;
 17:   PetscScalar       *x;
 18:   PetscInt          n;
 19:   PetscBLASInt      nrhs,info,m;
 20:   PetscBool         flg;

 22:   PetscBLASIntCast(A->rmap->n,&m);
 23:   PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);
 25:   PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);

 28:   MatGetSize(B,NULL,&n);
 29:   PetscBLASIntCast(n,&nrhs);
 30:   MatDenseGetArrayRead(B,&b);
 31:   MatDenseGetArray(X,&x);
 32:   PetscArraycpy(x,b,m*nrhs);
 33:   MatDenseRestoreArrayRead(B,&b);

 35:   if (A->factortype == MAT_FACTOR_LU) {
 36:     PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("T",&m,&nrhs,mat->v,&mat->lda,mat->pivots,x,&m,&info));
 38:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only LU factor supported");

 40:   MatDenseRestoreArray(X,&x);
 41:   PetscLogFlops(nrhs*(2.0*m*m - m));
 42:   return 0;
 43: }

 45: static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
 46: {
 47:   PC_IS*         pcis = (PC_IS*)pc->data;
 48:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;

 50:   /* Apply partition of unity */
 51:   VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);
 52:   VecSet(global_vector,0.0);
 53:   VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);
 54:   VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);
 55:   return 0;
 56: }

 58: static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
 59: {
 60:   PC_IS*              pcis=(PC_IS*)pc->data;
 61:   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
 62:   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;

 64:   VecSet(pcbddc->work_scaling,0.0);
 65:   VecSet(y,0.0);
 66:   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
 67:     PetscInt          i;
 68:     const PetscScalar *array_x,*array_D;
 69:     PetscScalar       *array;
 70:     VecGetArrayRead(x,&array_x);
 71:     VecGetArrayRead(pcis->D,&array_D);
 72:     VecGetArray(pcbddc->work_scaling,&array);
 73:     for (i=0;i<deluxe_ctx->n_simple;i++) {
 74:       array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]];
 75:     }
 76:     VecRestoreArray(pcbddc->work_scaling,&array);
 77:     VecRestoreArrayRead(pcis->D,&array_D);
 78:     VecRestoreArrayRead(x,&array_x);
 79:   }
 80:   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
 81:   if (deluxe_ctx->seq_mat) {
 82:     PetscInt i;
 83:     for (i=0;i<deluxe_ctx->seq_n;i++) {
 84:       if (deluxe_ctx->change) {
 85:         VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
 86:         VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
 87:         if (deluxe_ctx->change_with_qr) {
 88:           Mat change;

 90:           KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
 91:           MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
 92:         } else {
 93:           KSPSolve(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
 94:         }
 95:       } else {
 96:         VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
 97:         VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
 98:       }
 99:       MatMultTranspose(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
100:       if (deluxe_ctx->seq_mat_inv_sum[i]) {
101:         PetscScalar *x;

103:         VecGetArray(deluxe_ctx->seq_work2[i],&x);
104:         VecPlaceArray(deluxe_ctx->seq_work1[i],x);
105:         VecRestoreArray(deluxe_ctx->seq_work2[i],&x);
106:         MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
107:         VecResetArray(deluxe_ctx->seq_work1[i]);
108:       }
109:       if (deluxe_ctx->change) {
110:         Mat change;

112:         KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
113:         MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
114:         VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
115:         VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
116:       } else {
117:         VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
118:         VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
119:       }
120:     }
121:   }
122:   /* put local boundary part in global vector */
123:   VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);
124:   VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);
125:   return 0;
126: }

128: PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
129: {
130:   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;

136:   PetscUseMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));
137:   return 0;
138: }

140: static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
141: {
142:   PC_IS          *pcis = (PC_IS*)pc->data;

144:   VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);
145:   VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);
146:   /* Apply partition of unity */
147:   VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);
148:   return 0;
149: }

151: static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
152: {
153:   PC_IS*              pcis=(PC_IS*)pc->data;
154:   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
155:   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;

157:   /* get local boundary part of global vector */
158:   VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);
159:   VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);
160:   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
161:     PetscInt          i;
162:     PetscScalar       *array_y;
163:     const PetscScalar *array_D;
164:     VecGetArray(y,&array_y);
165:     VecGetArrayRead(pcis->D,&array_D);
166:     for (i=0;i<deluxe_ctx->n_simple;i++) {
167:       array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
168:     }
169:     VecRestoreArrayRead(pcis->D,&array_D);
170:     VecRestoreArray(y,&array_y);
171:   }
172:   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
173:   if (deluxe_ctx->seq_mat) {
174:     PetscInt i;
175:     for (i=0;i<deluxe_ctx->seq_n;i++) {
176:       if (deluxe_ctx->change) {
177:         Mat change;

179:         VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
180:         VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
181:         KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
182:         MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
183:       } else {
184:         VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
185:         VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
186:       }
187:       if (deluxe_ctx->seq_mat_inv_sum[i]) {
188:         PetscScalar *x;

190:         VecGetArray(deluxe_ctx->seq_work1[i],&x);
191:         VecPlaceArray(deluxe_ctx->seq_work2[i],x);
192:         VecRestoreArray(deluxe_ctx->seq_work1[i],&x);
193:         MatSolve(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
194:         VecResetArray(deluxe_ctx->seq_work2[i]);
195:       }
196:       MatMult(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
197:       if (deluxe_ctx->change) {
198:         if (deluxe_ctx->change_with_qr) {
199:           Mat change;

201:           KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
202:           MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
203:         } else {
204:           KSPSolveTranspose(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
205:         }
206:         VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);
207:         VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);
208:       } else {
209:         VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);
210:         VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);
211:       }
212:     }
213:   }
214:   return 0;
215: }

217: PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
218: {
219:   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;

225:   PetscUseMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));
226:   return 0;
227: }

229: PetscErrorCode PCBDDCScalingSetUp(PC pc)
230: {
231:   PC_IS*         pcis=(PC_IS*)pc->data;
232:   PC_BDDC*       pcbddc=(PC_BDDC*)pc->data;

235:   PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level],pc,0,0,0);
236:   /* create work vector for the operator */
237:   VecDestroy(&pcbddc->work_scaling);
238:   VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);
239:   /* always rebuild pcis->D */
240:   if (pcis->use_stiffness_scaling) {
241:     PetscScalar *a;
242:     PetscInt    i,n;

244:     MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);
245:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);
246:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);
247:     VecAbs(pcis->D);
248:     VecGetLocalSize(pcis->D,&n);
249:     VecGetArray(pcis->D,&a);
250:     for (i=0;i<n;i++) if (PetscAbsScalar(a[i])<PETSC_SMALL) a[i] = 1.0;
251:     VecRestoreArray(pcis->D,&a);
252:   }
253:   VecSet(pcis->vec1_global,0.0);
254:   VecScatterBegin(pcis->global_to_B,pcis->D,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
255:   VecScatterEnd(pcis->global_to_B,pcis->D,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
256:   VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
257:   VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
258:   VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);
259:   /* now setup */
260:   if (pcbddc->use_deluxe_scaling) {
261:     if (!pcbddc->deluxe_ctx) {
262:       PCBDDCScalingCreate_Deluxe(pc);
263:     }
264:     PCBDDCScalingSetUp_Deluxe(pc);
265:     PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);
266:     PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);
267:   } else {
268:     PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);
269:     PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);
270:   }
271:   PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level],pc,0,0,0);

273:   /* test */
274:   if (pcbddc->dbg_flag) {
275:     Mat         B0_B = NULL;
276:     Vec         B0_Bv = NULL, B0_Bv2 = NULL;
277:     Vec         vec2_global;
278:     PetscViewer viewer = pcbddc->dbg_viewer;
279:     PetscReal   error;

281:     /* extension -> from local to parallel */
282:     VecSet(pcis->vec1_global,0.0);
283:     VecSetRandom(pcis->vec1_B,NULL);
284:     VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
285:     VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
286:     VecDuplicate(pcis->vec1_global,&vec2_global);
287:     VecCopy(pcis->vec1_global,vec2_global);
288:     VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
289:     VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
290:     if (pcbddc->benign_n) {
291:       IS is_dummy;

293:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
294:       MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
295:       ISDestroy(&is_dummy);
296:       MatCreateVecs(B0_B,NULL,&B0_Bv);
297:       VecDuplicate(B0_Bv,&B0_Bv2);
298:       MatMult(B0_B,pcis->vec1_B,B0_Bv);
299:     }
300:     PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);
301:     if (pcbddc->benign_saddle_point) {
302:       PetscReal errorl = 0.;
303:       VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
304:       VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
305:       if (pcbddc->benign_n) {
306:         MatMult(B0_B,pcis->vec1_B,B0_Bv2);
307:         VecAXPY(B0_Bv,-1.0,B0_Bv2);
308:         VecNorm(B0_Bv,NORM_INFINITY,&errorl);
309:       }
310:       MPI_Allreduce(&errorl,&error,1,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)pc));
311:       PetscViewerASCIIPrintf(viewer,"Error benign extension %1.14e\n",error);
312:     }
313:     VecAXPY(pcis->vec1_global,-1.0,vec2_global);
314:     VecNorm(pcis->vec1_global,NORM_INFINITY,&error);
315:     PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);
316:     VecDestroy(&vec2_global);

318:     /* restriction -> from parallel to local */
319:     VecSet(pcis->vec1_global,0.0);
320:     VecSetRandom(pcis->vec1_B,NULL);
321:     VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
322:     VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
323:     PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);
324:     VecScale(pcis->vec1_B,-1.0);
325:     VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
326:     VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
327:     VecNorm(pcis->vec1_global,NORM_INFINITY,&error);
328:     PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);
329:     MatDestroy(&B0_B);
330:     VecDestroy(&B0_Bv);
331:     VecDestroy(&B0_Bv2);
332:   }
333:   return 0;
334: }

336: PetscErrorCode PCBDDCScalingDestroy(PC pc)
337: {
338:   PC_BDDC*       pcbddc=(PC_BDDC*)pc->data;

340:   if (pcbddc->deluxe_ctx) {
341:     PCBDDCScalingDestroy_Deluxe(pc);
342:   }
343:   VecDestroy(&pcbddc->work_scaling);
344:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);
345:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);
346:   return 0;
347: }

349: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
350: {
351:   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
352:   PCBDDCDeluxeScaling deluxe_ctx;

354:   PetscNew(&deluxe_ctx);
355:   pcbddc->deluxe_ctx = deluxe_ctx;
356:   return 0;
357: }

359: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
360: {
361:   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;

363:   PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);
364:   PetscFree(pcbddc->deluxe_ctx);
365:   return 0;
366: }

368: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
369: {
370:   PetscInt       i;

372:   PetscFree(deluxe_ctx->idx_simple_B);
373:   deluxe_ctx->n_simple = 0;
374:   for (i=0;i<deluxe_ctx->seq_n;i++) {
375:     VecScatterDestroy(&deluxe_ctx->seq_scctx[i]);
376:     VecDestroy(&deluxe_ctx->seq_work1[i]);
377:     VecDestroy(&deluxe_ctx->seq_work2[i]);
378:     MatDestroy(&deluxe_ctx->seq_mat[i]);
379:     MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
380:   }
381:   PetscFree5(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2,deluxe_ctx->seq_mat,deluxe_ctx->seq_mat_inv_sum);
382:   PetscFree(deluxe_ctx->workspace);
383:   deluxe_ctx->seq_n = 0;
384:   return 0;
385: }

387: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
388: {
389:   PC_IS               *pcis=(PC_IS*)pc->data;
390:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
391:   PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
392:   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;

394:   /* reset data structures if the topology has changed */
395:   if (pcbddc->recompute_topography) {
396:     PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);
397:   }

399:   /* Compute data structures to solve sequential problems */
400:   PCBDDCScalingSetUp_Deluxe_Private(pc);

402:   /* diagonal scaling on interface dofs not contained in cc */
403:   if (sub_schurs->is_vertices || sub_schurs->is_dir) {
404:     PetscInt n_com,n_dir;
405:     n_com = 0;
406:     if (sub_schurs->is_vertices) {
407:       ISGetLocalSize(sub_schurs->is_vertices,&n_com);
408:     }
409:     n_dir = 0;
410:     if (sub_schurs->is_dir) {
411:       ISGetLocalSize(sub_schurs->is_dir,&n_dir);
412:     }
413:     if (!deluxe_ctx->n_simple) {
414:       deluxe_ctx->n_simple = n_dir + n_com;
415:       PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);
416:       if (sub_schurs->is_vertices) {
417:         PetscInt       nmap;
418:         const PetscInt *idxs;

420:         ISGetIndices(sub_schurs->is_vertices,&idxs);
421:         ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_com,idxs,&nmap,deluxe_ctx->idx_simple_B);
423:         ISRestoreIndices(sub_schurs->is_vertices,&idxs);
424:       }
425:       if (sub_schurs->is_dir) {
426:         PetscInt       nmap;
427:         const PetscInt *idxs;

429:         ISGetIndices(sub_schurs->is_dir,&idxs);
430:         ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_dir,idxs,&nmap,deluxe_ctx->idx_simple_B+n_com);
432:         ISRestoreIndices(sub_schurs->is_dir,&idxs);
433:       }
434:       PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B);
435:     } else {
437:     }
438:   } else {
439:     deluxe_ctx->n_simple = 0;
440:     deluxe_ctx->idx_simple_B = NULL;
441:   }
442:   return 0;
443: }

445: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
446: {
447:   PC_BDDC                *pcbddc=(PC_BDDC*)pc->data;
448:   PCBDDCDeluxeScaling    deluxe_ctx=pcbddc->deluxe_ctx;
449:   PCBDDCSubSchurs        sub_schurs = pcbddc->sub_schurs;
450:   PetscScalar            *matdata,*matdata2;
451:   PetscInt               i,max_subset_size,cum,cum2;
452:   const PetscInt         *idxs;
453:   PetscBool              newsetup = PETSC_FALSE;

456:   if (!sub_schurs->n_subs) return 0;

458:   /* Allocate arrays for subproblems */
459:   if (!deluxe_ctx->seq_n) {
460:     deluxe_ctx->seq_n = sub_schurs->n_subs;
461:     PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum);
462:     newsetup = PETSC_TRUE;

465:   /* the change of basis is just a reference to sub_schurs->change (if any) */
466:   deluxe_ctx->change         = sub_schurs->change;
467:   deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;

469:   /* Create objects for deluxe */
470:   max_subset_size = 0;
471:   for (i=0;i<sub_schurs->n_subs;i++) {
472:     PetscInt subset_size;
473:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
474:     max_subset_size = PetscMax(subset_size,max_subset_size);
475:   }
476:   if (newsetup) {
477:     PetscMalloc1(2*max_subset_size,&deluxe_ctx->workspace);
478:   }
479:   cum = cum2 = 0;
480:   ISGetIndices(sub_schurs->is_Ej_all,&idxs);
481:   MatSeqAIJGetArray(sub_schurs->S_Ej_all,&matdata);
482:   MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&matdata2);
483:   for (i=0;i<deluxe_ctx->seq_n;i++) {
484:     PetscInt     subset_size;

486:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
487:     if (newsetup) {
488:       IS  sub;
489:       /* work vectors */
490:       VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[i]);
491:       VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace+subset_size,&deluxe_ctx->seq_work2[i]);

493:       /* scatters */
494:       ISCreateGeneral(PETSC_COMM_SELF,subset_size,idxs+cum,PETSC_COPY_VALUES,&sub);
495:       VecScatterCreate(pcbddc->work_scaling,sub,deluxe_ctx->seq_work1[i],NULL,&deluxe_ctx->seq_scctx[i]);
496:       ISDestroy(&sub);
497:     }

499:     /* S_E_j */
500:     MatDestroy(&deluxe_ctx->seq_mat[i]);
501:     MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata+cum2,&deluxe_ctx->seq_mat[i]);

503:     /* \sum_k S^k_E_j */
504:     MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
505:     MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata2+cum2,&deluxe_ctx->seq_mat_inv_sum[i]);
506:     MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_SPD,sub_schurs->is_posdef);
507:     MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_HERMITIAN,sub_schurs->is_hermitian);
508:     if (sub_schurs->is_hermitian) {
509:       MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL);
510:     } else {
511:       MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL,NULL);
512:     }
513:     if (pcbddc->deluxe_singlemat) {
514:       Mat X,Y;
515:       if (!sub_schurs->is_hermitian) {
516:         MatTranspose(deluxe_ctx->seq_mat[i],MAT_INITIAL_MATRIX,&X);
517:       } else {
518:         PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]);
519:         X    = deluxe_ctx->seq_mat[i];
520:       }
521:       MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&Y);
522:       if (!sub_schurs->is_hermitian) {
523:         PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i],X,Y);
524:       } else {
525:         MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i],X,Y);
526:       }

528:       MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
529:       MatDestroy(&deluxe_ctx->seq_mat[i]);
530:       MatDestroy(&X);
531:       if (deluxe_ctx->change) {
532:         Mat C,CY;
534:         KSPGetOperators(deluxe_ctx->change[i],&C,NULL);
535:         MatMatMult(C,Y,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&CY);
536:         MatMatTransposeMult(CY,C,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);
537:         MatDestroy(&CY);
538:         MatProductClear(Y); /* clear internal matproduct structure of Y since CY is destroyed */
539:       }
540:       MatTranspose(Y,MAT_INPLACE_MATRIX,&Y);
541:       deluxe_ctx->seq_mat[i] = Y;
542:     }
543:     cum += subset_size;
544:     cum2 += subset_size*subset_size;
545:   }
546:   ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);
547:   MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&matdata);
548:   MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&matdata2);
549:   if (pcbddc->deluxe_singlemat) {
550:     deluxe_ctx->change         = NULL;
551:     deluxe_ctx->change_with_qr = PETSC_FALSE;
552:   }

554:   if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
555:     for (i=0;i<deluxe_ctx->seq_n;i++) {
556:       if (newsetup) {
557:         PC pc;

559:         KSPGetPC(deluxe_ctx->change[i],&pc);
560:         PCSetType(pc,PCLU);
561:         KSPSetFromOptions(deluxe_ctx->change[i]);
562:       }
563:       KSPSetUp(deluxe_ctx->change[i]);
564:     }
565:   }
566:   return 0;
567: }