Actual source code: axpy.c


  2: #include <petsc/private/matimpl.h>

  4: static PetscErrorCode MatTransposeAXPY_Private(Mat Y,PetscScalar a,Mat X,MatStructure str,Mat T)
  5: {
  6:   Mat            A,F;
  7:   PetscErrorCode (*f)(Mat,Mat*);

  9:   PetscObjectQueryFunction((PetscObject)T,"MatTransposeGetMat_C",&f);
 10:   if (f) {
 11:     MatTransposeGetMat(T,&A);
 12:     if (T == X) {
 13:       PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
 14:       MatTranspose(A,MAT_INITIAL_MATRIX,&F);
 15:       A = Y;
 16:     } else {
 17:       PetscInfo(NULL,"Transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
 18:       MatTranspose(X,MAT_INITIAL_MATRIX,&F);
 19:     }
 20:   } else {
 21:     MatHermitianTransposeGetMat(T,&A);
 22:     if (T == X) {
 23:       PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
 24:       MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&F);
 25:       A = Y;
 26:     } else {
 27:       PetscInfo(NULL,"Hermitian transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
 28:       MatHermitianTranspose(X,MAT_INITIAL_MATRIX,&F);
 29:     }
 30:   }
 31:   MatAXPY(A,a,F,str);
 32:   MatDestroy(&F);
 33:   return 0;
 34: }

 36: /*@
 37:    MatAXPY - Computes Y = a*X + Y.

 39:    Logically  Collective on Mat

 41:    Input Parameters:
 42: +  a - the scalar multiplier
 43: .  X - the first matrix
 44: .  Y - the second matrix
 45: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)

 47:    Notes: No operation is performed when a is zero.

 49:    Level: intermediate

 51: .seealso: MatAYPX()
 52:  @*/
 53: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
 54: {
 55:   PetscInt       M1,M2,N1,N2;
 56:   PetscInt       m1,m2,n1,n2;
 57:   MatType        t1,t2;
 58:   PetscBool      sametype,transpose;

 64:   MatGetSize(X,&M1,&N1);
 65:   MatGetSize(Y,&M2,&N2);
 66:   MatGetLocalSize(X,&m1,&n1);
 67:   MatGetLocalSize(Y,&m2,&n2);
 72:   if (a == (PetscScalar)0.0) return 0;
 73:   if (Y == X) {
 74:     MatScale(Y,1.0 + a);
 75:     return 0;
 76:   }
 77:   MatGetType(X,&t1);
 78:   MatGetType(Y,&t2);
 79:   PetscStrcmp(t1,t2,&sametype);
 80:   PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
 81:   if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
 82:     (*Y->ops->axpy)(Y,a,X,str);
 83:   } else {
 84:     PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose);
 85:     if (transpose) {
 86:       MatTransposeAXPY_Private(Y,a,X,str,X);
 87:     } else {
 88:       PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose);
 89:       if (transpose) {
 90:         MatTransposeAXPY_Private(Y,a,X,str,Y);
 91:       } else {
 92:         MatAXPY_Basic(Y,a,X,str);
 93:       }
 94:     }
 95:   }
 96:   PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
 97:   return 0;
 98: }

100: PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
101: {
102:   PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;

104:   /* look for any available faster alternative to the general preallocator */
105:   PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);
106:   if (preall) {
107:     (*preall)(Y,X,B);
108:   } else { /* Use MatPrellocator, assumes same row-col distribution */
109:     Mat      preallocator;
110:     PetscInt r,rstart,rend;
111:     PetscInt m,n,M,N;

113:     MatGetRowUpperTriangular(Y);
114:     MatGetRowUpperTriangular(X);
115:     MatGetSize(Y,&M,&N);
116:     MatGetLocalSize(Y,&m,&n);
117:     MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);
118:     MatSetType(preallocator,MATPREALLOCATOR);
119:     MatSetLayouts(preallocator,Y->rmap,Y->cmap);
120:     MatSetUp(preallocator);
121:     MatGetOwnershipRange(preallocator,&rstart,&rend);
122:     for (r = rstart; r < rend; ++r) {
123:       PetscInt          ncols;
124:       const PetscInt    *row;
125:       const PetscScalar *vals;

127:       MatGetRow(Y,r,&ncols,&row,&vals);
128:       MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
129:       MatRestoreRow(Y,r,&ncols,&row,&vals);
130:       MatGetRow(X,r,&ncols,&row,&vals);
131:       MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
132:       MatRestoreRow(X,r,&ncols,&row,&vals);
133:     }
134:     MatSetOption(preallocator,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
135:     MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);
136:     MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);
137:     MatRestoreRowUpperTriangular(Y);
138:     MatRestoreRowUpperTriangular(X);

140:     MatCreate(PetscObjectComm((PetscObject)Y),B);
141:     MatSetType(*B,((PetscObject)Y)->type_name);
142:     MatSetLayouts(*B,Y->rmap,Y->cmap);
143:     MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);
144:     MatDestroy(&preallocator);
145:   }
146:   return 0;
147: }

149: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
150: {
151:   PetscBool      isshell,isdense,isnest;

153:   MatIsShell(Y,&isshell);
154:   if (isshell) { /* MatShell has special support for AXPY */
155:     PetscErrorCode (*f)(Mat,PetscScalar,Mat,MatStructure);

157:     MatGetOperation(Y,MATOP_AXPY,(void (**)(void))&f);
158:     if (f) {
159:       (*f)(Y,a,X,str);
160:       return 0;
161:     }
162:   }
163:   /* no need to preallocate if Y is dense */
164:   PetscObjectBaseTypeCompareAny((PetscObject)Y,&isdense,MATSEQDENSE,MATMPIDENSE,"");
165:   if (isdense) {
166:     PetscObjectTypeCompare((PetscObject)X,MATNEST,&isnest);
167:     if (isnest) {
168:       MatAXPY_Dense_Nest(Y,a,X);
169:       return 0;
170:     }
171:     if (str == DIFFERENT_NONZERO_PATTERN || str == UNKNOWN_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
172:   }
173:   if (str != DIFFERENT_NONZERO_PATTERN && str != UNKNOWN_NONZERO_PATTERN) {
174:     PetscInt          i,start,end,j,ncols,m,n;
175:     const PetscInt    *row;
176:     PetscScalar       *val;
177:     const PetscScalar *vals;

179:     MatGetSize(X,&m,&n);
180:     MatGetOwnershipRange(X,&start,&end);
181:     MatGetRowUpperTriangular(X);
182:     if (a == 1.0) {
183:       for (i = start; i < end; i++) {
184:         MatGetRow(X,i,&ncols,&row,&vals);
185:         MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
186:         MatRestoreRow(X,i,&ncols,&row,&vals);
187:       }
188:     } else {
189:       PetscInt vs = 100;
190:       /* realloc if needed, as this function may be used in parallel */
191:       PetscMalloc1(vs,&val);
192:       for (i=start; i<end; i++) {
193:         MatGetRow(X,i,&ncols,&row,&vals);
194:         if (vs < ncols) {
195:           vs   = PetscMin(2*ncols,n);
196:           PetscRealloc(vs*sizeof(*val),&val);
197:         }
198:         for (j=0; j<ncols; j++) val[j] = a*vals[j];
199:         MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
200:         MatRestoreRow(X,i,&ncols,&row,&vals);
201:       }
202:       PetscFree(val);
203:     }
204:     MatRestoreRowUpperTriangular(X);
205:     MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
206:     MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
207:   } else {
208:     Mat B;

210:     MatAXPY_Basic_Preallocate(Y,X,&B);
211:     MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
212:     MatHeaderMerge(Y,&B);
213:   }
214:   return 0;
215: }

217: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
218: {
219:   PetscInt          i,start,end,j,ncols,m,n;
220:   const PetscInt    *row;
221:   PetscScalar       *val;
222:   const PetscScalar *vals;

224:   MatGetSize(X,&m,&n);
225:   MatGetOwnershipRange(X,&start,&end);
226:   MatGetRowUpperTriangular(Y);
227:   MatGetRowUpperTriangular(X);
228:   if (a == 1.0) {
229:     for (i = start; i < end; i++) {
230:       MatGetRow(Y,i,&ncols,&row,&vals);
231:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
232:       MatRestoreRow(Y,i,&ncols,&row,&vals);

234:       MatGetRow(X,i,&ncols,&row,&vals);
235:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
236:       MatRestoreRow(X,i,&ncols,&row,&vals);
237:     }
238:   } else {
239:     PetscInt vs = 100;
240:     /* realloc if needed, as this function may be used in parallel */
241:     PetscMalloc1(vs,&val);
242:     for (i=start; i<end; i++) {
243:       MatGetRow(Y,i,&ncols,&row,&vals);
244:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
245:       MatRestoreRow(Y,i,&ncols,&row,&vals);

247:       MatGetRow(X,i,&ncols,&row,&vals);
248:       if (vs < ncols) {
249:         vs   = PetscMin(2*ncols,n);
250:         PetscRealloc(vs*sizeof(*val),&val);
251:       }
252:       for (j=0; j<ncols; j++) val[j] = a*vals[j];
253:       MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
254:       MatRestoreRow(X,i,&ncols,&row,&vals);
255:     }
256:     PetscFree(val);
257:   }
258:   MatRestoreRowUpperTriangular(Y);
259:   MatRestoreRowUpperTriangular(X);
260:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
261:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
262:   return 0;
263: }

265: /*@
266:    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.

268:    Neighbor-wise Collective on Mat

270:    Input Parameters:
271: +  Y - the matrices
272: -  a - the PetscScalar

274:    Level: intermediate

276:    Notes:
277:     If Y is a rectangular matrix, the shift is done on the main diagonal Y_{ii} of the matrix (https://en.wikipedia.org/wiki/Main_diagonal)

279:     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
280:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
281:    entry). No operation is performed when a is zero.

283:    To form Y = Y + diag(V) use MatDiagonalSet()

285: .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
286:  @*/
287: PetscErrorCode  MatShift(Mat Y,PetscScalar a)
288: {
292:   MatCheckPreallocated(Y,1);
293:   if (a == 0.0) return 0;

295:   if (Y->ops->shift) (*Y->ops->shift)(Y,a);
296:   else MatShift_Basic(Y,a);

298:   PetscObjectStateIncrease((PetscObject)Y);
299:   return 0;
300: }

302: PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
303: {
304:   PetscInt          i,start,end;
305:   const PetscScalar *v;

307:   MatGetOwnershipRange(Y,&start,&end);
308:   VecGetArrayRead(D,&v);
309:   for (i=start; i<end; i++) {
310:     MatSetValues(Y,1,&i,1,&i,v+i-start,is);
311:   }
312:   VecRestoreArrayRead(D,&v);
313:   MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
314:   MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
315:   return 0;
316: }

318: /*@
319:    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
320:    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
321:    INSERT_VALUES.

323:    Neighbor-wise Collective on Mat

325:    Input Parameters:
326: +  Y - the input matrix
327: .  D - the diagonal matrix, represented as a vector
328: -  i - INSERT_VALUES or ADD_VALUES

330:    Notes:
331:     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
332:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
333:    entry).

335:    Level: intermediate

337: .seealso: MatShift(), MatScale(), MatDiagonalScale()
338: @*/
339: PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
340: {
341:   PetscInt       matlocal,veclocal;

345:   MatGetLocalSize(Y,&matlocal,NULL);
346:   VecGetLocalSize(D,&veclocal);
348:   if (Y->ops->diagonalset) {
349:     (*Y->ops->diagonalset)(Y,D,is);
350:   } else {
351:     MatDiagonalSet_Default(Y,D,is);
352:   }
353:   PetscObjectStateIncrease((PetscObject)Y);
354:   return 0;
355: }

357: /*@
358:    MatAYPX - Computes Y = a*Y + X.

360:    Logically on Mat

362:    Input Parameters:
363: +  a - the PetscScalar multiplier
364: .  Y - the first matrix
365: .  X - the second matrix
366: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)

368:    Level: intermediate

370: .seealso: MatAXPY()
371:  @*/
372: PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
373: {
374:   MatScale(Y,a);
375:   MatAXPY(Y,1.0,X,str);
376:   return 0;
377: }

379: /*@
380:     MatComputeOperator - Computes the explicit matrix

382:     Collective on Mat

384:     Input Parameters:
385: +   inmat - the matrix
386: -   mattype - the matrix type for the explicit operator

388:     Output Parameter:
389: .   mat - the explicit  operator

391:     Notes:
392:     This computation is done by applying the operators to columns of the identity matrix.
393:     This routine is costly in general, and is recommended for use only with relatively small systems.
394:     Currently, this routine uses a dense matrix format if mattype == NULL.

396:     Level: advanced

398: @*/
399: PetscErrorCode  MatComputeOperator(Mat inmat,MatType mattype,Mat *mat)
400: {
403:   MatConvert_Shell(inmat,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
404:   return 0;
405: }

407: /*@
408:     MatComputeOperatorTranspose - Computes the explicit matrix representation of
409:         a give matrix that can apply MatMultTranspose()

411:     Collective on Mat

413:     Input Parameters:
414: +   inmat - the matrix
415: -   mattype - the matrix type for the explicit operator

417:     Output Parameter:
418: .   mat - the explicit  operator transposed

420:     Notes:
421:     This computation is done by applying the transpose of the operator to columns of the identity matrix.
422:     This routine is costly in general, and is recommended for use only with relatively small systems.
423:     Currently, this routine uses a dense matrix format if mattype == NULL.

425:     Level: advanced

427: @*/
428: PetscErrorCode  MatComputeOperatorTranspose(Mat inmat,MatType mattype,Mat *mat)
429: {
430:   Mat            A;

434:   MatCreateTranspose(inmat,&A);
435:   MatConvert_Shell(A,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
436:   MatDestroy(&A);
437:   return 0;
438: }

440: /*@
441:   MatChop - Set all values in the matrix less than the tolerance to zero

443:   Input Parameters:
444: + A   - The matrix
445: - tol - The zero tolerance

447:   Output Parameters:
448: . A - The chopped matrix

450:   Level: intermediate

452: .seealso: MatCreate(), MatZeroEntries()
453:  @*/
454: PetscErrorCode MatChop(Mat A, PetscReal tol)
455: {
456:   Mat            a;
457:   PetscScalar    *newVals;
458:   PetscInt       *newCols, rStart, rEnd, numRows, maxRows, r, colMax = 0;
459:   PetscBool      flg;

461:   PetscObjectBaseTypeCompareAny((PetscObject)A, &flg, MATSEQDENSE, MATMPIDENSE, "");
462:   if (flg) {
463:     MatDenseGetLocalMatrix(A, &a);
464:     MatDenseGetLDA(a, &r);
465:     MatGetSize(a, &rStart, &rEnd);
466:     MatDenseGetArray(a, &newVals);
467:     for (; colMax < rEnd; ++colMax) {
468:       for (maxRows = 0; maxRows < rStart; ++maxRows) {
469:         newVals[maxRows + colMax * r] = PetscAbsScalar(newVals[maxRows + colMax * r]) < tol ? 0.0 : newVals[maxRows + colMax * r];
470:       }
471:     }
472:     MatDenseRestoreArray(a, &newVals);
473:   } else {
474:     MatGetOwnershipRange(A, &rStart, &rEnd);
475:     MatGetRowUpperTriangular(A);
476:     for (r = rStart; r < rEnd; ++r) {
477:       PetscInt ncols;

479:       MatGetRow(A, r, &ncols, NULL, NULL);
480:       colMax = PetscMax(colMax, ncols);
481:       MatRestoreRow(A, r, &ncols, NULL, NULL);
482:     }
483:     numRows = rEnd - rStart;
484:     MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
485:     PetscMalloc2(colMax, &newCols, colMax, &newVals);
486:     MatGetOption(A, MAT_NO_OFF_PROC_ENTRIES, &flg); /* cache user-defined value */
487:     MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE);
488:     /* short-circuit code in MatAssemblyBegin() and MatAssemblyEnd()             */
489:     /* that are potentially called many times depending on the distribution of A */
490:     for (r = rStart; r < rStart+maxRows; ++r) {
491:       const PetscScalar *vals;
492:       const PetscInt    *cols;
493:       PetscInt           ncols, newcols, c;

495:       if (r < rEnd) {
496:         MatGetRow(A, r, &ncols, &cols, &vals);
497:         for (c = 0; c < ncols; ++c) {
498:           newCols[c] = cols[c];
499:           newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
500:         }
501:         newcols = ncols;
502:         MatRestoreRow(A, r, &ncols, &cols, &vals);
503:         MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
504:       }
505:       MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
506:       MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
507:     }
508:     MatRestoreRowUpperTriangular(A);
509:     PetscFree2(newCols, newVals);
510:     MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, flg); /* reset option to its user-defined value */
511:   }
512:   return 0;
513: }