1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.math.optimization.general;
18
19 import java.util.Arrays;
20
21 import org.apache.commons.math.FunctionEvaluationException;
22 import org.apache.commons.math.optimization.OptimizationException;
23 import org.apache.commons.math.optimization.VectorialPointValuePair;
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102 public class LevenbergMarquardtOptimizer extends AbstractLeastSquaresOptimizer {
103
104
105 private int solvedCols;
106
107
108 private double[] diagR;
109
110
111 private double[] jacNorm;
112
113
114 private double[] beta;
115
116
117 private int[] permutation;
118
119
120 private int rank;
121
122
123 private double lmPar;
124
125
126 private double[] lmDir;
127
128
129 private double initialStepBoundFactor;
130
131
132 private double costRelativeTolerance;
133
134
135 private double parRelativeTolerance;
136
137
138
139 private double orthoTolerance;
140
141
142
143
144
145
146
147
148
149
150
151
152
153 public LevenbergMarquardtOptimizer() {
154
155
156 setMaxIterations(1000);
157
158
159 setInitialStepBoundFactor(100.0);
160 setCostRelativeTolerance(1.0e-10);
161 setParRelativeTolerance(1.0e-10);
162 setOrthoTolerance(1.0e-10);
163
164 }
165
166
167
168
169
170
171
172
173
174
175 public void setInitialStepBoundFactor(double initialStepBoundFactor) {
176 this.initialStepBoundFactor = initialStepBoundFactor;
177 }
178
179
180
181
182
183
184 public void setCostRelativeTolerance(double costRelativeTolerance) {
185 this.costRelativeTolerance = costRelativeTolerance;
186 }
187
188
189
190
191
192
193
194 public void setParRelativeTolerance(double parRelativeTolerance) {
195 this.parRelativeTolerance = parRelativeTolerance;
196 }
197
198
199
200
201
202
203
204 public void setOrthoTolerance(double orthoTolerance) {
205 this.orthoTolerance = orthoTolerance;
206 }
207
208
209 @Override
210 protected VectorialPointValuePair doOptimize()
211 throws FunctionEvaluationException, OptimizationException, IllegalArgumentException {
212
213
214 solvedCols = Math.min(rows, cols);
215 diagR = new double[cols];
216 jacNorm = new double[cols];
217 beta = new double[cols];
218 permutation = new int[cols];
219 lmDir = new double[cols];
220
221
222 double delta = 0, xNorm = 0;
223 double[] diag = new double[cols];
224 double[] oldX = new double[cols];
225 double[] oldRes = new double[rows];
226 double[] work1 = new double[cols];
227 double[] work2 = new double[cols];
228 double[] work3 = new double[cols];
229
230
231 updateResidualsAndCost();
232
233
234 lmPar = 0;
235 boolean firstIteration = true;
236 while (true) {
237
238 incrementIterationsCounter();
239
240
241 updateJacobian();
242 qrDecomposition();
243
244
245 qTy(residuals);
246
247
248
249 for (int k = 0; k < solvedCols; ++k) {
250 int pk = permutation[k];
251 jacobian[k][pk] = diagR[pk];
252 }
253
254 if (firstIteration) {
255
256
257
258 xNorm = 0;
259 for (int k = 0; k < cols; ++k) {
260 double dk = jacNorm[k];
261 if (dk == 0) {
262 dk = 1.0;
263 }
264 double xk = dk * point[k];
265 xNorm += xk * xk;
266 diag[k] = dk;
267 }
268 xNorm = Math.sqrt(xNorm);
269
270
271 delta = (xNorm == 0) ? initialStepBoundFactor : (initialStepBoundFactor * xNorm);
272
273 }
274
275
276 double maxCosine = 0;
277 if (cost != 0) {
278 for (int j = 0; j < solvedCols; ++j) {
279 int pj = permutation[j];
280 double s = jacNorm[pj];
281 if (s != 0) {
282 double sum = 0;
283 for (int i = 0; i <= j; ++i) {
284 sum += jacobian[i][pj] * residuals[i];
285 }
286 maxCosine = Math.max(maxCosine, Math.abs(sum) / (s * cost));
287 }
288 }
289 }
290 if (maxCosine <= orthoTolerance) {
291
292 return new VectorialPointValuePair(point, objective);
293 }
294
295
296 for (int j = 0; j < cols; ++j) {
297 diag[j] = Math.max(diag[j], jacNorm[j]);
298 }
299
300
301 for (double ratio = 0; ratio < 1.0e-4;) {
302
303
304 for (int j = 0; j < solvedCols; ++j) {
305 int pj = permutation[j];
306 oldX[pj] = point[pj];
307 }
308 double previousCost = cost;
309 double[] tmpVec = residuals;
310 residuals = oldRes;
311 oldRes = tmpVec;
312
313
314 determineLMParameter(oldRes, delta, diag, work1, work2, work3);
315
316
317 double lmNorm = 0;
318 for (int j = 0; j < solvedCols; ++j) {
319 int pj = permutation[j];
320 lmDir[pj] = -lmDir[pj];
321 point[pj] = oldX[pj] + lmDir[pj];
322 double s = diag[pj] * lmDir[pj];
323 lmNorm += s * s;
324 }
325 lmNorm = Math.sqrt(lmNorm);
326
327
328 if (firstIteration) {
329 delta = Math.min(delta, lmNorm);
330 }
331
332
333 updateResidualsAndCost();
334
335
336 double actRed = -1.0;
337 if (0.1 * cost < previousCost) {
338 double r = cost / previousCost;
339 actRed = 1.0 - r * r;
340 }
341
342
343
344 for (int j = 0; j < solvedCols; ++j) {
345 int pj = permutation[j];
346 double dirJ = lmDir[pj];
347 work1[j] = 0;
348 for (int i = 0; i <= j; ++i) {
349 work1[i] += jacobian[i][pj] * dirJ;
350 }
351 }
352 double coeff1 = 0;
353 for (int j = 0; j < solvedCols; ++j) {
354 coeff1 += work1[j] * work1[j];
355 }
356 double pc2 = previousCost * previousCost;
357 coeff1 = coeff1 / pc2;
358 double coeff2 = lmPar * lmNorm * lmNorm / pc2;
359 double preRed = coeff1 + 2 * coeff2;
360 double dirDer = -(coeff1 + coeff2);
361
362
363 ratio = (preRed == 0) ? 0 : (actRed / preRed);
364
365
366 if (ratio <= 0.25) {
367 double tmp =
368 (actRed < 0) ? (0.5 * dirDer / (dirDer + 0.5 * actRed)) : 0.5;
369 if ((0.1 * cost >= previousCost) || (tmp < 0.1)) {
370 tmp = 0.1;
371 }
372 delta = tmp * Math.min(delta, 10.0 * lmNorm);
373 lmPar /= tmp;
374 } else if ((lmPar == 0) || (ratio >= 0.75)) {
375 delta = 2 * lmNorm;
376 lmPar *= 0.5;
377 }
378
379
380 if (ratio >= 1.0e-4) {
381
382 firstIteration = false;
383 xNorm = 0;
384 for (int k = 0; k < cols; ++k) {
385 double xK = diag[k] * point[k];
386 xNorm += xK * xK;
387 }
388 xNorm = Math.sqrt(xNorm);
389 } else {
390
391 cost = previousCost;
392 for (int j = 0; j < solvedCols; ++j) {
393 int pj = permutation[j];
394 point[pj] = oldX[pj];
395 }
396 tmpVec = residuals;
397 residuals = oldRes;
398 oldRes = tmpVec;
399 }
400
401
402 if (((Math.abs(actRed) <= costRelativeTolerance) &&
403 (preRed <= costRelativeTolerance) &&
404 (ratio <= 2.0)) ||
405 (delta <= parRelativeTolerance * xNorm)) {
406 return new VectorialPointValuePair(point, objective);
407 }
408
409
410
411 if ((Math.abs(actRed) <= 2.2204e-16) && (preRed <= 2.2204e-16) && (ratio <= 2.0)) {
412 throw new OptimizationException("cost relative tolerance is too small ({0})," +
413 " no further reduction in the" +
414 " sum of squares is possible",
415 costRelativeTolerance);
416 } else if (delta <= 2.2204e-16 * xNorm) {
417 throw new OptimizationException("parameters relative tolerance is too small" +
418 " ({0}), no further improvement in" +
419 " the approximate solution is possible",
420 parRelativeTolerance);
421 } else if (maxCosine <= 2.2204e-16) {
422 throw new OptimizationException("orthogonality tolerance is too small ({0})," +
423 " solution is orthogonal to the jacobian",
424 orthoTolerance);
425 }
426
427 }
428
429 }
430
431 }
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455 private void determineLMParameter(double[] qy, double delta, double[] diag,
456 double[] work1, double[] work2, double[] work3) {
457
458
459
460 for (int j = 0; j < rank; ++j) {
461 lmDir[permutation[j]] = qy[j];
462 }
463 for (int j = rank; j < cols; ++j) {
464 lmDir[permutation[j]] = 0;
465 }
466 for (int k = rank - 1; k >= 0; --k) {
467 int pk = permutation[k];
468 double ypk = lmDir[pk] / diagR[pk];
469 for (int i = 0; i < k; ++i) {
470 lmDir[permutation[i]] -= ypk * jacobian[i][pk];
471 }
472 lmDir[pk] = ypk;
473 }
474
475
476
477 double dxNorm = 0;
478 for (int j = 0; j < solvedCols; ++j) {
479 int pj = permutation[j];
480 double s = diag[pj] * lmDir[pj];
481 work1[pj] = s;
482 dxNorm += s * s;
483 }
484 dxNorm = Math.sqrt(dxNorm);
485 double fp = dxNorm - delta;
486 if (fp <= 0.1 * delta) {
487 lmPar = 0;
488 return;
489 }
490
491
492
493
494 double sum2, parl = 0;
495 if (rank == solvedCols) {
496 for (int j = 0; j < solvedCols; ++j) {
497 int pj = permutation[j];
498 work1[pj] *= diag[pj] / dxNorm;
499 }
500 sum2 = 0;
501 for (int j = 0; j < solvedCols; ++j) {
502 int pj = permutation[j];
503 double sum = 0;
504 for (int i = 0; i < j; ++i) {
505 sum += jacobian[i][pj] * work1[permutation[i]];
506 }
507 double s = (work1[pj] - sum) / diagR[pj];
508 work1[pj] = s;
509 sum2 += s * s;
510 }
511 parl = fp / (delta * sum2);
512 }
513
514
515 sum2 = 0;
516 for (int j = 0; j < solvedCols; ++j) {
517 int pj = permutation[j];
518 double sum = 0;
519 for (int i = 0; i <= j; ++i) {
520 sum += jacobian[i][pj] * qy[i];
521 }
522 sum /= diag[pj];
523 sum2 += sum * sum;
524 }
525 double gNorm = Math.sqrt(sum2);
526 double paru = gNorm / delta;
527 if (paru == 0) {
528
529 paru = 2.2251e-308 / Math.min(delta, 0.1);
530 }
531
532
533
534 lmPar = Math.min(paru, Math.max(lmPar, parl));
535 if (lmPar == 0) {
536 lmPar = gNorm / dxNorm;
537 }
538
539 for (int countdown = 10; countdown >= 0; --countdown) {
540
541
542 if (lmPar == 0) {
543 lmPar = Math.max(2.2251e-308, 0.001 * paru);
544 }
545 double sPar = Math.sqrt(lmPar);
546 for (int j = 0; j < solvedCols; ++j) {
547 int pj = permutation[j];
548 work1[pj] = sPar * diag[pj];
549 }
550 determineLMDirection(qy, work1, work2, work3);
551
552 dxNorm = 0;
553 for (int j = 0; j < solvedCols; ++j) {
554 int pj = permutation[j];
555 double s = diag[pj] * lmDir[pj];
556 work3[pj] = s;
557 dxNorm += s * s;
558 }
559 dxNorm = Math.sqrt(dxNorm);
560 double previousFP = fp;
561 fp = dxNorm - delta;
562
563
564
565 if ((Math.abs(fp) <= 0.1 * delta) ||
566 ((parl == 0) && (fp <= previousFP) && (previousFP < 0))) {
567 return;
568 }
569
570
571 for (int j = 0; j < solvedCols; ++j) {
572 int pj = permutation[j];
573 work1[pj] = work3[pj] * diag[pj] / dxNorm;
574 }
575 for (int j = 0; j < solvedCols; ++j) {
576 int pj = permutation[j];
577 work1[pj] /= work2[j];
578 double tmp = work1[pj];
579 for (int i = j + 1; i < solvedCols; ++i) {
580 work1[permutation[i]] -= jacobian[i][pj] * tmp;
581 }
582 }
583 sum2 = 0;
584 for (int j = 0; j < solvedCols; ++j) {
585 double s = work1[permutation[j]];
586 sum2 += s * s;
587 }
588 double correction = fp / (delta * sum2);
589
590
591 if (fp > 0) {
592 parl = Math.max(parl, lmPar);
593 } else if (fp < 0) {
594 paru = Math.min(paru, lmPar);
595 }
596
597
598 lmPar = Math.max(parl, lmPar + correction);
599
600 }
601 }
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623 private void determineLMDirection(double[] qy, double[] diag,
624 double[] lmDiag, double[] work) {
625
626
627
628 for (int j = 0; j < solvedCols; ++j) {
629 int pj = permutation[j];
630 for (int i = j + 1; i < solvedCols; ++i) {
631 jacobian[i][pj] = jacobian[j][permutation[i]];
632 }
633 lmDir[j] = diagR[pj];
634 work[j] = qy[j];
635 }
636
637
638 for (int j = 0; j < solvedCols; ++j) {
639
640
641
642 int pj = permutation[j];
643 double dpj = diag[pj];
644 if (dpj != 0) {
645 Arrays.fill(lmDiag, j + 1, lmDiag.length, 0);
646 }
647 lmDiag[j] = dpj;
648
649
650
651
652 double qtbpj = 0;
653 for (int k = j; k < solvedCols; ++k) {
654 int pk = permutation[k];
655
656
657
658 if (lmDiag[k] != 0) {
659
660 double sin, cos;
661 double rkk = jacobian[k][pk];
662 if (Math.abs(rkk) < Math.abs(lmDiag[k])) {
663 double cotan = rkk / lmDiag[k];
664 sin = 1.0 / Math.sqrt(1.0 + cotan * cotan);
665 cos = sin * cotan;
666 } else {
667 double tan = lmDiag[k] / rkk;
668 cos = 1.0 / Math.sqrt(1.0 + tan * tan);
669 sin = cos * tan;
670 }
671
672
673
674 jacobian[k][pk] = cos * rkk + sin * lmDiag[k];
675 double temp = cos * work[k] + sin * qtbpj;
676 qtbpj = -sin * work[k] + cos * qtbpj;
677 work[k] = temp;
678
679
680 for (int i = k + 1; i < solvedCols; ++i) {
681 double rik = jacobian[i][pk];
682 temp = cos * rik + sin * lmDiag[i];
683 lmDiag[i] = -sin * rik + cos * lmDiag[i];
684 jacobian[i][pk] = temp;
685 }
686
687 }
688 }
689
690
691
692 lmDiag[j] = jacobian[j][permutation[j]];
693 jacobian[j][permutation[j]] = lmDir[j];
694
695 }
696
697
698
699 int nSing = solvedCols;
700 for (int j = 0; j < solvedCols; ++j) {
701 if ((lmDiag[j] == 0) && (nSing == solvedCols)) {
702 nSing = j;
703 }
704 if (nSing < solvedCols) {
705 work[j] = 0;
706 }
707 }
708 if (nSing > 0) {
709 for (int j = nSing - 1; j >= 0; --j) {
710 int pj = permutation[j];
711 double sum = 0;
712 for (int i = j + 1; i < nSing; ++i) {
713 sum += jacobian[i][pj] * work[i];
714 }
715 work[j] = (work[j] - sum) / lmDiag[j];
716 }
717 }
718
719
720 for (int j = 0; j < lmDir.length; ++j) {
721 lmDir[permutation[j]] = work[j];
722 }
723
724 }
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748 private void qrDecomposition() throws OptimizationException {
749
750
751 for (int k = 0; k < cols; ++k) {
752 permutation[k] = k;
753 double norm2 = 0;
754 for (int i = 0; i < jacobian.length; ++i) {
755 double akk = jacobian[i][k];
756 norm2 += akk * akk;
757 }
758 jacNorm[k] = Math.sqrt(norm2);
759 }
760
761
762 for (int k = 0; k < cols; ++k) {
763
764
765 int nextColumn = -1;
766 double ak2 = Double.NEGATIVE_INFINITY;
767 for (int i = k; i < cols; ++i) {
768 double norm2 = 0;
769 for (int j = k; j < jacobian.length; ++j) {
770 double aki = jacobian[j][permutation[i]];
771 norm2 += aki * aki;
772 }
773 if (Double.isInfinite(norm2) || Double.isNaN(norm2)) {
774 throw new OptimizationException(
775 "unable to perform Q.R decomposition on the {0}x{1} jacobian matrix",
776 rows, cols);
777 }
778 if (norm2 > ak2) {
779 nextColumn = i;
780 ak2 = norm2;
781 }
782 }
783 if (ak2 == 0) {
784 rank = k;
785 return;
786 }
787 int pk = permutation[nextColumn];
788 permutation[nextColumn] = permutation[k];
789 permutation[k] = pk;
790
791
792 double akk = jacobian[k][pk];
793 double alpha = (akk > 0) ? -Math.sqrt(ak2) : Math.sqrt(ak2);
794 double betak = 1.0 / (ak2 - akk * alpha);
795 beta[pk] = betak;
796
797
798 diagR[pk] = alpha;
799 jacobian[k][pk] -= alpha;
800
801
802 for (int dk = cols - 1 - k; dk > 0; --dk) {
803 double gamma = 0;
804 for (int j = k; j < jacobian.length; ++j) {
805 gamma += jacobian[j][pk] * jacobian[j][permutation[k + dk]];
806 }
807 gamma *= betak;
808 for (int j = k; j < jacobian.length; ++j) {
809 jacobian[j][permutation[k + dk]] -= gamma * jacobian[j][pk];
810 }
811 }
812
813 }
814
815 rank = solvedCols;
816
817 }
818
819
820
821
822
823
824 private void qTy(double[] y) {
825 for (int k = 0; k < cols; ++k) {
826 int pk = permutation[k];
827 double gamma = 0;
828 for (int i = k; i < rows; ++i) {
829 gamma += jacobian[i][pk] * y[i];
830 }
831 gamma *= beta[pk];
832 for (int i = k; i < rows; ++i) {
833 y[i] -= gamma * jacobian[i][pk];
834 }
835 }
836 }
837
838 }