001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.math3.optimization.general;
019
020 import org.apache.commons.math3.analysis.DifferentiableMultivariateVectorFunction;
021 import org.apache.commons.math3.analysis.FunctionUtils;
022 import org.apache.commons.math3.analysis.differentiation.DerivativeStructure;
023 import org.apache.commons.math3.analysis.differentiation.MultivariateDifferentiableVectorFunction;
024 import org.apache.commons.math3.exception.DimensionMismatchException;
025 import org.apache.commons.math3.exception.NumberIsTooSmallException;
026 import org.apache.commons.math3.exception.util.LocalizedFormats;
027 import org.apache.commons.math3.linear.ArrayRealVector;
028 import org.apache.commons.math3.linear.RealMatrix;
029 import org.apache.commons.math3.linear.DecompositionSolver;
030 import org.apache.commons.math3.linear.MatrixUtils;
031 import org.apache.commons.math3.linear.QRDecomposition;
032 import org.apache.commons.math3.linear.EigenDecomposition;
033 import org.apache.commons.math3.optimization.OptimizationData;
034 import org.apache.commons.math3.optimization.InitialGuess;
035 import org.apache.commons.math3.optimization.Target;
036 import org.apache.commons.math3.optimization.Weight;
037 import org.apache.commons.math3.optimization.ConvergenceChecker;
038 import org.apache.commons.math3.optimization.DifferentiableMultivariateVectorOptimizer;
039 import org.apache.commons.math3.optimization.PointVectorValuePair;
040 import org.apache.commons.math3.optimization.direct.BaseAbstractMultivariateVectorOptimizer;
041 import org.apache.commons.math3.util.FastMath;
042
043 /**
044 * Base class for implementing least squares optimizers.
045  * It handles the boilerplate methods associated with threshold settings,
046 * Jacobian and error estimation.
047 * <br/>
048 * This class constructs the Jacobian matrix of the function argument in method
049 * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,MultivariateVectorFunction,OptimizationData[])
050 * optimize} and assumes that the rows of that matrix iterate on the model
051 * functions while the columns iterate on the parameters; thus, the numbers
052 * of rows is equal to the dimension of the
053 * {@link org.apache.commons.math3.optimization.Target Target} while
054 * the number of columns is equal to the dimension of the
055 * {@link org.apache.commons.math3.optimization.InitialGuess InitialGuess}.
056 *
057 * @version $Id: AbstractLeastSquaresOptimizer.java 1422313 2012-12-15 18:53:41Z psteitz $
058 * @deprecated As of 3.1 (to be removed in 4.0).
059 * @since 1.2
060 */
061 @Deprecated
062 public abstract class AbstractLeastSquaresOptimizer
063 extends BaseAbstractMultivariateVectorOptimizer<DifferentiableMultivariateVectorFunction>
064 implements DifferentiableMultivariateVectorOptimizer {
065 /**
066 * Singularity threshold (cf. {@link #getCovariances(double)}).
067 * @deprecated As of 3.1.
068 */
069 @Deprecated
070 private static final double DEFAULT_SINGULARITY_THRESHOLD = 1e-14;
071 /**
072 * Jacobian matrix of the weighted residuals.
073 * This matrix is in canonical form just after the calls to
074 * {@link #updateJacobian()}, but may be modified by the solver
075 * in the derived class (the {@link LevenbergMarquardtOptimizer
076 * Levenberg-Marquardt optimizer} does this).
077 * @deprecated As of 3.1. To be removed in 4.0. Please use
078 * {@link #computeWeightedJacobian(double[])} instead.
079 */
080 @Deprecated
081 protected double[][] weightedResidualJacobian;
082 /** Number of columns of the jacobian matrix.
083 * @deprecated As of 3.1.
084 */
085 @Deprecated
086 protected int cols;
087 /** Number of rows of the jacobian matrix.
088 * @deprecated As of 3.1.
089 */
090 @Deprecated
091 protected int rows;
092 /** Current point.
093 * @deprecated As of 3.1.
094 */
095 @Deprecated
096 protected double[] point;
097 /** Current objective function value.
098 * @deprecated As of 3.1.
099 */
100 @Deprecated
101 protected double[] objective;
102 /** Weighted residuals
103 * @deprecated As of 3.1.
104 */
105 @Deprecated
106 protected double[] weightedResiduals;
107 /** Cost value (square root of the sum of the residuals).
108 * @deprecated As of 3.1. Field to become "private" in 4.0.
109 * Please use {@link #setCost(double)}.
110 */
111 @Deprecated
112 protected double cost;
113 /** Objective function derivatives. */
114 private MultivariateDifferentiableVectorFunction jF;
115 /** Number of evaluations of the Jacobian. */
116 private int jacobianEvaluations;
117 /** Square-root of the weight matrix. */
118 private RealMatrix weightMatrixSqrt;
119
120 /**
121 * Simple constructor with default settings.
122 * The convergence check is set to a {@link
123 * org.apache.commons.math3.optimization.SimpleVectorValueChecker}.
124 * @deprecated See {@link org.apache.commons.math3.optimization.SimpleValueChecker#SimpleValueChecker()}
125 */
126 @Deprecated
127 protected AbstractLeastSquaresOptimizer() {}
128
129 /**
130 * @param checker Convergence checker.
131 */
132 protected AbstractLeastSquaresOptimizer(ConvergenceChecker<PointVectorValuePair> checker) {
133 super(checker);
134 }
135
136 /**
137 * @return the number of evaluations of the Jacobian function.
138 */
139 public int getJacobianEvaluations() {
140 return jacobianEvaluations;
141 }
142
143 /**
144 * Update the jacobian matrix.
145 *
146 * @throws DimensionMismatchException if the Jacobian dimension does not
147 * match problem dimension.
148 * @deprecated As of 3.1. Please use {@link #computeWeightedJacobian(double[])}
149 * instead.
150 */
151 @Deprecated
152 protected void updateJacobian() {
153 final RealMatrix weightedJacobian = computeWeightedJacobian(point);
154 weightedResidualJacobian = weightedJacobian.scalarMultiply(-1).getData();
155 }
156
157 /**
158 * Computes the Jacobian matrix.
159 *
160 * @param params Model parameters at which to compute the Jacobian.
161 * @return the weighted Jacobian: W<sup>1/2</sup> J.
162 * @throws DimensionMismatchException if the Jacobian dimension does not
163 * match problem dimension.
164 * @since 3.1
165 */
166 protected RealMatrix computeWeightedJacobian(double[] params) {
167 ++jacobianEvaluations;
168
169 final DerivativeStructure[] dsPoint = new DerivativeStructure[params.length];
170 final int nC = params.length;
171 for (int i = 0; i < nC; ++i) {
172 dsPoint[i] = new DerivativeStructure(nC, 1, i, params[i]);
173 }
174 final DerivativeStructure[] dsValue = jF.value(dsPoint);
175 final int nR = getTarget().length;
176 if (dsValue.length != nR) {
177 throw new DimensionMismatchException(dsValue.length, nR);
178 }
179 final double[][] jacobianData = new double[nR][nC];
180 for (int i = 0; i < nR; ++i) {
181 int[] orders = new int[nC];
182 for (int j = 0; j < nC; ++j) {
183 orders[j] = 1;
184 jacobianData[i][j] = dsValue[i].getPartialDerivative(orders);
185 orders[j] = 0;
186 }
187 }
188
189 return weightMatrixSqrt.multiply(MatrixUtils.createRealMatrix(jacobianData));
190 }
191
192 /**
193 * Update the residuals array and cost function value.
194 * @throws DimensionMismatchException if the dimension does not match the
195 * problem dimension.
196 * @throws org.apache.commons.math3.exception.TooManyEvaluationsException
197 * if the maximal number of evaluations is exceeded.
198 * @deprecated As of 3.1. Please use {@link #computeResiduals(double[])},
199 * {@link #computeObjectiveValue(double[])}, {@link #computeCost(double[])}
200 * and {@link #setCost(double)} instead.
201 */
202 @Deprecated
203 protected void updateResidualsAndCost() {
204 objective = computeObjectiveValue(point);
205 final double[] res = computeResiduals(objective);
206
207 // Compute cost.
208 cost = computeCost(res);
209
210 // Compute weighted residuals.
211 final ArrayRealVector residuals = new ArrayRealVector(res);
212 weightedResiduals = weightMatrixSqrt.operate(residuals).toArray();
213 }
214
215 /**
216 * Computes the cost.
217 *
218 * @param residuals Residuals.
219 * @return the cost.
220 * @see #computeResiduals(double[])
221 * @since 3.1
222 */
223 protected double computeCost(double[] residuals) {
224 final ArrayRealVector r = new ArrayRealVector(residuals);
225 return FastMath.sqrt(r.dotProduct(getWeight().operate(r)));
226 }
227
228 /**
229 * Get the Root Mean Square value.
230 * Get the Root Mean Square value, i.e. the root of the arithmetic
231 * mean of the square of all weighted residuals. This is related to the
232 * criterion that is minimized by the optimizer as follows: if
233 * <em>c</em> if the criterion, and <em>n</em> is the number of
234 * measurements, then the RMS is <em>sqrt (c/n)</em>.
235 *
236 * @return RMS value
237 */
238 public double getRMS() {
239 return FastMath.sqrt(getChiSquare() / rows);
240 }
241
242 /**
243 * Get a Chi-Square-like value assuming the N residuals follow N
244 * distinct normal distributions centered on 0 and whose variances are
245 * the reciprocal of the weights.
246 * @return chi-square value
247 */
248 public double getChiSquare() {
249 return cost * cost;
250 }
251
252 /**
253 * Gets the square-root of the weight matrix.
254 *
255 * @return the square-root of the weight matrix.
256 * @since 3.1
257 */
258 public RealMatrix getWeightSquareRoot() {
259 return weightMatrixSqrt.copy();
260 }
261
262 /**
263 * Sets the cost.
264 *
265 * @param cost Cost value.
266 * @since 3.1
267 */
268 protected void setCost(double cost) {
269 this.cost = cost;
270 }
271
272 /**
273 * Get the covariance matrix of the optimized parameters.
274 *
275 * @return the covariance matrix.
276 * @throws org.apache.commons.math3.linear.SingularMatrixException
277 * if the covariance matrix cannot be computed (singular problem).
278 * @see #getCovariances(double)
279 * @deprecated As of 3.1. Please use {@link #computeCovariances(double[],double)}
280 * instead.
281 */
282 @Deprecated
283 public double[][] getCovariances() {
284 return getCovariances(DEFAULT_SINGULARITY_THRESHOLD);
285 }
286
287 /**
288 * Get the covariance matrix of the optimized parameters.
289 * <br/>
290 * Note that this operation involves the inversion of the
291 * <code>J<sup>T</sup>J</code> matrix, where {@code J} is the
292 * Jacobian matrix.
293 * The {@code threshold} parameter is a way for the caller to specify
294 * that the result of this computation should be considered meaningless,
295 * and thus trigger an exception.
296 *
297 * @param threshold Singularity threshold.
298 * @return the covariance matrix.
299 * @throws org.apache.commons.math3.linear.SingularMatrixException
300 * if the covariance matrix cannot be computed (singular problem).
301 * @deprecated As of 3.1. Please use {@link #computeCovariances(double[],double)}
302 * instead.
303 */
304 @Deprecated
305 public double[][] getCovariances(double threshold) {
306 return computeCovariances(point, threshold);
307 }
308
309 /**
310 * Get the covariance matrix of the optimized parameters.
311 * <br/>
312 * Note that this operation involves the inversion of the
313 * <code>J<sup>T</sup>J</code> matrix, where {@code J} is the
314 * Jacobian matrix.
315 * The {@code threshold} parameter is a way for the caller to specify
316 * that the result of this computation should be considered meaningless,
317 * and thus trigger an exception.
318 *
319 * @param params Model parameters.
320 * @param threshold Singularity threshold.
321 * @return the covariance matrix.
322 * @throws org.apache.commons.math3.linear.SingularMatrixException
323 * if the covariance matrix cannot be computed (singular problem).
324 * @since 3.1
325 */
326 public double[][] computeCovariances(double[] params,
327 double threshold) {
328 // Set up the Jacobian.
329 final RealMatrix j = computeWeightedJacobian(params);
330
331 // Compute transpose(J)J.
332 final RealMatrix jTj = j.transpose().multiply(j);
333
334 // Compute the covariances matrix.
335 final DecompositionSolver solver
336 = new QRDecomposition(jTj, threshold).getSolver();
337 return solver.getInverse().getData();
338 }
339
340 /**
341 * <p>
342 * Returns an estimate of the standard deviation of each parameter. The
343 * returned values are the so-called (asymptotic) standard errors on the
344 * parameters, defined as {@code sd(a[i]) = sqrt(S / (n - m) * C[i][i])},
345 * where {@code a[i]} is the optimized value of the {@code i}-th parameter,
346 * {@code S} is the minimized value of the sum of squares objective function
347 * (as returned by {@link #getChiSquare()}), {@code n} is the number of
348 * observations, {@code m} is the number of parameters and {@code C} is the
349 * covariance matrix.
350 * </p>
351 * <p>
352 * See also
353 * <a href="http://en.wikipedia.org/wiki/Least_squares">Wikipedia</a>,
354 * or
355 * <a href="http://mathworld.wolfram.com/LeastSquaresFitting.html">MathWorld</a>,
356 * equations (34) and (35) for a particular case.
357 * </p>
358 *
359 * @return an estimate of the standard deviation of the optimized parameters
360 * @throws org.apache.commons.math3.linear.SingularMatrixException
361 * if the covariance matrix cannot be computed.
362 * @throws NumberIsTooSmallException if the number of degrees of freedom is not
363 * positive, i.e. the number of measurements is less or equal to the number of
364 * parameters.
365 * @deprecated as of version 3.1, {@link #computeSigma(double[],double)} should be used
366 * instead. It should be emphasized that {@code guessParametersErrors} and
367 * {@code computeSigma} are <em>not</em> strictly equivalent.
368 */
369 @Deprecated
370 public double[] guessParametersErrors() {
371 if (rows <= cols) {
372 throw new NumberIsTooSmallException(LocalizedFormats.NO_DEGREES_OF_FREEDOM,
373 rows, cols, false);
374 }
375 double[] errors = new double[cols];
376 final double c = FastMath.sqrt(getChiSquare() / (rows - cols));
377 double[][] covar = computeCovariances(point, 1e-14);
378 for (int i = 0; i < errors.length; ++i) {
379 errors[i] = FastMath.sqrt(covar[i][i]) * c;
380 }
381 return errors;
382 }
383
384 /**
385 * Computes an estimate of the standard deviation of the parameters. The
386 * returned values are the square root of the diagonal coefficients of the
387 * covariance matrix, {@code sd(a[i]) ~= sqrt(C[i][i])}, where {@code a[i]}
388 * is the optimized value of the {@code i}-th parameter, and {@code C} is
389 * the covariance matrix.
390 *
391 * @param params Model parameters.
392 * @param covarianceSingularityThreshold Singularity threshold (see
393 * {@link #computeCovariances(double[],double) computeCovariances}).
394 * @return an estimate of the standard deviation of the optimized parameters
395 * @throws org.apache.commons.math3.linear.SingularMatrixException
396 * if the covariance matrix cannot be computed.
397 * @since 3.1
398 */
399 public double[] computeSigma(double[] params,
400 double covarianceSingularityThreshold) {
401 final int nC = params.length;
402 final double[] sig = new double[nC];
403 final double[][] cov = computeCovariances(params, covarianceSingularityThreshold);
404 for (int i = 0; i < nC; ++i) {
405 sig[i] = FastMath.sqrt(cov[i][i]);
406 }
407 return sig;
408 }
409
410 /** {@inheritDoc}
411 * @deprecated As of 3.1. Please use
412 * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,MultivariateVectorFunction,OptimizationData[])
413 * optimize(int,MultivariateDifferentiableVectorFunction,OptimizationData...)}
414 * instead.
415 */
416 @Override
417 @Deprecated
418 public PointVectorValuePair optimize(int maxEval,
419 final DifferentiableMultivariateVectorFunction f,
420 final double[] target, final double[] weights,
421 final double[] startPoint) {
422 return optimizeInternal(maxEval,
423 FunctionUtils.toMultivariateDifferentiableVectorFunction(f),
424 new Target(target),
425 new Weight(weights),
426 new InitialGuess(startPoint));
427 }
428
429 /**
430 * Optimize an objective function.
431 * Optimization is considered to be a weighted least-squares minimization.
432 * The cost function to be minimized is
433 * <code>∑weight<sub>i</sub>(objective<sub>i</sub> - target<sub>i</sub>)<sup>2</sup></code>
434 *
435 * @param f Objective function.
436 * @param target Target value for the objective functions at optimum.
437 * @param weights Weights for the least squares cost computation.
438 * @param startPoint Start point for optimization.
439 * @return the point/value pair giving the optimal value for objective
440 * function.
441 * @param maxEval Maximum number of function evaluations.
442 * @throws org.apache.commons.math3.exception.DimensionMismatchException
443 * if the start point dimension is wrong.
444 * @throws org.apache.commons.math3.exception.TooManyEvaluationsException
445 * if the maximal number of evaluations is exceeded.
446 * @throws org.apache.commons.math3.exception.NullArgumentException if
447 * any argument is {@code null}.
448 * @deprecated As of 3.1. Please use
449 * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,MultivariateVectorFunction,OptimizationData[])
450 * optimize(int,MultivariateDifferentiableVectorFunction,OptimizationData...)}
451 * instead.
452 */
453 @Deprecated
454 public PointVectorValuePair optimize(final int maxEval,
455 final MultivariateDifferentiableVectorFunction f,
456 final double[] target, final double[] weights,
457 final double[] startPoint) {
458 return optimizeInternal(maxEval, f,
459 new Target(target),
460 new Weight(weights),
461 new InitialGuess(startPoint));
462 }
463
464 /**
465 * Optimize an objective function.
466 * Optimization is considered to be a weighted least-squares minimization.
467 * The cost function to be minimized is
468 * <code>∑weight<sub>i</sub>(objective<sub>i</sub> - target<sub>i</sub>)<sup>2</sup></code>
469 *
470 * @param maxEval Allowed number of evaluations of the objective function.
471 * @param f Objective function.
472 * @param optData Optimization data. The following data will be looked for:
473 * <ul>
474 * <li>{@link Target}</li>
475 * <li>{@link Weight}</li>
476 * <li>{@link InitialGuess}</li>
477 * </ul>
478 * @return the point/value pair giving the optimal value of the objective
479 * function.
480 * @throws org.apache.commons.math3.exception.TooManyEvaluationsException if
481 * the maximal number of evaluations is exceeded.
482 * @throws DimensionMismatchException if the target, and weight arguments
483 * have inconsistent dimensions.
484 * @see BaseAbstractMultivariateVectorOptimizer#optimizeInternal(int,MultivariateVectorFunction,OptimizationData[])
485 * @since 3.1
486 * @deprecated As of 3.1. Override is necessary only until this class's generic
487 * argument is changed to {@code MultivariateDifferentiableVectorFunction}.
488 */
489 @Deprecated
490 protected PointVectorValuePair optimizeInternal(final int maxEval,
491 final MultivariateDifferentiableVectorFunction f,
492 OptimizationData... optData) {
493 // XXX Conversion will be removed when the generic argument of the
494 // base class becomes "MultivariateDifferentiableVectorFunction".
495 return super.optimizeInternal(maxEval, FunctionUtils.toDifferentiableMultivariateVectorFunction(f), optData);
496 }
497
498 /** {@inheritDoc} */
499 @Override
500 protected void setUp() {
501 super.setUp();
502
503 // Reset counter.
504 jacobianEvaluations = 0;
505
506 // Square-root of the weight matrix.
507 weightMatrixSqrt = squareRoot(getWeight());
508
509 // Store least squares problem characteristics.
510 // XXX The conversion won't be necessary when the generic argument of
511 // the base class becomes "MultivariateDifferentiableVectorFunction".
512 // XXX "jF" is not strictly necessary anymore but is currently more
513 // efficient than converting the value returned from "getObjectiveFunction()"
514 // every time it is used.
515 jF = FunctionUtils.toMultivariateDifferentiableVectorFunction((DifferentiableMultivariateVectorFunction) getObjectiveFunction());
516
517 // Arrays shared with "private" and "protected" methods.
518 point = getStartPoint();
519 rows = getTarget().length;
520 cols = point.length;
521 }
522
523 /**
524 * Computes the residuals.
525 * The residual is the difference between the observed (target)
526 * values and the model (objective function) value.
527 * There is one residual for each element of the vector-valued
528 * function.
529 *
530 * @param objectiveValue Value of the the objective function. This is
531 * the value returned from a call to
532 * {@link #computeObjectiveValue(double[]) computeObjectiveValue}
533 * (whose array argument contains the model parameters).
534 * @return the residuals.
535 * @throws DimensionMismatchException if {@code params} has a wrong
536 * length.
537 * @since 3.1
538 */
539 protected double[] computeResiduals(double[] objectiveValue) {
540 final double[] target = getTarget();
541 if (objectiveValue.length != target.length) {
542 throw new DimensionMismatchException(target.length,
543 objectiveValue.length);
544 }
545
546 final double[] residuals = new double[target.length];
547 for (int i = 0; i < target.length; i++) {
548 residuals[i] = target[i] - objectiveValue[i];
549 }
550
551 return residuals;
552 }
553
554 /**
555 * Computes the square-root of the weight matrix.
556 *
557 * @param m Symmetric, positive-definite (weight) matrix.
558 * @return the square-root of the weight matrix.
559 */
560 private RealMatrix squareRoot(RealMatrix m) {
561 final EigenDecomposition dec = new EigenDecomposition(m);
562 return dec.getSquareRoot();
563 }
564 }