001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.math3.optim.nonlinear.scalar.gradient;
019
import org.apache.commons.math3.analysis.UnivariateFunction;
import org.apache.commons.math3.analysis.solvers.BrentSolver;
import org.apache.commons.math3.analysis.solvers.UnivariateSolver;
import org.apache.commons.math3.exception.MathIllegalStateException;
import org.apache.commons.math3.exception.MathInternalError;
import org.apache.commons.math3.exception.MathUnsupportedOperationException;
import org.apache.commons.math3.exception.TooManyEvaluationsException;
import org.apache.commons.math3.exception.util.LocalizedFormats;
import org.apache.commons.math3.optim.ConvergenceChecker;
import org.apache.commons.math3.optim.OptimizationData;
import org.apache.commons.math3.optim.PointValuePair;
import org.apache.commons.math3.optim.nonlinear.scalar.GoalType;
import org.apache.commons.math3.optim.nonlinear.scalar.GradientMultivariateOptimizer;
import org.apache.commons.math3.util.FastMath;
033
034 /**
035 * Non-linear conjugate gradient optimizer.
036 * <p>
037 * This class supports both the Fletcher-Reeves and the Polak-Ribière
038 * update formulas for the conjugate search directions.
039 * It also supports optional preconditioning.
040 * </p>
041 *
042 * @version $Id: NonLinearConjugateGradientOptimizer.java 1416643 2012-12-03 19:37:14Z tn $
043 * @since 2.0
044 */
045 public class NonLinearConjugateGradientOptimizer
046 extends GradientMultivariateOptimizer {
047 /** Update formula for the beta parameter. */
048 private final Formula updateFormula;
049 /** Preconditioner (may be null). */
050 private final Preconditioner preconditioner;
051 /** solver to use in the line search (may be null). */
052 private final UnivariateSolver solver;
053 /** Initial step used to bracket the optimum in line search. */
054 private double initialStep = 1;
055
056 /**
057 * Constructor with default {@link BrentSolver line search solver} and
058 * {@link IdentityPreconditioner preconditioner}.
059 *
060 * @param updateFormula formula to use for updating the β parameter,
061 * must be one of {@link Formula#FLETCHER_REEVES} or
062 * {@link Formula#POLAK_RIBIERE}.
063 * @param checker Convergence checker.
064 */
065 public NonLinearConjugateGradientOptimizer(final Formula updateFormula,
066 ConvergenceChecker<PointValuePair> checker) {
067 this(updateFormula,
068 checker,
069 new BrentSolver(),
070 new IdentityPreconditioner());
071 }
072
073 /**
074 * Available choices of update formulas for the updating the parameter
075 * that is used to compute the successive conjugate search directions.
076 * For non-linear conjugate gradients, there are
077 * two formulas:
078 * <ul>
079 * <li>Fletcher-Reeves formula</li>
080 * <li>Polak-Ribière formula</li>
081 * </ul>
082 *
083 * On the one hand, the Fletcher-Reeves formula is guaranteed to converge
084 * if the start point is close enough of the optimum whether the
085 * Polak-Ribière formula may not converge in rare cases. On the
086 * other hand, the Polak-Ribière formula is often faster when it
087 * does converge. Polak-Ribière is often used.
088 *
089 * @since 2.0
090 */
091 public static enum Formula {
092 /** Fletcher-Reeves formula. */
093 FLETCHER_REEVES,
094 /** Polak-Ribière formula. */
095 POLAK_RIBIERE
096 }
097
098 /**
099 * The initial step is a factor with respect to the search direction
100 * (which itself is roughly related to the gradient of the function).
101 * <br/>
102 * It is used to find an interval that brackets the optimum in line
103 * search.
104 *
105 * @since 3.1
106 */
107 public static class BracketingStep implements OptimizationData {
108 /** Initial step. */
109 private final double initialStep;
110
111 /**
112 * @param step Initial step for the bracket search.
113 */
114 public BracketingStep(double step) {
115 initialStep = step;
116 }
117
118 /**
119 * Gets the initial step.
120 *
121 * @return the initial step.
122 */
123 public double getBracketingStep() {
124 return initialStep;
125 }
126 }
127
128 /**
129 * Constructor with default {@link IdentityPreconditioner preconditioner}.
130 *
131 * @param updateFormula formula to use for updating the β parameter,
132 * must be one of {@link Formula#FLETCHER_REEVES} or
133 * {@link Formula#POLAK_RIBIERE}.
134 * @param checker Convergence checker.
135 * @param lineSearchSolver Solver to use during line search.
136 */
137 public NonLinearConjugateGradientOptimizer(final Formula updateFormula,
138 ConvergenceChecker<PointValuePair> checker,
139 final UnivariateSolver lineSearchSolver) {
140 this(updateFormula,
141 checker,
142 lineSearchSolver,
143 new IdentityPreconditioner());
144 }
145
146 /**
147 * @param updateFormula formula to use for updating the β parameter,
148 * must be one of {@link Formula#FLETCHER_REEVES} or
149 * {@link Formula#POLAK_RIBIERE}.
150 * @param checker Convergence checker.
151 * @param lineSearchSolver Solver to use during line search.
152 * @param preconditioner Preconditioner.
153 */
154 public NonLinearConjugateGradientOptimizer(final Formula updateFormula,
155 ConvergenceChecker<PointValuePair> checker,
156 final UnivariateSolver lineSearchSolver,
157 final Preconditioner preconditioner) {
158 super(checker);
159
160 this.updateFormula = updateFormula;
161 solver = lineSearchSolver;
162 this.preconditioner = preconditioner;
163 initialStep = 1;
164 }
165
166 /**
167 * {@inheritDoc}
168 *
169 * @param optData Optimization data.
170 * The following data will be looked for:
171 * <ul>
172 * <li>{@link org.apache.commons.math3.optim.MaxEval}</li>
173 * <li>{@link org.apache.commons.math3.optim.InitialGuess}</li>
174 * <li>{@link org.apache.commons.math3.optim.SimpleBounds}</li>
175 * <li>{@link org.apache.commons.math3.optim.nonlinear.scalar.GoalType}</li>
176 * <li>{@link org.apache.commons.math3.optim.nonlinear.scalar.ObjectiveFunction}</li>
177 * <li>{@link org.apache.commons.math3.optim.nonlinear.scalar.ObjectiveFunctionGradient}</li>
178 * <li>{@link BracketingStep}</li>
179 * </ul>
180 * @return {@inheritDoc}
181 * @throws TooManyEvaluationsException if the maximal number of
182 * evaluations (of the objective function) is exceeded.
183 */
184 @Override
185 public PointValuePair optimize(OptimizationData... optData)
186 throws TooManyEvaluationsException {
187 // Retrieve settings.
188 parseOptimizationData(optData);
189 // Set up base class and perform computation.
190 return super.optimize(optData);
191 }
192
193 /** {@inheritDoc} */
194 @Override
195 protected PointValuePair doOptimize() {
196 final ConvergenceChecker<PointValuePair> checker = getConvergenceChecker();
197 final double[] point = getStartPoint();
198 final GoalType goal = getGoalType();
199 final int n = point.length;
200 double[] r = computeObjectiveGradient(point);
201 if (goal == GoalType.MINIMIZE) {
202 for (int i = 0; i < n; i++) {
203 r[i] = -r[i];
204 }
205 }
206
207 // Initial search direction.
208 double[] steepestDescent = preconditioner.precondition(point, r);
209 double[] searchDirection = steepestDescent.clone();
210
211 double delta = 0;
212 for (int i = 0; i < n; ++i) {
213 delta += r[i] * searchDirection[i];
214 }
215
216 PointValuePair current = null;
217 int iter = 0;
218 int maxEval = getMaxEvaluations();
219 while (true) {
220 ++iter;
221
222 final double objective = computeObjectiveValue(point);
223 PointValuePair previous = current;
224 current = new PointValuePair(point, objective);
225 if (previous != null) {
226 if (checker.converged(iter, previous, current)) {
227 // We have found an optimum.
228 return current;
229 }
230 }
231
232 // Find the optimal step in the search direction.
233 final UnivariateFunction lsf = new LineSearchFunction(point, searchDirection);
234 final double uB = findUpperBound(lsf, 0, initialStep);
235 // XXX Last parameters is set to a value close to zero in order to
236 // work around the divergence problem in the "testCircleFitting"
237 // unit test (see MATH-439).
238 final double step = solver.solve(maxEval, lsf, 0, uB, 1e-15);
239 maxEval -= solver.getEvaluations(); // Subtract used up evaluations.
240
241 // Validate new point.
242 for (int i = 0; i < point.length; ++i) {
243 point[i] += step * searchDirection[i];
244 }
245
246 r = computeObjectiveGradient(point);
247 if (goal == GoalType.MINIMIZE) {
248 for (int i = 0; i < n; ++i) {
249 r[i] = -r[i];
250 }
251 }
252
253 // Compute beta.
254 final double deltaOld = delta;
255 final double[] newSteepestDescent = preconditioner.precondition(point, r);
256 delta = 0;
257 for (int i = 0; i < n; ++i) {
258 delta += r[i] * newSteepestDescent[i];
259 }
260
261 final double beta;
262 switch (updateFormula) {
263 case FLETCHER_REEVES:
264 beta = delta / deltaOld;
265 break;
266 case POLAK_RIBIERE:
267 double deltaMid = 0;
268 for (int i = 0; i < r.length; ++i) {
269 deltaMid += r[i] * steepestDescent[i];
270 }
271 beta = (delta - deltaMid) / deltaOld;
272 break;
273 default:
274 // Should never happen.
275 throw new MathInternalError();
276 }
277 steepestDescent = newSteepestDescent;
278
279 // Compute conjugate search direction.
280 if (iter % n == 0 ||
281 beta < 0) {
282 // Break conjugation: reset search direction.
283 searchDirection = steepestDescent.clone();
284 } else {
285 // Compute new conjugate search direction.
286 for (int i = 0; i < n; ++i) {
287 searchDirection[i] = steepestDescent[i] + beta * searchDirection[i];
288 }
289 }
290 }
291 }
292
293 /**
294 * Scans the list of (required and optional) optimization data that
295 * characterize the problem.
296 *
297 * @param optData Optimization data.
298 * The following data will be looked for:
299 * <ul>
300 * <li>{@link InitialStep}</li>
301 * </ul>
302 */
303 private void parseOptimizationData(OptimizationData... optData) {
304 // The existing values (as set by the previous call) are reused if
305 // not provided in the argument list.
306 for (OptimizationData data : optData) {
307 if (data instanceof BracketingStep) {
308 initialStep = ((BracketingStep) data).getBracketingStep();
309 // If more data must be parsed, this statement _must_ be
310 // changed to "continue".
311 break;
312 }
313 }
314 }
315
316 /**
317 * Finds the upper bound b ensuring bracketing of a root between a and b.
318 *
319 * @param f function whose root must be bracketed.
320 * @param a lower bound of the interval.
321 * @param h initial step to try.
322 * @return b such that f(a) and f(b) have opposite signs.
323 * @throws MathIllegalStateException if no bracket can be found.
324 */
325 private double findUpperBound(final UnivariateFunction f,
326 final double a, final double h) {
327 final double yA = f.value(a);
328 double yB = yA;
329 for (double step = h; step < Double.MAX_VALUE; step *= FastMath.max(2, yA / yB)) {
330 final double b = a + step;
331 yB = f.value(b);
332 if (yA * yB <= 0) {
333 return b;
334 }
335 }
336 throw new MathIllegalStateException(LocalizedFormats.UNABLE_TO_BRACKET_OPTIMUM_IN_LINE_SEARCH);
337 }
338
339 /** Default identity preconditioner. */
340 public static class IdentityPreconditioner implements Preconditioner {
341 /** {@inheritDoc} */
342 public double[] precondition(double[] variables, double[] r) {
343 return r.clone();
344 }
345 }
346
347 /**
348 * Internal class for line search.
349 * <p>
350 * The function represented by this class is the dot product of
351 * the objective function gradient and the search direction. Its
352 * value is zero when the gradient is orthogonal to the search
353 * direction, i.e. when the objective function value is a local
354 * extremum along the search direction.
355 * </p>
356 */
357 private class LineSearchFunction implements UnivariateFunction {
358 /** Current point. */
359 private final double[] currentPoint;
360 /** Search direction. */
361 private final double[] searchDirection;
362
363 /**
364 * @param point Current point.
365 * @param direction Search direction.
366 */
367 public LineSearchFunction(double[] point,
368 double[] direction) {
369 currentPoint = point.clone();
370 searchDirection = direction.clone();
371 }
372
373 /** {@inheritDoc} */
374 public double value(double x) {
375 // current point in the search direction
376 final double[] shiftedPoint = currentPoint.clone();
377 for (int i = 0; i < shiftedPoint.length; ++i) {
378 shiftedPoint[i] += x * searchDirection[i];
379 }
380
381 // gradient of the objective function
382 final double[] gradient = computeObjectiveGradient(shiftedPoint);
383
384 // dot product with the search direction
385 double dotProduct = 0;
386 for (int i = 0; i < gradient.length; ++i) {
387 dotProduct += gradient[i] * searchDirection[i];
388 }
389
390 return dotProduct;
391 }
392 }
393 }