/*========================================================================= * * Copyright NumFOCUS * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0.txt * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * *=========================================================================*/ #include "itkSPSAOptimizer.h" #include "itkMath.h" namespace itk { /** * ************************* Constructor ************************ */ SPSAOptimizer ::SPSAOptimizer() { itkDebugMacro("Constructor"); m_CurrentIteration = 0; m_Maximize = false; m_StopCondition = StopConditionSPSAOptimizerEnum::Unknown; m_StateOfConvergenceDecayRate = 0.9; m_Tolerance = 1e-06; m_StateOfConvergence = 0; m_MaximumNumberOfIterations = 100; m_MinimumNumberOfIterations = 10; m_GradientMagnitude = 0.0; m_NumberOfPerturbations = 1; m_LearningRate = 0.0; m_Sa = 1.0; m_Sc = 1.0; m_A = m_MaximumNumberOfIterations / 10; m_Alpha = 0.602; m_Gamma = 0.101; m_Generator = Statistics::MersenneTwisterRandomVariateGenerator::GetInstance(); } // end Constructor /** * ************************* PrintSelf ************************** */ void SPSAOptimizer::PrintSelf(std::ostream & os, Indent indent) const { Superclass::PrintSelf(os, indent); os << indent << "a: " << m_Sa << std::endl; os << indent << "A: " << m_A << std::endl; os << indent << "Alpha: " << m_Alpha << std::endl; os << indent << "c: " << m_Sc << std::endl; os << indent << "Gamma: " << m_Gamma << std::endl; os << indent << "Tolerance: " << m_Tolerance << std::endl; os << indent << "GradientMagnitude: " << m_GradientMagnitude << std::endl; os << indent << "StateOfConvergenceDecayRate: " << m_StateOfConvergenceDecayRate << std::endl; os << indent << "Gradient: " << m_Gradient << std::endl; os << indent << "StateOfConvergence: " << m_StateOfConvergence << std::endl; os << indent << "NumberOfPerturbations: " << m_NumberOfPerturbations << std::endl; os << indent << "LearningRate: " << m_LearningRate << std::endl; os << indent << "MaximumNumberOfIterations: " << m_MaximumNumberOfIterations << std::endl; os << indent << "MinimumNumberOfIterations: " << m_MinimumNumberOfIterations << std::endl; os << indent << "Maximize: " << m_Maximize << std::endl; os << indent << "CurrentIteration: " << m_CurrentIteration; if (m_CostFunction) { os << indent << "CostFunction: " << m_CostFunction; } os << indent << "StopCondition: " << m_StopCondition; os << std::endl; } // end PrintSelf /** * ***************** GetValue(parameters) ********************* * Get the cost function value at a position. */ SPSAOptimizer::MeasureType SPSAOptimizer::GetValue(const ParametersType & parameters) const { /** * This method just calls the Superclass' implementation, * but is necessary because GetValue(void) is also declared * in this class. */ return this->Superclass::GetValue(parameters); } /** * ***************** GetValue() ******************************** * Get the cost function value at the current position. */ SPSAOptimizer::MeasureType SPSAOptimizer::GetValue() const { /** * The SPSA does not compute the cost function value at * the current position during the optimization, so calculate * it on request: */ return this->GetValue(this->GetCurrentPosition()); } /** * *********************** StartOptimization ******************** */ void SPSAOptimizer::StartOptimization() { itkDebugMacro("StartOptimization"); if (!m_CostFunction) { itkExceptionMacro(<< "No objective function defined! "); } /** The number of parameters: */ const unsigned int spaceDimension = m_CostFunction->GetNumberOfParameters(); if (spaceDimension != this->GetInitialPosition().GetSize()) { itkExceptionMacro(<< "Number of parameters not correct!"); } m_CurrentIteration = 0; m_StopCondition = StopConditionSPSAOptimizerEnum::Unknown; m_StateOfConvergence = 0.0; this->SetCurrentPosition(this->GetInitialPosition()); this->ResumeOptimization(); } // end StartOptimization /** * ********************** ResumeOptimization ******************** */ void SPSAOptimizer::ResumeOptimization() { itkDebugMacro("ResumeOptimization"); m_Stop = false; InvokeEvent(StartEvent()); while (!m_Stop) { AdvanceOneStep(); this->InvokeEvent(IterationEvent()); if (m_Stop) { break; } m_CurrentIteration++; if (m_CurrentIteration >= m_MaximumNumberOfIterations) { m_StopCondition = StopConditionSPSAOptimizerEnum::MaximumNumberOfIterations; StopOptimization(); break; } /** Check convergence */ if ((m_StateOfConvergence < m_Tolerance) && (m_CurrentIteration >= m_MinimumNumberOfIterations)) { m_StopCondition = StopConditionSPSAOptimizerEnum::BelowTolerance; StopOptimization(); break; } m_StateOfConvergence *= m_StateOfConvergenceDecayRate; } // while !m_stop } // end ResumeOptimization /** * ********************** StopOptimization ********************** */ void SPSAOptimizer::StopOptimization() { itkDebugMacro("StopOptimization"); m_Stop = true; InvokeEvent(EndEvent()); } // end StopOptimization /** * ********************** AdvanceOneStep ************************ */ void SPSAOptimizer::AdvanceOneStep() { itkDebugMacro("AdvanceOneStep"); /** Maximize of Minimize the function? */ double direction; if (this->m_Maximize) { direction = 1.0; } else { direction = -1.0; } /** The number of parameters: */ const unsigned int spaceDimension = m_CostFunction->GetNumberOfParameters(); /** Instantiate the newPosition vector and get the current * parameters */ ParametersType newPosition(spaceDimension); const ParametersType & currentPosition = this->GetCurrentPosition(); /** Compute the gradient as an average of q estimates, where * q = m_NumberOfPerturbations */ try { this->ComputeGradient(currentPosition, m_Gradient); } catch (ExceptionObject & err) { // An exception has occurred. // Terminate immediately. m_StopCondition = StopConditionSPSAOptimizerEnum::MetricError; StopOptimization(); // Pass exception to caller throw err; } /** Compute the gain a_k */ const double ak = this->Compute_a(m_CurrentIteration); /** And save it for users that are interested */ m_LearningRate = ak; /** * Compute the new parameters. */ newPosition = currentPosition + (direction * ak) * m_Gradient; this->SetCurrentPosition(newPosition); /** Compute the GradientMagnitude (for checking convergence) */ m_GradientMagnitude = m_Gradient.magnitude(); /** Update the state of convergence: */ m_StateOfConvergence += ak * m_GradientMagnitude; } // end AdvanceOneStep /** * ************************** Compute_a ************************* * * This function computes the parameter a at iteration k, as * described by Spall. */ double SPSAOptimizer::Compute_a(SizeValueType k) const { return static_cast(m_Sa / std::pow(m_A + k + 1, m_Alpha)); } // end Compute_a /** * ************************** Compute_c ************************* * * This function computes the parameter a at iteration k, as * described by Spall. */ double SPSAOptimizer::Compute_c(SizeValueType k) const { return static_cast(m_Sc / std::pow(k + 1, m_Gamma)); } // end Compute_c /** * ********************** GenerateDelta ************************* * * This function generates a perturbation vector delta. * Currently the elements are drawn from a bernouilli * distribution. (+- 1) */ void SPSAOptimizer::GenerateDelta(const unsigned int spaceDimension) { m_Delta = DerivativeType(spaceDimension); const ScalesType & scales = this->GetScales(); // Make sure the scales have been set properly if (scales.size() != spaceDimension) { itkExceptionMacro(<< "The size of Scales is " << scales.size() << ", but the NumberOfParameters for the CostFunction is " << spaceDimension << "."); } const ScalesType & invScales = this->GetInverseScales(); for (unsigned int j = 0; j < spaceDimension; ++j) { /** Generate randomly -1 or 1. */ m_Delta[j] = 2 * Math::Round(this->m_Generator->GetUniformVariate(0.0f, 1.0f)) - 1; /** * Take scales into account. The perturbation of a parameter that has a * large range (and thus is assigned a small scaler) should be higher than * the perturbation of a parameter that has a small range. */ m_Delta[j] *= invScales[j]; } } // end GenerateDelta /** * *************** ComputeGradient() ***************************** */ void SPSAOptimizer::ComputeGradient(const ParametersType & parameters, DerivativeType & gradient) { const unsigned int spaceDimension = parameters.GetSize(); /** Compute c_k */ const double ck = this->Compute_c(m_CurrentIteration); /** Instantiate the vectors thetaplus, thetamin, * set the gradient to the correct size, and get the scales. */ ParametersType thetaplus(spaceDimension); ParametersType thetamin(spaceDimension); gradient = DerivativeType(spaceDimension); gradient.Fill(0.0); const ScalesType & scales = this->GetScales(); /** Compute the gradient as an average of q estimates, where * q = m_NumberOfPerturbations */ for (SizeValueType perturbation = 1; perturbation <= this->GetNumberOfPerturbations(); ++perturbation) { /** Generate a (scaled) perturbation vector m_Delta */ this->GenerateDelta(spaceDimension); /** Create thetaplus and thetamin */ for (unsigned int j = 0; j < spaceDimension; j++) { thetaplus[j] = parameters[j] + ck * m_Delta[j]; thetamin[j] = parameters[j] - ck * m_Delta[j]; } /** Compute the cost function value at thetaplus */ const double valueplus = this->GetValue(thetaplus); /** Compute the cost function value at thetamin */ const double valuemin = this->GetValue(thetamin); /** Compute the contribution to the gradient g_k */ const double valuediff = (valueplus - valuemin) / (2 * ck); for (unsigned int j = 0; j < spaceDimension; j++) { // remember to divide the gradient by the NumberOfPerturbations! gradient[j] += valuediff / m_Delta[j]; } } // end for ++perturbation /** Apply scaling (see below) and divide by the NumberOfPerturbations */ for (unsigned int j = 0; j < spaceDimension; j++) { gradient[j] /= (itk::Math::sqr(scales[j]) * static_cast(m_NumberOfPerturbations)); } /** * Scaling was still needed, because the gradient * should point along the direction of the applied * perturbation. * * Recall that we scaled the perturbation vector by dividing each * element j by scales[j]: * delta'[j] = delta[j] / scales[j] * = (+ or -) 1 / scales[j] * * Consider the case of NumberOfPerturbations=1. * If we would not do any scaling the gradient would * be computed as: * grad[j] = valuediff / delta'[j] * = valuediff / ( delta[j] / scales[j] ) * = scales[j] * valuediff / delta[j] * = (+ or -) valuediff * scales[j] * * This is wrong, because it gives a vector that points * in a different direction than the perturbation. Besides, * it would give an opposite effect as expected from the scaling. * For rigid registration for example, we choose the scaler for * the rotation 1 and for the translation 1/1000 (see * Examples/Registration/ImageRegistration5.cxx), because * we want the optimizer to adjust the translation in bigger steps. * In the formula above, the grad[translation] would then be SMALLER * than grad[rotation], so the optimizer would adjust the translation * in smaller steps. * * To make the gradient point along the perturbation direction we * have to divide it by the square of the scales, to return the scaling * parameter to the denominator where it belongs: * grad[j] = (+ or -) valuediff * scales[j] / scales[j]^2 * = (+ or -) valuediff / scales[j] * which is correct. Now the optimizer will take a step * in the direction of the perturbation (or the opposite * of course, if valuediff is negative). * */ } // end ComputeGradient /** * ************* GuessParameters ************************* */ void SPSAOptimizer::GuessParameters(SizeValueType numberOfGradientEstimates, double initialStepSize) { /** Guess A */ this->SetA(static_cast(this->GetMaximumNumberOfIterations()) / 10.0); if (!m_CostFunction) { itkExceptionMacro(<< "No objective function defined! "); } /** The number of parameters: */ const unsigned int spaceDimension = m_CostFunction->GetNumberOfParameters(); /** Check if the initial position has the correct number of parameters */ const ParametersType & initialPosition = this->GetInitialPosition(); if (spaceDimension != initialPosition.GetSize()) { itkExceptionMacro(<< "Number of parameters not correct!"); } /** Estimate the maximum absolute element of the initial gradient */ DerivativeType averageAbsoluteGradient(spaceDimension); averageAbsoluteGradient.Fill(0.0); m_CurrentIteration = 0; for (SizeValueType n = 1; n <= numberOfGradientEstimates; ++n) { this->ComputeGradient(initialPosition, m_Gradient); for (unsigned int j = 0; j < spaceDimension; j++) { averageAbsoluteGradient[j] += std::fabs(m_Gradient[j]); } } // end for ++n averageAbsoluteGradient /= static_cast(numberOfGradientEstimates); /** Set a in order to make the first steps approximately have an initialStepSize */ this->SetSa(initialStepSize * std::pow(m_A + 1.0, m_Alpha) / averageAbsoluteGradient.max_value()); } // end GuessParameters const std::string SPSAOptimizer::GetStopConditionDescription() const { std::ostringstream reason; reason << this->GetNameOfClass() << ": "; switch (m_StopCondition) { case StopConditionSPSAOptimizerEnum::Unknown: reason << "Unknown stop condition"; break; case StopConditionSPSAOptimizerEnum::MaximumNumberOfIterations: reason << "Maximum number of iterations exceeded. Number of iterations is " << m_MaximumNumberOfIterations; break; case StopConditionSPSAOptimizerEnum::BelowTolerance: reason << "Below tolerance. " << "Tolerance is " << m_Tolerance; break; case StopConditionSPSAOptimizerEnum::MetricError: reason << "Metric error"; break; default: reason << " No reason given for termination "; break; } return reason.str(); } /** Print enum values */ std::ostream & operator<<(std::ostream & out, const SPSAOptimizerEnums::StopConditionSPSAOptimizer value) { return out << [value] { switch (value) { case SPSAOptimizerEnums::StopConditionSPSAOptimizer::Unknown: return "itk::SPSAOptimizerEnums::StopConditionSPSAOptimizer::Unknown"; case SPSAOptimizerEnums::StopConditionSPSAOptimizer::MaximumNumberOfIterations: return "itk::SPSAOptimizerEnums::StopConditionSPSAOptimizer::MaximumNumberOfIterations"; case SPSAOptimizerEnums::StopConditionSPSAOptimizer::BelowTolerance: return "itk::SPSAOptimizerEnums::StopConditionSPSAOptimizer::BelowTolerance"; case SPSAOptimizerEnums::StopConditionSPSAOptimizer::MetricError: return "itk::SPSAOptimizerEnums::StopConditionSPSAOptimizer::MetricError"; default: return "INVALID VALUE FOR itk::SPSAOptimizerEnums::StopConditionSPSAOptimizer"; } }(); } } // end namespace itk