pr-preview/pr-152/NewtonWithTrustRegion_8cxx_source.html

// Copyright 2023, UChicago Argonne, LLC

// All Rights Reserved

// Software Name: NEML2 -- the New Engineering material Model Library, version 2

// By: Argonne National Laboratory

// OPEN SOURCE LICENSE (MIT)

//

// Permission is hereby granted, free of charge, to any person obtaining a copy

// of this software and associated documentation files (the "Software"), to deal

// in the Software without restriction, including without limitation the rights

// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell

// copies of the Software, and to permit persons to whom the Software is

// furnished to do so, subject to the following conditions:

//

// The above copyright notice and this permission notice shall be included in

// all copies or substantial portions of the Software.

//

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN

// THE SOFTWARE.


#include "neml2/solvers/NewtonWithTrustRegion.h"

#include <iomanip>

#include "neml2/misc/math.h"


namespace neml2

{

// Register NewtonWithTrustRegion through the register_NEML2_object macro (defined elsewhere).
// NOTE(review): presumably this makes the solver constructible by type name -- confirm.
register_NEML2_object(NewtonWithTrustRegion);


// Declare the options understood by NewtonWithTrustRegion.
//
// Starts from the options of the base Newton solver and adds the trust-region controls
// (initial/maximum radius, shrink/expand thresholds and factors, step acceptance threshold)
// together with the tolerances and iteration cap forwarded to the sub-problem solver.
//
// Returns the option set populated with default values and doc strings.
OptionSet
NewtonWithTrustRegion::expected_options()
{
  OptionSet options = Newton::expected_options();
  options.doc() =
      "A trust-region Newton solver. The step size and direction are modified by solving a "
      "constrained minimization problem using the local quadratic approximation. The default "
      "solver parameters are chosen based on a limited set of problems we tested on and are "
      "expected to be tuned.";

  // Every Real-valued option follows the same two-step pattern: store the default value, then
  // attach its doc string.
  const auto add_real = [&options](const std::string & name, Real value, const std::string & doc)
  {
    options.set<Real>(name) = value;
    options.set(name).doc() = doc;
  };

  // Trust region radius
  add_real("delta_0", 1.0, "Initial trust region radius");
  add_real("delta_max", 10.0, "Maximum trust region radius");

  // Thresholds controlling when the radius is adjusted
  add_real("reduce_criteria",
           0.25,
           "The trust region radius is reduced when the merit "
           "function reduction is below this threshold");
  add_real("expand_criteria",
           0.75,
           "The trust region radius is increased when the merit "
           "function reduction is above this threshold");

  // Factors applied when the radius is adjusted
  add_real("reduce_factor", 0.25, "Factor to apply when reducing the trust region radius");
  add_real("expand_factor", 2.0, "Factor to apply when increasing the trust region radius");

  // Step acceptance
  add_real("accept_criteria",
           0.1,
           "Reject the current step when the merit function reduction is below this threshold");

  // Controls for the quadratic sub-problem solve
  add_real("subproblem_rel_tol", 1e-6, "Relative tolerance used for the quadratic sub-problem");
  add_real("subproblem_abs_tol", 1e-8, "Absolute tolerance used for the quadratic sub-problem");

  options.set<unsigned int>("subproblem_max_its") = 10;
  options.set("subproblem_max_its").doc() =
      "Maximum number of allowable iterations when solving the quadratic sub-problem";

  return options;
}


// Construct the solver from an option set.
//
// The Newton base class is constructed from the same options. The trust-region sub-problem and
// its solver are constructed from option sets produced by subproblem_options() and
// subproblem_solver_options(); the remaining members are read directly from `options`.
//
// NOTE: subproblem_options() and subproblem_solver_options() are member functions called from
// the member-initializer list, i.e. before the data members of this class are initialized. As
// defined in this file they only use their argument and static expected_options() calls, so
// they do not read any member of this object.
//
// The trust region radius _delta is not set here; it is assigned in prepare().
NewtonWithTrustRegion::NewtonWithTrustRegion(const OptionSet & options)

  : Newton(options),

    _subproblem(subproblem_options(options)),

    _subproblem_solver(subproblem_solver_options(options)),

    _delta_0(options.get<Real>("delta_0")),

    _delta_max(options.get<Real>("delta_max")),

    _reduce_criteria(options.get<Real>("reduce_criteria")),

    _expand_criteria(options.get<Real>("expand_criteria")),

    _reduce_factor(options.get<Real>("reduce_factor")),

    _expand_factor(options.get<Real>("expand_factor")),

    _accept_criteria(options.get<Real>("accept_criteria"))

{

}


// Build the options used to construct the trust-region sub-problem.
//
// The solver options passed in are currently unused: the sub-problem is always created with
// its own default options.
OptionSet
NewtonWithTrustRegion::subproblem_options(const OptionSet & /*options*/) const
{
  // By default the nonlinear system turns off automatic scaling (which is what we want here)
  OptionSet subproblem_defaults = TrustRegionSubProblem::expected_options();
  return subproblem_defaults;
}


// Build the options for the Newton solver that solves the trust-region sub-problem.
//
// Starts from the default Newton options and overrides the absolute tolerance, relative
// tolerance and maximum iteration count with the "subproblem_*" values found in `options`.
OptionSet
NewtonWithTrustRegion::subproblem_solver_options(const OptionSet & options) const
{
  // Read the sub-problem controls requested by the user
  const Real abs_tol = options.get<Real>("subproblem_abs_tol");
  const Real rel_tol = options.get<Real>("subproblem_rel_tol");
  const unsigned int max_its = options.get<unsigned int>("subproblem_max_its");

  // Apply them on top of the default Newton options
  OptionSet newton_options = Newton::expected_options();
  newton_options.set<Real>("abs_tol") = abs_tol;
  newton_options.set<Real>("rel_tol") = rel_tol;
  newton_options.set<unsigned int>("max_its") = max_its;
  return newton_options;
}


// Reset the trust region radius before the iterative update starts.
//
// _delta becomes a Scalar filled with the initial radius _delta_0, created with the batch
// sizes and tensor options of the initial guess `x`. The system argument is unused.
void
NewtonWithTrustRegion::prepare(const NonlinearSystem & /*system*/, const BatchTensor & x)
{
  const auto batch_shape = x.batch_sizes();
  const auto tensor_options = x.options();
  _delta = Scalar::full(batch_shape, _delta_0, tensor_options);
}


// Perform one trust-region update of the trial solution `x`.
//
// Steps:
//   1. Obtain a step from solve_direction().
//   2. Compute the reduction of the merit function predicted by the quadratic model.
//   3. Evaluate the system at the trial point and compute the actual reduction of 0.5 * |R|^2.
//   4. Form rho = actual / predicted, then shrink or expand the trust region radius _delta
//      batch-wise by comparing rho against the reduce/expand thresholds.
//   5. Where rho >= _accept_criteria the trial point is written into `x`; elsewhere `x` keeps its
//      current value. Finally the system is given the resulting `x` via set_solution().
void
NewtonWithTrustRegion::update(NonlinearSystem & system, BatchTensor & x)
{
  auto step = solve_direction(system);

  // Predicted reduction in the merit function.
  // The residual norm and the predicted reduction must be taken here, before the system is
  // re-evaluated at the trial point below: merit_function_reduction() reads the residual and
  // Jacobian views of the system.
  auto norm_current = system.residual_norm();
  auto predicted_reduction = merit_function_reduction(system, step);

  // Actual reduction in the objective function, measured at the trial point
  auto x_trial = x + system.scale_direction(step);
  auto [R_trial, J_trial] = system.residual_and_Jacobian(x_trial);
  auto norm_trial = system.residual_norm();
  auto merit_current = 0.5 * torch::pow(norm_current, 2.0);
  auto merit_trial = 0.5 * torch::pow(norm_trial, 2.0);
  auto actual_reduction = merit_current - merit_trial;

  // Quality of the subproblem solution compared to the quadratic model
  auto rho = actual_reduction / predicted_reduction;

  // Shrink the trust region where the quadratic model was a poor predictor
  const auto shrink = rho < _reduce_criteria;
  auto delta_shrunk = _reduce_factor * _delta.batch_index({shrink});
  _delta.batch_index_put({shrink}, delta_shrunk);

  // Expand the trust region where the quadratic model was a good predictor, capped at the
  // maximum radius
  const auto expand = rho > _expand_criteria;
  auto delta_expanded =
      torch::clamp(_expand_factor * _delta.batch_index({expand}), c10::nullopt, _delta_max);
  _delta.batch_index_put({expand}, delta_expanded);

  // Batch-wise decision on whether the trial point is accepted (trailing dimension added so the
  // mask broadcasts against the solution in torch::where below)
  auto accept = (rho >= _accept_criteria).unsqueeze(-1);

  // Do some printing if verbose
  if (verbose)
  {
    const auto rho_lo = torch::min(rho).item<Real>();
    const auto rho_hi = torch::max(rho).item<Real>();
    std::cout << "     RHO MIN/MAX            : " << std::scientific << rho_lo << "/"
              << std::scientific << rho_hi << std::endl;

    const auto n_accepted = torch::sum(accept).item<TorchSize>();
    const auto n_total = utils::storage_size(_delta.batch_sizes());
    std::cout << "     ACCEPTANCE RATE        : " << n_accepted << "/" << n_total << std::endl;

    const auto delta_lo = torch::min(_delta).item<Real>();
    const auto delta_hi = torch::max(_delta).item<Real>();
    std::cout << "     ADJUSTED DELTA MIN/MAX : " << std::scientific << delta_lo << "/"
              << std::scientific << delta_hi << std::endl;
  }

  // Take the trial point where accepted, keep the current point elsewhere
  auto x_next = torch::where(accept, x_trial, x);
  x.variable_data().copy_(x_next);
  system.set_solution(x);
}


// Compute the update direction for the current iterate.
//
// Two candidates are computed: the full Newton step from the base class, and a trust-region
// step obtained from the solution of the trust-region sub-problem. Batch-wise, the full Newton
// step is returned when its 2-norm does not exceed sqrt(2 * _delta) (equivalently,
// 0.5 * |p|^2 <= _delta); otherwise the trust-region step is returned.
BatchTensor
NewtonWithTrustRegion::solve_direction(const NonlinearSystem & system)
{
  // The full Newton step
  auto newton_step = Newton::solve_direction(system);

  // The trust region step (obtained by solving the bound constrained subproblem).
  // The sub-problem solution is clamped to be non-negative before it is turned into a direction.
  _subproblem.reinit(system, _delta);
  auto sub_sol = _subproblem.solution().clone();
  auto [converged, n_iters] = _subproblem_solver.solve(_subproblem, sub_sol);
  sub_sol = BatchTensor(torch::clamp(sub_sol, 0.0), sub_sol.batch_dim());
  auto trust_step = -_subproblem.preconditioned_direction(sub_sol);

  // Now select between the two... Basically take the full Newton step whenever possible
  auto newton_step_norm = torch::linalg::vector_norm(newton_step, 2, -1, false, c10::nullopt);
  auto max_step_norm = math::sqrt(2.0 * _delta);
  auto use_newton = (newton_step_norm <= max_step_norm).unsqueeze(-1);

  // Do some printing if verbose
  if (verbose)
  {
    std::cout << "     TRUST-REGION ITERATIONS: " << n_iters << std::endl;

    const auto n_active = torch::sum(sub_sol > 0).item<TorchSize>();
    const auto n_total = utils::storage_size(sub_sol.batch_sizes());
    std::cout << "     ACTIVE CONSTRAINTS     : " << n_active << "/" << n_total << std::endl;
  }

  auto selected_step = torch::where(use_newton, newton_step, trust_step);
  return BatchTensor(selected_step, newton_step.batch_dim());
}


// Reduction of the merit function predicted by the local quadratic model for a step `p`.
//
// With R the residual view and J the Jacobian view of `system`, this returns
//   -R.(J p) - 0.5 (J p).(J p)
// which equals 0.5 |R|^2 - 0.5 |R + J p|^2.
Scalar
NewtonWithTrustRegion::merit_function_reduction(const NonlinearSystem & system,
                                                const BatchTensor & p) const
{
  const auto J_times_p = math::bmv(system.Jacobian_view(), p);
  const auto linear_term = math::bvv(system.residual_view(), J_times_p);
  const auto quadratic_term = math::bvv(J_times_p, J_times_p);
  return -linear_term - 0.5 * quadratic_term;
}


} // namespace neml2

neml2::BatchTensorBase::batch_sizes
TorchShapeRef batch_sizes() const
Return the batch size.
Definition BatchTensorBase.cxx:149

neml2::BatchTensorBase::batch_index
Derived batch_index(TorchSlice indices) const
Get a batch.
Definition BatchTensorBase.cxx:184

neml2::BatchTensorBase::batch_index_put
void batch_index_put(TorchSlice indices, const torch::Tensor &other)
Set an index sliced on the batch dimensions to a value.
Definition BatchTensorBase.cxx:202

neml2::BatchTensor
Definition BatchTensor.h:32

neml2::CrossRef
The wrapper (decorator) for cross-referencing unresolved values at parse time.
Definition CrossRef.h:52

neml2::FixedDimTensor< Scalar >::full
static Scalar full(Real init, const torch::TensorOptions &options=default_tensor_options())
Unbatched tensor filled with a given value given base shape.
Definition FixedDimTensor.h:176

neml2::NewtonWithTrustRegion
The nonlinear solver solves a nonlinear system of equations.
Definition NewtonWithTrustRegion.h:47

neml2::NewtonWithTrustRegion::_subproblem_solver
Newton _subproblem_solver
Solver used to solve the trust-region subproblem.
Definition NewtonWithTrustRegion.h:73

neml2::NewtonWithTrustRegion::solve_direction
virtual BatchTensor solve_direction(const NonlinearSystem &system) override
Find the current update direction.
Definition NewtonWithTrustRegion.cxx:163

neml2::NewtonWithTrustRegion::_reduce_criteria
Real _reduce_criteria
Criteria for reducing the trust region.
Definition NewtonWithTrustRegion.h:85

neml2::NewtonWithTrustRegion::_reduce_factor
Real _reduce_factor
Cutback factor if we do reduce the trust region.
Definition NewtonWithTrustRegion.h:91

neml2::NewtonWithTrustRegion::_subproblem
TrustRegionSubProblem _subproblem
Trust-region subproblem.
Definition NewtonWithTrustRegion.h:70

neml2::NewtonWithTrustRegion::subproblem_solver_options
OptionSet subproblem_solver_options(const OptionSet &) const
Extract options for the subproblem solver.
Definition NewtonWithTrustRegion.cxx:102

neml2::NewtonWithTrustRegion::_delta_max
Real _delta_max
Maximum size of the trust region.
Definition NewtonWithTrustRegion.h:82

neml2::NewtonWithTrustRegion::merit_function_reduction
Scalar merit_function_reduction(const NonlinearSystem &system, const BatchTensor &p) const
Reduction in the merit function.
Definition NewtonWithTrustRegion.cxx:193

neml2::NewtonWithTrustRegion::_delta_0
Real _delta_0
Initial size of the trust region.
Definition NewtonWithTrustRegion.h:79

neml2::NewtonWithTrustRegion::subproblem_options
OptionSet subproblem_options(const OptionSet &) const
Extract options for the subproblem.
Definition NewtonWithTrustRegion.cxx:95

neml2::NewtonWithTrustRegion::NewtonWithTrustRegion
NewtonWithTrustRegion(const OptionSet &options)
Definition NewtonWithTrustRegion.cxx:80

neml2::NewtonWithTrustRegion::_expand_factor
Real _expand_factor
Expansion factor if we do increase the trust region.
Definition NewtonWithTrustRegion.h:94

neml2::NewtonWithTrustRegion::_delta
Scalar _delta
The trust region radius.
Definition NewtonWithTrustRegion.h:76

neml2::NewtonWithTrustRegion::_accept_criteria
Real _accept_criteria
Acceptance criteria for a step.
Definition NewtonWithTrustRegion.h:97

neml2::NewtonWithTrustRegion::update
virtual void update(NonlinearSystem &system, BatchTensor &x) override
Update trial solution.
Definition NewtonWithTrustRegion.cxx:118

neml2::NewtonWithTrustRegion::expected_options
static OptionSet expected_options()
Definition NewtonWithTrustRegion.cxx:34

neml2::NewtonWithTrustRegion::_expand_criteria
Real _expand_criteria
Criteria for expanding the trust region.
Definition NewtonWithTrustRegion.h:88

neml2::NewtonWithTrustRegion::prepare
virtual void prepare(const NonlinearSystem &system, const BatchTensor &x) override
Prepare solver internal data before the iterative update.
Definition NewtonWithTrustRegion.cxx:112

neml2::Newton
The nonlinear solver solves a nonlinear system of equations.
Definition Newton.h:39

neml2::Newton::solve
virtual std::tuple< bool, size_t > solve(NonlinearSystem &system, BatchTensor &x) override
Solve the given nonlinear system.
Definition Newton.cxx:48

neml2::Newton::expected_options
static OptionSet expected_options()
Definition Newton.cxx:34

neml2::Newton::solve_direction
virtual BatchTensor solve_direction(const NonlinearSystem &system)
Find the current update direction.
Definition Newton.cxx:121

neml2::NonlinearSystem
Definition of a nonlinear system of equations.
Definition NonlinearSystem.h:37

neml2::NonlinearSystem::expected_options
static OptionSet expected_options()
Definition NonlinearSystem.cxx:31

neml2::NonlinearSystem::solution
virtual BatchTensor solution() const
Get the solution vector.
Definition NonlinearSystem.h:66

neml2::OptionSet
A custom map-like data structure. The keys are strings, and the values can be nonhomogeneously typed.
Definition OptionSet.h:59

neml2::OptionSet::doc
const std::string & doc() const
A readonly reference to the option set's docstring.
Definition OptionSet.h:91

neml2::OptionSet::get
const T & get(const std::string &) const
Definition OptionSet.h:422

neml2::OptionSet::set
T & set(const std::string &)
Definition OptionSet.h:436

neml2::Scalar
The (logical) scalar.
Definition Scalar.h:38

neml2::Solver::verbose
const bool verbose
Whether to print additional (debugging) information during the solve.
Definition Solver.h:49

neml2::TrustRegionSubProblem::reinit
virtual void reinit(const NonlinearSystem &system, const Scalar &delta)
Definition TrustRegionSubProblem.cxx:36

neml2::TrustRegionSubProblem::preconditioned_direction
BatchTensor preconditioned_direction(const Scalar &s) const
Definition TrustRegionSubProblem.cxx:74

neml2::math::bvv
BatchTensor bvv(const BatchTensor &a, const BatchTensor &b)
Batched vector-vector (dot) product.
Definition BatchTensor.cxx:137

neml2::math::bmv
BatchTensor bmv(const BatchTensor &a, const BatchTensor &v)
Batched matrix-vector product.
Definition BatchTensor.cxx:122

neml2::math::sqrt
Derived sqrt(const Derived &a)
Definition BatchTensorBase.h:439

neml2::utils::storage_size
TorchSize storage_size(TorchShapeRef shape)
The flattened storage size of a tensor with given shape.
Definition utils.cxx:32

neml2
Definition CrossRef.cxx:32