NEML2 1.4.0
Loading...
Searching...
No Matches
NewtonWithTrustRegion.cxx
1// Copyright 2023, UChicago Argonne, LLC
2// All Rights Reserved
3// Software Name: NEML2 -- the New Engineering material Model Library, version 2
4// By: Argonne National Laboratory
5// OPEN SOURCE LICENSE (MIT)
6//
7// Permission is hereby granted, free of charge, to any person obtaining a copy
8// of this software and associated documentation files (the "Software"), to deal
9// in the Software without restriction, including without limitation the rights
10// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11// copies of the Software, and to permit persons to whom the Software is
12// furnished to do so, subject to the following conditions:
13//
14// The above copyright notice and this permission notice shall be included in
15// all copies or substantial portions of the Software.
16//
17// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23// THE SOFTWARE.
24
25#include "neml2/solvers/NewtonWithTrustRegion.h"
26#include "neml2/misc/math.h"
27
28#include <iomanip>
29
30namespace neml2
31{
32register_NEML2_object(NewtonWithTrustRegion);
33
36{
38 options.doc() =
39 "A trust-region Newton solver. The step size and direction are modified by solving a "
40 "constrained minimization problem using the local quadratic approximation. The default "
41 "solver parameters are chosen based on a limited set of problems we tested on and are "
42 "expected to be tuned.";
43
44 options.set<Real>("delta_0") = 1.0;
45 options.set("delta_0").doc() = "Initial trust region radius";
46
47 options.set<Real>("delta_max") = 10.0;
48 options.set("delta_max").doc() = "Maximum trust region radius";
49
50 options.set<Real>("reduce_criteria") = 0.25;
51 options.set("reduce_criteria").doc() = "The trust region radius is reduced when the merit "
52 "function reduction is below this threshold";
53
54 options.set<Real>("expand_criteria") = 0.75;
55 options.set("expand_criteria").doc() = "The trust region radius is increased when the merit "
56 "function reduction is above this threshold";
57
58 options.set<Real>("reduce_factor") = 0.25;
59 options.set("reduce_factor").doc() = "Factor to apply when reducing the trust region radius";
60
61 options.set<Real>("expand_factor") = 2.0;
62 options.set("expand_factor").doc() = "Factor to apply when increasing the trust region radius";
63
64 options.set<Real>("accept_criteria") = 0.1;
65 options.set("accept_criteria").doc() =
66 "Reject the current step when the merit function reduction is below this threshold";
67
68 options.set<Real>("subproblem_rel_tol") = 1e-6;
69 options.set("subproblem_rel_tol").doc() = "Relative tolerance used for the quadratic sub-problem";
70
71 options.set<Real>("subproblem_abs_tol") = 1e-8;
72 options.set("subproblem_abs_tol").doc() = "Absolute tolerance used for the quadratic sub-problem";
73
74 options.set<unsigned int>("subproblem_max_its") = 10;
75 options.set("subproblem_max_its").doc() =
76 "Maximum number of allowable iterations when solving the quadratic sub-problem";
77
78 return options;
79}
80
82 : Newton(options),
83 _subproblem(subproblem_options(options)),
84 _subproblem_solver(subproblem_solver_options(options)),
85 _delta_0(options.get<Real>("delta_0")),
86 _delta_max(options.get<Real>("delta_max")),
87 _reduce_criteria(options.get<Real>("reduce_criteria")),
88 _expand_criteria(options.get<Real>("expand_criteria")),
89 _reduce_factor(options.get<Real>("reduce_factor")),
90 _expand_factor(options.get<Real>("expand_factor")),
91 _accept_criteria(options.get<Real>("accept_criteria"))
92{
93}
94
97{
98 // By default the nonlinear system turns off automatic scaling (which is what we want here)
100}
101
104{
106 solver_options.set<Real>("abs_tol") = options.get<Real>("subproblem_abs_tol");
107 solver_options.set<Real>("rel_tol") = options.get<Real>("subproblem_rel_tol");
108 solver_options.set<unsigned int>("max_its") = options.get<unsigned int>("subproblem_max_its");
109 return solver_options;
110}
111
112void
114{
115 _delta = Scalar::full(x.batch_sizes(), _delta_0, x.options());
116}
117
118void
120{
121 auto p = solve_direction(system);
122
123 // Predicted reduction in the merit function
124 auto nR = system.residual_norm();
126
127 // Actual reduction in the objective function
128 auto xp = x + system.scale_direction(p);
129 auto [Rp, Jp] = system.residual_and_Jacobian(xp);
130 auto nRp = system.residual_norm();
131 auto red_a = 0.5 * torch::pow(nR, 2.0) - 0.5 * torch::pow(nRp, 2.0);
132
133 // Quality of the subproblem solution compared to the quadratic model
134 auto rho = red_a / red_b;
135
136 // Adjust the trust region based on the quality of the subproblem
140 torch::clamp(_expand_factor * _delta.batch_index({rho > _expand_criteria}),
141 c10::nullopt,
142 _delta_max));
143
144 // Accept or reject the current step
145 auto accept = (rho >= _accept_criteria).unsqueeze(-1);
146
147 // Do some printing if verbose
148 if (verbose)
149 {
150 std::cout << " RHO MIN/MAX : " << std::scientific << torch::min(rho).item<Real>()
151 << "/" << std::scientific << torch::max(rho).item<Real>() << std::endl;
152 std::cout << " ACCEPTANCE RATE : " << torch::sum(accept).item<TorchSize>() << "/"
153 << utils::storage_size(_delta.batch_sizes()) << std::endl;
154 std::cout << " ADJUSTED DELTA MIN/MAX : " << std::scientific
155 << torch::min(_delta).item<Real>() << "/" << std::scientific
156 << torch::max(_delta).item<Real>() << std::endl;
157 }
158
159 x.variable_data().copy_(torch::where(accept, xp, x));
160 system.set_solution(x);
161}
162
165{
166 // The full Newton step
168
169 // The trust region step (obtained by solving the bound constrained subproblem)
171 auto s = _subproblem.solution().clone();
173 s = BatchTensor(torch::clamp(s, 0.0), s.batch_dim());
175
176 // Now select between the two... Basically take the full Newton step whenever possible
179
180 // Do some printing if verbose
181 if (verbose)
182 {
183 std::cout << " TRUST-REGION ITERATIONS: " << iters << std::endl;
184 std::cout << " ACTIVE CONSTRAINTS : " << torch::sum(s > 0).item<TorchSize>() << "/"
185 << utils::storage_size(s.batch_sizes()) << std::endl;
186 }
187
189 p_newton.batch_dim());
190}
191
192Scalar
194 const BatchTensor & p) const
195{
196 auto Jp = math::bmv(system.Jacobian_view(), p);
197 return -math::bvv(system.residual_view(), Jp) - 0.5 * math::bvv(Jp, Jp);
198}
199
200} // namespace neml2
TorchShapeRef batch_sizes() const
Return the batch size.
Definition BatchTensorBase.cxx:149
Derived batch_index(TorchSlice indices) const
Get a batch.
Definition BatchTensorBase.cxx:184
void batch_index_put(TorchSlice indices, const torch::Tensor &other)
Set a index sliced on the batch dimensions to a value.
Definition BatchTensorBase.cxx:202
Definition BatchTensor.h:32
The wrapper (decorator) for cross-referencing unresolved values at parse time.
Definition CrossRef.h:52
static Scalar full(Real init, const torch::TensorOptions &options=default_tensor_options())
Unbatched tensor filled with a given value given base shape.
Definition FixedDimTensor.h:176
The nonlinear solver solves a nonlinear system of equations.
Definition NewtonWithTrustRegion.h:47
Newton _subproblem_solver
Solver used to solver the trust-region subproblem.
Definition NewtonWithTrustRegion.h:73
virtual BatchTensor solve_direction(const NonlinearSystem &system) override
Find the current update direction.
Definition NewtonWithTrustRegion.cxx:164
Real _reduce_criteria
Criteria for reducing the trust region.
Definition NewtonWithTrustRegion.h:85
Real _reduce_factor
Cutback factor if we do reduce the trust region.
Definition NewtonWithTrustRegion.h:91
TrustRegionSubProblem _subproblem
Trust-region subproblem.
Definition NewtonWithTrustRegion.h:70
OptionSet subproblem_solver_options(const OptionSet &) const
Extract options for the subproblem solver.
Definition NewtonWithTrustRegion.cxx:103
Real _delta_max
Maximum size of the trust region.
Definition NewtonWithTrustRegion.h:82
Scalar merit_function_reduction(const NonlinearSystem &system, const BatchTensor &p) const
Reduction in the merit function.
Definition NewtonWithTrustRegion.cxx:193
Real _delta_0
Initial size of the trust region.
Definition NewtonWithTrustRegion.h:79
OptionSet subproblem_options(const OptionSet &) const
Extract options for the subproblem.
Definition NewtonWithTrustRegion.cxx:96
NewtonWithTrustRegion(const OptionSet &options)
Definition NewtonWithTrustRegion.cxx:81
Real _expand_factor
Expansion factor if we do increase the trust region.
Definition NewtonWithTrustRegion.h:94
Scalar _delta
The trust region radius.
Definition NewtonWithTrustRegion.h:76
Real _accept_criteria
Acceptance criteria for a step.
Definition NewtonWithTrustRegion.h:97
virtual void update(NonlinearSystem &system, BatchTensor &x) override
Update trial solution.
Definition NewtonWithTrustRegion.cxx:119
static OptionSet expected_options()
Definition NewtonWithTrustRegion.cxx:35
Real _expand_criteria
Criteria for expanding the trust region.
Definition NewtonWithTrustRegion.h:88
virtual void prepare(const NonlinearSystem &system, const BatchTensor &x) override
Prepare solver internal data before the iterative update.
Definition NewtonWithTrustRegion.cxx:113
The nonlinear solver solves a nonlinear system of equations.
Definition Newton.h:39
virtual std::tuple< bool, size_t > solve(NonlinearSystem &system, BatchTensor &x) override
Solve the given nonlinear system.
Definition Newton.cxx:48
static OptionSet expected_options()
Definition Newton.cxx:34
virtual BatchTensor solve_direction(const NonlinearSystem &system)
Find the current update direction.
Definition Newton.cxx:121
Definition of a nonlinear system of equations.
Definition NonlinearSystem.h:37
static OptionSet expected_options()
Definition NonlinearSystem.cxx:31
virtual BatchTensor solution() const
Get the solution vector.
Definition NonlinearSystem.h:66
A custom map-like data structure. The keys are strings, and the values can be nonhomogeneously typed.
Definition OptionSet.h:59
const std::string & doc() const
A readonly reference to the option set's docstring.
Definition OptionSet.h:91
const T & get(const std::string &) const
Definition OptionSet.h:422
T & set(const std::string &)
Definition OptionSet.h:436
The (logical) scalar.
Definition Scalar.h:38
const bool verbose
Whether to print additional (debugging) information during the solve.
Definition Solver.h:49
virtual void reinit(const NonlinearSystem &system, const Scalar &delta)
Definition TrustRegionSubProblem.cxx:36
BatchTensor preconditioned_direction(const Scalar &s) const
Definition TrustRegionSubProblem.cxx:74
BatchTensor vector_norm(const BatchTensor &v)
Vector norm of a vector. Falls back to math::abs is v is a Scalar.
Definition math.cxx:317
BatchTensor bvv(const BatchTensor &a, const BatchTensor &b)
Batched vector-vector (dot) product.
Definition BatchTensor.cxx:137
BatchTensor bmv(const BatchTensor &a, const BatchTensor &v)
Batched matrix-vector product.
Definition BatchTensor.cxx:122
Derived sqrt(const Derived &a)
Definition BatchTensorBase.h:439
TorchSize storage_size(TorchShapeRef shape)
The flattened storage size of a tensor with given shape.
Definition utils.cxx:32
Definition CrossRef.cxx:32