NEML2 1.4.0
Loading...
Searching...
No Matches
NewtonWithTrustRegion.cxx
1// Copyright 2023, UChicago Argonne, LLC
2// All Rights Reserved
3// Software Name: NEML2 -- the New Engineering material Model Library, version 2
4// By: Argonne National Laboratory
5// OPEN SOURCE LICENSE (MIT)
6//
7// Permission is hereby granted, free of charge, to any person obtaining a copy
8// of this software and associated documentation files (the "Software"), to deal
9// in the Software without restriction, including without limitation the rights
10// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11// copies of the Software, and to permit persons to whom the Software is
12// furnished to do so, subject to the following conditions:
13//
14// The above copyright notice and this permission notice shall be included in
15// all copies or substantial portions of the Software.
16//
17// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23// THE SOFTWARE.
24
25#include "neml2/solvers/NewtonWithTrustRegion.h"
26#include <iomanip>
27#include "neml2/misc/math.h"
28
29namespace neml2
30{
31register_NEML2_object(NewtonWithTrustRegion);
32
35{
37 options.doc() =
38 "A trust-region Newton solver. The step size and direction are modified by solving a "
39 "constrained minimization problem using the local quadratic approximation. The default "
40 "solver parameters are chosen based on a limited set of problems we tested on and are "
41 "expected to be tuned.";
42
43 options.set<Real>("delta_0") = 1.0;
44 options.set("delta_0").doc() = "Initial trust region radius";
45
46 options.set<Real>("delta_max") = 10.0;
47 options.set("delta_max").doc() = "Maximum trust region radius";
48
49 options.set<Real>("reduce_criteria") = 0.25;
50 options.set("reduce_criteria").doc() = "The trust region radius is reduced when the merit "
51 "function reduction is below this threshold";
52
53 options.set<Real>("expand_criteria") = 0.75;
54 options.set("expand_criteria").doc() = "The trust region radius is increased when the merit "
55 "function reduction is above this threshold";
56
57 options.set<Real>("reduce_factor") = 0.25;
58 options.set("reduce_factor").doc() = "Factor to apply when reducing the trust region radius";
59
60 options.set<Real>("expand_factor") = 2.0;
61 options.set("expand_factor").doc() = "Factor to apply when increasing the trust region radius";
62
63 options.set<Real>("accept_criteria") = 0.1;
64 options.set("accept_criteria").doc() =
65 "Reject the current step when the merit function reduction is below this threshold";
66
67 options.set<Real>("subproblem_rel_tol") = 1e-6;
68 options.set("subproblem_rel_tol").doc() = "Relative tolerance used for the quadratic sub-problem";
69
70 options.set<Real>("subproblem_abs_tol") = 1e-8;
71 options.set("subproblem_abs_tol").doc() = "Absolute tolerance used for the quadratic sub-problem";
72
73 options.set<unsigned int>("subproblem_max_its") = 10;
74 options.set("subproblem_max_its").doc() =
75 "Maximum number of allowable iterations when solving the quadratic sub-problem";
76
77 return options;
78}
79
81 : Newton(options),
82 _subproblem(subproblem_options(options)),
83 _subproblem_solver(subproblem_solver_options(options)),
84 _delta_0(options.get<Real>("delta_0")),
85 _delta_max(options.get<Real>("delta_max")),
86 _reduce_criteria(options.get<Real>("reduce_criteria")),
87 _expand_criteria(options.get<Real>("expand_criteria")),
88 _reduce_factor(options.get<Real>("reduce_factor")),
89 _expand_factor(options.get<Real>("expand_factor")),
90 _accept_criteria(options.get<Real>("accept_criteria"))
91{
92}
93
96{
97 // By default the nonlinear system turns off automatic scaling (which is what we want here)
99}
100
103{
105 solver_options.set<Real>("abs_tol") = options.get<Real>("subproblem_abs_tol");
106 solver_options.set<Real>("rel_tol") = options.get<Real>("subproblem_rel_tol");
107 solver_options.set<unsigned int>("max_its") = options.get<unsigned int>("subproblem_max_its");
108 return solver_options;
109}
110
111void
113{
114 _delta = Scalar::full(x.batch_sizes(), _delta_0, x.options());
115}
116
117void
119{
120 auto p = solve_direction(system);
121
122 // Predicted reduction in the merit function
123 auto nR = system.residual_norm();
125
126 // Actual reduction in the objective function
127 auto xp = x + system.scale_direction(p);
128 auto [Rp, Jp] = system.residual_and_Jacobian(xp);
129 auto nRp = system.residual_norm();
130 auto red_a = 0.5 * torch::pow(nR, 2.0) - 0.5 * torch::pow(nRp, 2.0);
131
132 // Quality of the subproblem solution compared to the quadratic model
133 auto rho = red_a / red_b;
134
135 // Adjust the trust region based on the quality of the subproblem
139 torch::clamp(_expand_factor * _delta.batch_index({rho > _expand_criteria}),
140 c10::nullopt,
141 _delta_max));
142
143 // Accept or reject the current step
144 auto accept = (rho >= _accept_criteria).unsqueeze(-1);
145
146 // Do some printing if verbose
147 if (verbose)
148 {
149 std::cout << " RHO MIN/MAX : " << std::scientific << torch::min(rho).item<Real>()
150 << "/" << std::scientific << torch::max(rho).item<Real>() << std::endl;
151 std::cout << " ACCEPTANCE RATE : " << torch::sum(accept).item<TorchSize>() << "/"
152 << utils::storage_size(_delta.batch_sizes()) << std::endl;
153 std::cout << " ADJUSTED DELTA MIN/MAX : " << std::scientific
154 << torch::min(_delta).item<Real>() << "/" << std::scientific
155 << torch::max(_delta).item<Real>() << std::endl;
156 }
157
158 x.variable_data().copy_(torch::where(accept, xp, x));
159 system.set_solution(x);
160}
161
164{
165 // The full Newton step
167
168 // The trust region step (obtained by solving the bound constrained subproblem)
170 auto s = _subproblem.solution().clone();
172 s = BatchTensor(torch::clamp(s, 0.0), s.batch_dim());
174
175 // Now select between the two... Basically take the full Newton step whenever possible
177 (torch::linalg::vector_norm(p_newton, 2, -1, false, c10::nullopt) <= math::sqrt(2.0 * _delta))
178 .unsqueeze(-1);
179
180 // Do some printing if verbose
181 if (verbose)
182 {
183 std::cout << " TRUST-REGION ITERATIONS: " << iters << std::endl;
184 std::cout << " ACTIVE CONSTRAINTS : " << torch::sum(s > 0).item<TorchSize>() << "/"
185 << utils::storage_size(s.batch_sizes()) << std::endl;
186 }
187
189 p_newton.batch_dim());
190}
191
192Scalar
194 const BatchTensor & p) const
195{
196 auto Jp = math::bmv(system.Jacobian_view(), p);
197 return -math::bvv(system.residual_view(), Jp) - 0.5 * math::bvv(Jp, Jp);
198}
199
200} // namespace neml2
TorchShapeRef batch_sizes() const
Return the batch size.
Definition BatchTensorBase.cxx:149
Derived batch_index(TorchSlice indices) const
Get a batch.
Definition BatchTensorBase.cxx:184
void batch_index_put(TorchSlice indices, const torch::Tensor &other)
Set a index sliced on the batch dimensions to a value.
Definition BatchTensorBase.cxx:202
Definition BatchTensor.h:32
The wrapper (decorator) for cross-referencing unresolved values at parse time.
Definition CrossRef.h:52
static Scalar full(Real init, const torch::TensorOptions &options=default_tensor_options())
Unbatched tensor filled with a given value given base shape.
Definition FixedDimTensor.h:176
The nonlinear solver solves a nonlinear system of equations.
Definition NewtonWithTrustRegion.h:47
Newton _subproblem_solver
Solver used to solver the trust-region subproblem.
Definition NewtonWithTrustRegion.h:73
virtual BatchTensor solve_direction(const NonlinearSystem &system) override
Find the current update direction.
Definition NewtonWithTrustRegion.cxx:163
Real _reduce_criteria
Criteria for reducing the trust region.
Definition NewtonWithTrustRegion.h:85
Real _reduce_factor
Cutback factor if we do reduce the trust region.
Definition NewtonWithTrustRegion.h:91
TrustRegionSubProblem _subproblem
Trust-region subproblem.
Definition NewtonWithTrustRegion.h:70
OptionSet subproblem_solver_options(const OptionSet &) const
Extract options for the subproblem solver.
Definition NewtonWithTrustRegion.cxx:102
Real _delta_max
Maximum size of the trust region.
Definition NewtonWithTrustRegion.h:82
Scalar merit_function_reduction(const NonlinearSystem &system, const BatchTensor &p) const
Reduction in the merit function.
Definition NewtonWithTrustRegion.cxx:193
Real _delta_0
Initial size of the trust region.
Definition NewtonWithTrustRegion.h:79
OptionSet subproblem_options(const OptionSet &) const
Extract options for the subproblem.
Definition NewtonWithTrustRegion.cxx:95
NewtonWithTrustRegion(const OptionSet &options)
Definition NewtonWithTrustRegion.cxx:80
Real _expand_factor
Expansion factor if we do increase the trust region.
Definition NewtonWithTrustRegion.h:94
Scalar _delta
The trust region radius.
Definition NewtonWithTrustRegion.h:76
Real _accept_criteria
Acceptance criteria for a step.
Definition NewtonWithTrustRegion.h:97
virtual void update(NonlinearSystem &system, BatchTensor &x) override
Update trial solution.
Definition NewtonWithTrustRegion.cxx:118
static OptionSet expected_options()
Definition NewtonWithTrustRegion.cxx:34
Real _expand_criteria
Criteria for expanding the trust region.
Definition NewtonWithTrustRegion.h:88
virtual void prepare(const NonlinearSystem &system, const BatchTensor &x) override
Prepare solver internal data before the iterative update.
Definition NewtonWithTrustRegion.cxx:112
The nonlinear solver solves a nonlinear system of equations.
Definition Newton.h:39
virtual std::tuple< bool, size_t > solve(NonlinearSystem &system, BatchTensor &x) override
Solve the given nonlinear system.
Definition Newton.cxx:48
static OptionSet expected_options()
Definition Newton.cxx:34
virtual BatchTensor solve_direction(const NonlinearSystem &system)
Find the current update direction.
Definition Newton.cxx:121
Definition of a nonlinear system of equations.
Definition NonlinearSystem.h:37
static OptionSet expected_options()
Definition NonlinearSystem.cxx:31
virtual BatchTensor solution() const
Get the solution vector.
Definition NonlinearSystem.h:66
A custom map-like data structure. The keys are strings, and the values can be nonhomogeneously typed.
Definition OptionSet.h:59
const std::string & doc() const
A readonly reference to the option set's docstring.
Definition OptionSet.h:91
const T & get(const std::string &) const
Definition OptionSet.h:422
T & set(const std::string &)
Definition OptionSet.h:436
The (logical) scalar.
Definition Scalar.h:38
const bool verbose
Whether to print additional (debugging) information during the solve.
Definition Solver.h:49
virtual void reinit(const NonlinearSystem &system, const Scalar &delta)
Definition TrustRegionSubProblem.cxx:36
BatchTensor preconditioned_direction(const Scalar &s) const
Definition TrustRegionSubProblem.cxx:74
BatchTensor bvv(const BatchTensor &a, const BatchTensor &b)
Batched vector-vector (dot) product.
Definition BatchTensor.cxx:137
BatchTensor bmv(const BatchTensor &a, const BatchTensor &v)
Batched matrix-vector product.
Definition BatchTensor.cxx:122
Derived sqrt(const Derived &a)
Definition BatchTensorBase.h:439
TorchSize storage_size(TorchShapeRef shape)
The flattened storage size of a tensor with given shape.
Definition utils.cxx:32
Definition CrossRef.cxx:32