[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
question about the trust region in the gsl library.
From: |
Aleja Carmento |
Subject: |
question about the trust region in the gsl library. |
Date: |
Tue, 2 May 2023 13:29:49 +0200 |
Dear Sir/Madam,
I am currently working on a gsl least squares method. In this specific case
, i have it set to the levenberg-marquardt with geodesic acceleration. i
used the same example from the gsl website and added a few changes to it
like so :
#include <stdlib.h>#include <stdio.h>#include
<gsl/gsl_vector.h>#include <gsl/gsl_matrix.h>#include
<gsl/gsl_blas.h>#include <gsl/gsl_multifit_nlinear.h>#include
<gsl/gsl_rng.h>#include <gsl/gsl_errno.h>#include <gsl/gsl_randist.h>
#define max_iterations 100
struct data
{
double* t;
double* y;
size_t n;
};
static double scaled_norm(const gsl_vector* D, const gsl_vector* a)
{
const size_t n = a->size;
double e2 = 0.0;
size_t i;
for (i = 0; i < n; ++i) {
double Di = gsl_vector_get(D, i);
double ai = gsl_vector_get(a, i);
double u = Di * ai;
e2 += u * u;
}
return sqrt(e2);
}
/* model function: a * exp( -1/2 * [ (t - b) / c ]^2 )
*/doublegaussian(const double a, const double b, const double c, const
double t)
{
const double z = (t - b) / c;
return (a * exp(-0.5 * z * z));
}
intfunc_f(const gsl_vector* x, void* params, gsl_vector* f)
{
struct data* d = (struct data*)params;
double a = gsl_vector_get(x, 0);
double b = gsl_vector_get(x, 1);
double c = gsl_vector_get(x, 2);
size_t i;
for (i = 0; i < d->n; ++i)
{
double ti = d->t[i];
double yi = d->y[i];
double y = gaussian(a, b, c, ti);
gsl_vector_set(f, i, yi - y);
}
return GSL_SUCCESS;
}
intfunc_df(const gsl_vector* x, void* params, gsl_matrix* J)
{
struct data* d = (struct data*)params;
double a = gsl_vector_get(x, 0);
double b = gsl_vector_get(x, 1);
double c = gsl_vector_get(x, 2);
size_t i;
for (i = 0; i < d->n; ++i)
{
double ti = d->t[i];
double zi = (ti - b) / c;
double ei = exp(-0.5 * zi * zi);
gsl_matrix_set(J, i, 0, -ei);
gsl_matrix_set(J, i, 1, -(a / c) * ei * zi);
gsl_matrix_set(J, i, 2, -(a / c) * ei * zi * zi);
}
return GSL_SUCCESS;
}
intfunc_fvv(const gsl_vector* x, const gsl_vector* v,
void* params, gsl_vector* fvv)
{
struct data* d = (struct data*)params;
double a = gsl_vector_get(x, 0);
double b = gsl_vector_get(x, 1);
double c = gsl_vector_get(x, 2);
double va = gsl_vector_get(v, 0);
double vb = gsl_vector_get(v, 1);
double vc = gsl_vector_get(v, 2);
size_t i;
for (i = 0; i < d->n; ++i)
{
double ti = d->t[i];
double zi = (ti - b) / c;
double ei = exp(-0.5 * zi * zi);
double Dab = -zi * ei / c;
double Dac = -zi * zi * ei / c;
double Dbb = a * ei / (c * c) * (1.0 - zi * zi);
double Dbc = a * zi * ei / (c * c) * (2.0 - zi * zi);
double Dcc = a * zi * zi * ei / (c * c) * (3.0 - zi * zi);
double sum;
sum = 2.0 * va * vb * Dab +
2.0 * va * vc * Dac +
vb * vb * Dbb +
2.0 * vb * vc * Dbc +
vc * vc * Dcc;
gsl_vector_set(fvv, i, sum);
}
return GSL_SUCCESS;
}
void callback(const size_t iter, void* params,
const gsl_multifit_nlinear_workspace* w)
{
gsl_vector* f = gsl_multifit_nlinear_residual(w);
gsl_vector* x = gsl_multifit_nlinear_position(w);
double avratio = gsl_multifit_nlinear_avratio(w);
double rcond;
(void)params; /* not used */
gsl_multifit_nlinear_trust_state* state =
(gsl_multifit_nlinear_trust_state*)w->state;
/* compute reciprocal condition number of J(x) */
gsl_multifit_nlinear_rcond(&rcond, w);
fprintf(stderr, "iter %2zu: a = %.4f, b = %.4f, c = %.4f, |a|/|v|
= %.4f cond(J) = %8.4f, |f(x)| = %.4f\n",
iter,
gsl_vector_get(x, 0),
gsl_vector_get(x, 1),
gsl_vector_get(x, 2),
avratio,
1.0 / rcond,
gsl_blas_dnrm2(f));
}
voidsolve_system(gsl_vector* x, gsl_multifit_nlinear_fdf* fdf,
gsl_multifit_nlinear_parameters* params)
{
const gsl_multifit_nlinear_type* T = gsl_multifit_nlinear_trust;
const size_t max_iter = 200;
const double xtol = 1.0e-8;
const double gtol = 1.0e-8;
const double ftol = 1.0e-8;
const size_t n = fdf->n;
const size_t p = fdf->p;
size_t iter = 0;
int status, info;
gsl_multifit_nlinear_workspace* work =
gsl_multifit_nlinear_alloc(T, params, n, p);
gsl_vector* f = gsl_multifit_nlinear_residual(work);
gsl_vector* y = gsl_multifit_nlinear_position(work);
double chisq0, chisq, rcond;
double trust_radius_list[max_iterations];
double step_size_list[max_iterations];
/* initialize solver */
gsl_multifit_nlinear_init(x, fdf, work);
/* store initial cost */
gsl_blas_ddot(f, f, &chisq0);
/* iterate until convergence */
if (callback) callback(iter, NULL, work);
/*initialize scale and trust region*/
gsl_vector* diag = gsl_vector_alloc(p);
work->params.scale->init(work->J, diag);
double scale = scaled_norm(diag, work->x);
double trust_region_radius = 0.3 * GSL_MAX(1.0, scale);
trust_radius_list[0] = trust_region_radius; // initialize the
trust region radius
step_size_list[0] = gsl_blas_dnrm2(work->dx);
do
{
status = gsl_multifit_nlinear_iterate(work);
/*update the trust region information*/
work->params.scale->update(work->J, diag);
scale = scaled_norm(diag, work->dx);
trust_region_radius = scale;
trust_radius_list[iter + 1] = trust_region_radius;
step_size_list[iter + 1] = gsl_blas_dnrm2(work->dx);
if (status == GSL_ENOPROG && iter == 0)
{
info = status;
}
++iter;
if (callback) callback(iter,NULL, work);
/* test for convergence */
status = gsl_multifit_nlinear_test(xtol, gtol, ftol, &info, work);
} while (status == GSL_CONTINUE && iter < max_iter);
if (status == GSL_ETOLF || status == GSL_ETOLX || status == GSL_ETOLG)
{
info = status;
status = GSL_SUCCESS;
}
/* check if max iterations reached */
if (iter >= max_iter && status != GSL_SUCCESS)
status = GSL_EMAXITER;
int i;
for (i = 0; i < iter + 1; i++) {
fprintf(stdout, "trust-radius: %g ", trust_radius_list[i]);
}
/* store final cost */
gsl_blas_ddot(f, f, &chisq);
/* store cond(J(x)) */
gsl_multifit_nlinear_rcond(&rcond, work);
gsl_vector_memcpy(x, y);
/* print summary */
fprintf(stderr, "NITER = %zu\n", gsl_multifit_nlinear_niter(work));
fprintf(stderr, "NFEV = %zu\n", fdf->nevalf);
fprintf(stderr, "NJEV = %zu\n", fdf->nevaldf);
fprintf(stderr, "NAEV = %zu\n", fdf->nevalfvv);
fprintf(stderr, "initial cost = %.12e\n", chisq0);
fprintf(stderr, "final cost = %.12e\n", chisq);
fprintf(stderr, "final x = (%.12e, %.12e, %12e)\n",
gsl_vector_get(x, 0), gsl_vector_get(x, 1), gsl_vector_get(x, 2));
fprintf(stderr, "final cond(J) = %.12e\n", 1.0 / rcond);
gsl_multifit_nlinear_free(work);
}
intmain(void)
{
const size_t n = 300; /* number of data points to fit */
const size_t p = 3; /* number of model parameters */
const double a = 5.0; /* amplitude */
const double b = 0.4; /* center */
const double c = 0.15; /* width */
const gsl_rng_type* T = gsl_rng_default;
gsl_vector* f = gsl_vector_alloc(n);
gsl_vector* x = gsl_vector_alloc(p);
gsl_multifit_nlinear_fdf fdf;
gsl_multifit_nlinear_parameters fdf_params =
gsl_multifit_nlinear_default_parameters();
struct data fit_data;
gsl_rng* r;
size_t i;
gsl_rng_env_setup();
r = gsl_rng_alloc(T);
fit_data.t = (double *)malloc(n * sizeof(double));
fit_data.y = (double *)malloc(n * sizeof(double));
fit_data.n = n;
/* generate synthetic data with noise */
for (i = 0; i < n; ++i)
{
double t = (double)i / (double)n;
double y0 = gaussian(a, b, c, t);
double dy = gsl_ran_gaussian(r, 0.1 * y0);
fit_data.t[i] = t;
fit_data.y[i] = y0 + dy;
}
/* define function to be minimized */
fdf.f = func_f;
fdf.df = func_df;
fdf.fvv = func_fvv;
fdf.n = n;
fdf.p = p;
fdf.params = &fit_data;
/* starting point */
gsl_vector_set(x, 0, 1.0);
gsl_vector_set(x, 1, 0.0);
gsl_vector_set(x, 2, 1.0);
fdf_params.trs = gsl_multifit_nlinear_trs_lmaccel;
solve_system(x, &fdf, &fdf_params);
/* print data and model */
{
double A = gsl_vector_get(x, 0);
double B = gsl_vector_get(x, 1);
double C = gsl_vector_get(x, 2);
for (i = 0; i < n; ++i)
{
double ti = fit_data.t[i];
double yi = fit_data.y[i];
double fi = gaussian(A, B, C, ti);
printf("%f %f %f\n", ti, yi, fi);
}
}
gsl_vector_free(f);
gsl_vector_free(x);
gsl_rng_free(r);
return 0;
}
This code initializes the trust radius and updates it and stores it in a
list. I started with the same initialization method as trust.c which a
source file in the gsl library used to calculate the trust region
information.
work->params.scale->init(work->J, diag);
double scale = scaled_norm(diag, work->x);
double trust_region_radius = 0.3 * GSL_MAX(1.0, scale);
but for the update , i used the step instead:
work->params.scale->update(work->J, diag);
scale = scaled_norm(diag, work->dx);
trust_region_radius = scale;
i did not do factor up and down to make the trust region bigger or smaller
since i assumed that that was already done if i multiply the diagonal of
the scaling matrix with the step.
I tested the values by debugging both from the source file and from my
method and they were different.
the factor up and factor down parameters are accessed like so :
work->params.factor_up;
work->params.factor_down;
and those increase or decrease the trust region.
is the reason why my code isn't giving similar results because i didnt use
factor_up and factor_down. even though they should already be taken into
account by the step?
Keep in mind that my trust region calculation is done the other way around
, meaning that the trust region was already calculated internally but i am
trying to recalculate it so i can plot it.
i also realized that the predicted reduction is calculated differently for
every method.
I would really appreciate your help with this.
thank you so much,
Sincerely,
Aleja Carmento.
- question about the trust region in the gsl library.,
Aleja Carmento <=