Re: question about the trust region in the gsl library.

help-gsl

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: question about the trust region in the gsl library.

From:	Patrick Alken
Subject:	Re: question about the trust region in the gsl library.
Date:	Tue, 2 May 2023 08:50:49 -0400
User-agent:	Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Thunderbird/102.10.0

I'm unclear what you are trying to do. Do you simply want to keep trackof the trust radius at each step so you can plot it later?


On 5/2/23 07:29, Aleja Carmento wrote:

  Dear Sir/Madam,



I am currently working on a gsl least squares method. In this specific case
, i have it set to the levenberg-marquardt with geodesic acceleration. i
used the same example from the gsl website and added a few changes to it
like so :


#include <stdlib.h>#include <stdio.h>#include
<gsl/gsl_vector.h>#include <gsl/gsl_matrix.h>#include
<gsl/gsl_blas.h>#include <gsl/gsl_multifit_nlinear.h>#include
<gsl/gsl_rng.h>#include <gsl/gsl_errno.h>#include <gsl/gsl_randist.h>


#define max_iterations 100


struct data
{
     double* t;
     double* y;
     size_t n;
};

static double scaled_norm(const gsl_vector* D, const gsl_vector* a)
{
     const size_t n = a->size;
     double e2 = 0.0;
     size_t i;

     for (i = 0; i < n; ++i) {
         double Di = gsl_vector_get(D, i);
         double ai = gsl_vector_get(a, i);
         double u = Di * ai;

         e2 += u * u;
     }

     return sqrt(e2);
}
/* model function: a * exp( -1/2 * [ (t - b) / c ]^2 )
*/doublegaussian(const double a, const double b, const double c, const
double t)
{
     const double z = (t - b) / c;
     return (a * exp(-0.5 * z * z));
}
intfunc_f(const gsl_vector* x, void* params, gsl_vector* f)
{
     struct data* d = (struct data*)params;
     double a = gsl_vector_get(x, 0);
     double b = gsl_vector_get(x, 1);
     double c = gsl_vector_get(x, 2);
     size_t i;

     for (i = 0; i < d->n; ++i)
     {
         double ti = d->t[i];
         double yi = d->y[i];
         double y = gaussian(a, b, c, ti);

         gsl_vector_set(f, i, yi - y);
     }

     return GSL_SUCCESS;
}
intfunc_df(const gsl_vector* x, void* params, gsl_matrix* J)
{
     struct data* d = (struct data*)params;
     double a = gsl_vector_get(x, 0);
     double b = gsl_vector_get(x, 1);
     double c = gsl_vector_get(x, 2);
     size_t i;

     for (i = 0; i < d->n; ++i)
     {
         double ti = d->t[i];
         double zi = (ti - b) / c;
         double ei = exp(-0.5 * zi * zi);

         gsl_matrix_set(J, i, 0, -ei);
         gsl_matrix_set(J, i, 1, -(a / c) * ei * zi);
         gsl_matrix_set(J, i, 2, -(a / c) * ei * zi * zi);
     }

     return GSL_SUCCESS;
}
intfunc_fvv(const gsl_vector* x, const gsl_vector* v,
     void* params, gsl_vector* fvv)
{
     struct data* d = (struct data*)params;
     double a = gsl_vector_get(x, 0);
     double b = gsl_vector_get(x, 1);
     double c = gsl_vector_get(x, 2);
     double va = gsl_vector_get(v, 0);
     double vb = gsl_vector_get(v, 1);
     double vc = gsl_vector_get(v, 2);
     size_t i;

     for (i = 0; i < d->n; ++i)
     {
         double ti = d->t[i];
         double zi = (ti - b) / c;
         double ei = exp(-0.5 * zi * zi);
         double Dab = -zi * ei / c;
         double Dac = -zi * zi * ei / c;
         double Dbb = a * ei / (c * c) * (1.0 - zi * zi);
         double Dbc = a * zi * ei / (c * c) * (2.0 - zi * zi);
         double Dcc = a * zi * zi * ei / (c * c) * (3.0 - zi * zi);
         double sum;

         sum = 2.0 * va * vb * Dab +
             2.0 * va * vc * Dac +
             vb * vb * Dbb +
             2.0 * vb * vc * Dbc +
             vc * vc * Dcc;

         gsl_vector_set(fvv, i, sum);
     }

     return GSL_SUCCESS;
}
void callback(const size_t iter, void* params,
     const gsl_multifit_nlinear_workspace* w)
{
     gsl_vector* f = gsl_multifit_nlinear_residual(w);
     gsl_vector* x = gsl_multifit_nlinear_position(w);
     double avratio = gsl_multifit_nlinear_avratio(w);
     double rcond;


     (void)params; /* not used */


     gsl_multifit_nlinear_trust_state* state =
(gsl_multifit_nlinear_trust_state*)w->state;
     /* compute reciprocal condition number of J(x) */
     gsl_multifit_nlinear_rcond(&rcond, w);

     fprintf(stderr, "iter %2zu: a = %.4f, b = %.4f, c = %.4f, |a|/|v|
= %.4f cond(J) = %8.4f, |f(x)| = %.4f\n",
         iter,
         gsl_vector_get(x, 0),
         gsl_vector_get(x, 1),
         gsl_vector_get(x, 2),
         avratio,
         1.0 / rcond,
         gsl_blas_dnrm2(f));
}
voidsolve_system(gsl_vector* x, gsl_multifit_nlinear_fdf* fdf,
     gsl_multifit_nlinear_parameters* params)
{
     const gsl_multifit_nlinear_type* T = gsl_multifit_nlinear_trust;
     const size_t max_iter = 200;
     const double xtol = 1.0e-8;
     const double gtol = 1.0e-8;
     const double ftol = 1.0e-8;

     const size_t n = fdf->n;
     const size_t p = fdf->p;

     size_t iter = 0;
     int status, info;

     gsl_multifit_nlinear_workspace* work =
gsl_multifit_nlinear_alloc(T, params, n, p);



     gsl_vector* f = gsl_multifit_nlinear_residual(work);
     gsl_vector* y = gsl_multifit_nlinear_position(work);


     double chisq0, chisq, rcond;


     double trust_radius_list[max_iterations];
     double step_size_list[max_iterations];

     /* initialize solver */
     gsl_multifit_nlinear_init(x, fdf, work);

     /* store initial cost */
     gsl_blas_ddot(f, f, &chisq0);

     /* iterate until convergence */
     if (callback) callback(iter, NULL, work);

     /*initialize scale and trust region*/
     gsl_vector* diag = gsl_vector_alloc(p);
     work->params.scale->init(work->J, diag);

     double scale = scaled_norm(diag, work->x);
     double trust_region_radius = 0.3 * GSL_MAX(1.0, scale);


     trust_radius_list[0] = trust_region_radius; // initialize  the
trust region radius
     step_size_list[0] = gsl_blas_dnrm2(work->dx);




     do
     {
         status = gsl_multifit_nlinear_iterate(work);


         /*update the trust region information*/
         work->params.scale->update(work->J, diag);


         scale = scaled_norm(diag, work->dx);
         trust_region_radius = scale;
         trust_radius_list[iter + 1] = trust_region_radius;
         step_size_list[iter + 1] = gsl_blas_dnrm2(work->dx);

         if (status == GSL_ENOPROG && iter == 0)
         {
             info = status;
         }

         ++iter;

         if (callback) callback(iter,NULL, work);

         /* test for convergence */
         status = gsl_multifit_nlinear_test(xtol, gtol, ftol, &info, work);

     } while (status == GSL_CONTINUE && iter < max_iter);


     if (status == GSL_ETOLF || status == GSL_ETOLX || status == GSL_ETOLG)
     {
         info = status;
         status = GSL_SUCCESS;
     }

     /* check if max iterations reached */
     if (iter >= max_iter && status != GSL_SUCCESS)
         status = GSL_EMAXITER;

     int i;
     for (i = 0; i < iter + 1; i++) {


         fprintf(stdout, "trust-radius: %g ", trust_radius_list[i]);


     }

     /* store final cost */
     gsl_blas_ddot(f, f, &chisq);

     /* store cond(J(x)) */
     gsl_multifit_nlinear_rcond(&rcond, work);

     gsl_vector_memcpy(x, y);

     /* print summary */

     fprintf(stderr, "NITER         = %zu\n", gsl_multifit_nlinear_niter(work));
     fprintf(stderr, "NFEV          = %zu\n", fdf->nevalf);
     fprintf(stderr, "NJEV          = %zu\n", fdf->nevaldf);
     fprintf(stderr, "NAEV          = %zu\n", fdf->nevalfvv);
     fprintf(stderr, "initial cost  = %.12e\n", chisq0);
     fprintf(stderr, "final cost    = %.12e\n", chisq);
     fprintf(stderr, "final x       = (%.12e, %.12e, %12e)\n",
         gsl_vector_get(x, 0), gsl_vector_get(x, 1), gsl_vector_get(x, 2));
     fprintf(stderr, "final cond(J) = %.12e\n", 1.0 / rcond);

     gsl_multifit_nlinear_free(work);
}
intmain(void)
{
     const size_t n = 300;  /* number of data points to fit */
     const size_t p = 3;    /* number of model parameters */
     const double a = 5.0;  /* amplitude */
     const double b = 0.4;  /* center */
     const double c = 0.15; /* width */
     const gsl_rng_type* T = gsl_rng_default;
     gsl_vector* f = gsl_vector_alloc(n);
     gsl_vector* x = gsl_vector_alloc(p);
     gsl_multifit_nlinear_fdf fdf;
     gsl_multifit_nlinear_parameters fdf_params =
         gsl_multifit_nlinear_default_parameters();
     struct data fit_data;
     gsl_rng* r;
     size_t i;

     gsl_rng_env_setup();
     r = gsl_rng_alloc(T);

     fit_data.t = (double *)malloc(n * sizeof(double));
     fit_data.y = (double *)malloc(n * sizeof(double));
     fit_data.n = n;

     /* generate synthetic data with noise */
     for (i = 0; i < n; ++i)
     {
         double t = (double)i / (double)n;
         double y0 = gaussian(a, b, c, t);
         double dy = gsl_ran_gaussian(r, 0.1 * y0);

         fit_data.t[i] = t;
         fit_data.y[i] = y0 + dy;
     }

     /* define function to be minimized */
     fdf.f = func_f;
     fdf.df = func_df;
     fdf.fvv = func_fvv;
     fdf.n = n;
     fdf.p = p;
     fdf.params = &fit_data;

     /* starting point */
     gsl_vector_set(x, 0, 1.0);
     gsl_vector_set(x, 1, 0.0);
     gsl_vector_set(x, 2, 1.0);

     fdf_params.trs = gsl_multifit_nlinear_trs_lmaccel;
     solve_system(x, &fdf, &fdf_params);

     /* print data and model */
     {
         double A = gsl_vector_get(x, 0);
         double B = gsl_vector_get(x, 1);
         double C = gsl_vector_get(x, 2);

         for (i = 0; i < n; ++i)
         {
             double ti = fit_data.t[i];
             double yi = fit_data.y[i];
             double fi = gaussian(A, B, C, ti);

             printf("%f %f %f\n", ti, yi, fi);
         }
     }

     gsl_vector_free(f);
     gsl_vector_free(x);
     gsl_rng_free(r);

     return 0;
}




This code initializes the trust radius and updates it and stores it in a
list. I started with the same initialization method as trust.c which a
source file in the gsl library used to calculate the trust region
information.


  work->params.scale->init(work->J, diag);
  double scale = scaled_norm(diag, work->x);
  double trust_region_radius = 0.3 * GSL_MAX(1.0, scale);



but for the update , i used the step instead:

work->params.scale->update(work->J, diag);
scale = scaled_norm(diag, work->dx);
trust_region_radius = scale;

i did not do factor up and down to make the trust region bigger or smaller
since i assumed that that was already done if i multiply the diagonal of
the scaling matrix with the step.

I tested the values by debugging both from the source file and from my
method and they were different.

the factor up and factor down parameters are accessed like so :

work->params.factor_up;
work->params.factor_down;

and those increase or decrease the trust region.

is the reason why my code isn't giving similar results because i didnt use
factor_up and factor_down. even though they should already be taken into
account by the step?

Keep in mind that my trust region calculation is done the other way around
, meaning that the trust region was already calculated internally but i am
trying to recalculate it so i can plot it.


i also realized that the predicted reduction is calculated differently for
every method.


I would really appreciate your help with this.

thank you so much,

Sincerely,

Aleja Carmento.

[Prev in Thread]

Current Thread

[Next in Thread]

question about the trust region in the gsl library., Aleja Carmento, 2023/05/02
- Re: question about the trust region in the gsl library., Patrick Alken <=

Prev by Date: question about the trust region in the gsl library.
Next by Date: Static GSL and GSLCBLAS libraries for Fedora 37
Previous by thread: question about the trust region in the gsl library.
Next by thread: Static GSL and GSLCBLAS libraries for Fedora 37
Index(es):
- Date
- Thread