Advanced Introduction to C++, Scientific Computing and Machine Learning




Claudius Gros, SS 2024

Institut für theoretische Physik
Goethe-University Frankfurt a.M.

NM : Monte Carlo & Metropolis Sampling

statistical evaluation of integrals


Monte Carlo sampling


Metropolis algorithm

integration as averaging



$\hspace{10ex} \displaystyle I \ =\ \int_a^b \, f(x)\, dx \ =\ \sum_{i=1}^N f(x_i)\,\Delta x $ $\hspace{10ex} \displaystyle I \ =\ (b-a) \left(\frac{1}{N} \sum_i\, f(x_i) \right) \ =\ (b-a)\, \langle f\rangle $
$$ I\ =\ V\, \langle f\rangle$$

area integration



$\hspace{10ex} \displaystyle I\ = \ \int_{-a}^a f(x,y)\,dx\,dy $

with

$\hspace{10ex} \displaystyle f(x,y)\ =\ \left\{ \begin{array}{ccl} 1 && \mbox{inside}\\ 0 && \mbox{outside} \end{array} \right. $

Copy Copy to clipboad
Downlaod Download
/** Evaluating \pi with simple Monte Carlo evaluation.
  */
#include <iostream>
#include <stdio.h>     // for printf
#include <stdlib.h>    // srand, rand
#include <math.h>      // for pow, etc.
#include <fstream>     // file streams
using namespace std;
 
double myFunction(double x, double y, double x0, double x1, double y0, double y1) 
{
 if ( (x<x0)||(x>x1)||(y<y0)||(y>y1) )
    {
    printf("# myFunction(): x or y value out of range \n");
    return 0.0;
    }
 if (y*y<=1.0-x*x)     // x*x+y*y=1 is the circle equation
   return 1.0;
 else
   return 0.0;
  } // end of myFunction()
 
// ---
// --- main
// ---
int main() 
{
 const double x0 = 0.0, x1 = 1.0;    // x-bounds
 const double y0 = 0.0, y1 = 1.0;    // y-bounds
//
 int N = 1000000;                           // number of draws
 int deltaN = N/10;                         // printout intervals
 double x, y, sum = 0;
 srand(time(NULL));                         // seed for random()
// --- loop over all random evaluations 
 printf("%8s %8s %8s\n","# steps","pi", "error");
 for (int i=0; i<N; i++)
   {
   x = (random()/(double)RAND_MAX);     // random draws
   y = (random()/(double)RAND_MAX);     // random draws
   sum += myFunction(x,y,x0,x1,y0,y1);
   double result = 4.0*sum/(i+1.0);
   if ((i+1)%deltaN==0)
     printf("%8d %8.5f %8.5f\n",i+1,result,(M_PI-result)/M_PI);
   }
 printf("%s\n","-----------------");
     printf("%8s %8.5f\n","infinity",M_PI);
 return 1;
 }  // end of main()

basic statistics

statistics

Monte Carlo - convergence

data binning


law of large numbers

Monte Carlo with error estimation

Copy Copy to clipboad
Downlaod Download
/** Evaluating \pi with simple Monte Carlo evaluation,
  * calculating variance and standard deviation (error).
  * On input: number of measurements N.
  */
#include <iostream>
#include <stdio.h>     // for printf
#include <stdlib.h>    // srand, rand
#include <math.h>      // for pow, etc.
#include <fstream>     // file streams
 
using namespace std;
 
double myFunction(double x, double y, double x0, double x1, double y0, double y1) 
{
 if ( (x<x0)||(x>x1)||(y<y0)||(y>y1) )
    {
    printf("# myFunction(): x or y value out of range \n");
    return 0.0;
    }
 if (y*y<=1.0-x*x)     // x*x+y*y=1 is the circle equation
   return 1.0;
 else
   return 0.0;
  } // end of myFunction()
 
// ---
// --- main
// ---
int main(int argLength, char* argValues[])   // parsing command line arguments
{
 const double x0 = 0.0, x1 = 1.0;    // x-bounds
 const double y0 = 0.0, y1 = 1.0;    // y-bounds
//
 int N = 0;                          // number of MC steps
 if (argLength==2)
   N = atof(argValues[1]);
 else
   {
   printf("on input N: number of MC steps\n");
   return 0;
   }
//
// --- other parameters and variables
//
 const int K = 20;                                // number of bins
 int nK = N/K;                                    // number of data per bin
 double* data = new double[N];                    // storing all measurements
 double* meanBin = new double[K];                 // means of bins
 double mean = 0.0, sigma = 0.0;                  // result: mean and error
 double x, y;
//
// --- loop over all random evaluations 
//
 srand(time(NULL));                         // seed for random()
 for (int i=0; i<N; i++)
   {
   x = (random()/(double)RAND_MAX);            // random draws
   y = (random()/(double)RAND_MAX);            // random draws
   data[i] = 4.0*myFunction(x,y,x0,x1,y0,y1);      // storing measurement
   }  
//
// --- evaluate the overall mean and the mean of the bins
//
 for (int i=0; i<N; i++) 
    mean += data[i];
 mean = mean/N;
//
 for (int k=0; k<K; k++)
   {
   meanBin[k] = 0.0;       
   for (int n=0; n<nK; n++)                     // nK data points in each bin
      meanBin[k] += data[k*nK+n];
   meanBin[k] = meanBin[k]/nK;
   printf("# (mean of bin) - mean : %8.5f\n",meanBin[k]-mean);
   }  // end of loop over all bins
//
// --- evaluate the error: variance of means of bins
//
 for (int k=0; k<K; k++)
   sigma += (meanBin[k]-mean)*(meanBin[k]-mean);
 sigma = sqrt(sigma/K);
//
// --- printout of result
//
 printf(" number of measurements : %d\n",N);
 printf(" number of bins         : %d\n",K);
 printf(" data points per bin    : %d\n",nK);
 printf(" estimate for pi        : %8.5f\n",mean);
 printf(" estimate for accuracy  : %8.5f\n",sigma);
 printf(" true error             : %8.5f\n",M_PI-mean);
// 
 return 1;
 }  // end of main()

integrals over distribution functions


importance sampling

$\hspace{6ex}\displaystyle \rho(x) \ =\ \frac{1}{y_n}\frac{x^n}{x+a}, \quad\quad y_n\ =\ \int_0^1\, \frac{x^n}{x+a}\, dx $

random walk through configuration space

Metropolis algorithm


transition rules

detailed balance


detailed balance

evaluation of relative properties

Copy Copy to clipboad
Downlaod Download
/** Evaluating y_{n+1}/y_n, with y_n = \int_0^1 x^n/(a+x) dx
  * with Monte Carlo (Metropolis) algorithm.
  * On input: number of Monte Carlo steps N.
  */
#include <iostream>
#include <stdio.h>     // for printf
#include <stdlib.h>    // srand, rand
#include <math.h>      // for pow, etc.
#include <fstream>     // file streams
 
using namespace std;
 
/** The proability density.
  */
double myDensity(double x, double nPow, double aPar) {
 return pow(x,nPow)/(x+aPar);
  } 
 
// ---
// --- main
// ---
int main(int argLength, char* argValues[])   // parsing command line arguments
{
 const double aPar = 10.0;                 // parameter
 const double nPow = 3.0;                  // power
 srand(time(NULL));                  // seed for random()
//
 int N = 0;                          // number of MC steps
 if (argLength==2)
   N = atof(argValues[1]);
 else
   {
   printf("on input N: number of MC steps\n");
   return 0;
   }
//
// --- other parameters and variables
//
 const int K = 20;                                // number of bins
 int nK = N/K;                                    // number of data per bin
 double* data = new double[N];                    // storing all measurements
 double* meanBin = new double[K];                 // means of bins
 double mean = 0.0, sigma = 0.0;                  // result: mean and error
 double x = random()/(double)RAND_MAX;            // random starting value
 double y, ratio;                                 // variables
 data[0] = x;                                     // first measurment
//
// --- loop over all Monte Carlo steps
//
 for (int i=1; i<N; i++)
  {
  y = random()/(double)RAND_MAX;
  ratio = myDensity(y,nPow,aPar)/ myDensity(x,nPow,aPar);
//
  double probability = random()/(double)RAND_MAX; // of acceptance
  if (ratio>probability)
    x = y;                                        // accept candidate
  data[i] = x;                                    // storing measurement
  }  // end of loop over all Monte Carlo draws
//
// --- evaluate the overall mean and the mean of the bins
//
 for (int i=0; i<N; i++) 
    mean += data[i];
 mean = mean/N;
//
 for (int k=0; k<K; k++)
   {
   meanBin[k] = 0.0;       
   for (int n=0; n<nK; n++)                     // nK data points in each bin
      meanBin[k] += data[k*nK+n];
   meanBin[k] = meanBin[k]/nK;
   }  // end of loop over all bins
//
// --- evaluate the error: variance of means of bins
//
 for (int k=0; k<K; k++)
   sigma += (meanBin[k]-mean)*(meanBin[k]-mean);
 sigma = sqrt(sigma/K);
//
// --- printout of result
//
 printf(" a  in  y^n/(x+a)       : %8.4f\n",aPar);
 printf(" n  in  y^n/(x+a)       : %8.4f\n",nPow);
 printf(" number of measurements : %d\n",N);
 printf(" number of bins         : %d\n",K);
 printf(" data points per bin    : %d\n",nK);
 printf(" y_{n+1}/y_n  estimate  : %8.4f\n",mean);
 printf(" estimate for accuracy  : %8.4f\n",sigma);
 printf("\n");
 printf(" ratio y_4/y_3 (exact)  : %8.4f\n",0.797490089);
// 
 return 1;
 }  // end of main()