# Advanced Introduction to C++, Scientific Computing and Machine Learning

Claudius Gros, WS 2019/20

Institut für theoretische Physik
Goethe-University Frankfurt a.M.

# non-separable training data

• Lagrange parameters $\ \ a_k\$ diverge
when data is not linearly seperable
• classification condition
$\displaystyle\qquad\quad \mathbf{w}\cdot\mathbf{x}_i-b- l_i \ge 0, \qquad\quad l_i=\pm1$
cannot be enforced

### soft contraint

• relax classification contraint
• slag variables $\ \ \xi_i\ge0$
$$\begin{array}{rclcrcl} \mathbf{w}\cdot\mathbf{x}_i&\ge& b+1-\xi_i &\quad\mbox{for}\quad & y_i&=&+1 \\ \mathbf{w}\cdot\mathbf{x}_i&\le& b-1+\xi_i&\quad\mbox{for}\quad & y_i&=&-1 \\ \end{array}$$
• individual reduction of margin $\ \ (1-\xi_i)/|\mathbf{w}|$
• may become negative

### optimization condition

$$\mathrm{min}\left[ \frac{1}{2}\mathbf{w}^2 +C\,\sum_i \xi \right]\,,\qquad\quad l_i\big(\mathbf{w}\cdot\mathbf{x}_i-b\big)- 1+\xi_i\ge 0$$
• weighting of miss-classification/margin: $\ \ C$

# dual form

$$L_P = \frac{\mathbf{w}^2}{2}+C\,\sum_i\xi_i -\sum_i a_i \Big[l_i\big(\mathbf{w}\cdot\mathbf{x}_i-b\big)-1+\xi_i\Big] -\sum_ib_i\xi_i$$
• $b_i>0\ \$ Lagrange parameter for constraint $\ \ \xi_i>0$
$a_i>0\ \$ in analogy
• stationarity: weight vector
$$\frac{\partial L_P}{\partial\mathbf{w}} = 0 \qquad\quad \Rightarrow \qquad\quad \fbox{\displaystyle\phantom{\big|} \mathbf{w} = \sum_i a_i l_i\mathbf{x}_i \phantom{\big|}}$$
• stationarity: hyperplane offset
$$\frac{\partial L_P}{\partial b} = 0 \qquad\quad \Rightarrow \qquad\quad \fbox{\displaystyle\phantom{\big|} \sum_i a_i l_i=0 \phantom{\big|}}$$
• stationarity: slag variables
$$\frac{\partial L_P}{\partial \xi_i} = 0 \qquad \Rightarrow \qquad C=a_i+b_i \qquad \Rightarrow \qquad \fbox{\displaystyle\phantom{\big|} 0\le a_i\le C \phantom{\big|}}$$
• $C \ \$ limits runaway growth of Lagrange parameters $\ \ a_i$

# soft margins - numerics

$$\underset{\{a_l\}}{\mathrm{max}}\left[ \sum_i a_i-\frac{1}{2}\sum_{ij} a_iH_{ij}a_j\right]\,, \qquad 0\le a_i \le C, \qquad \sum_i a_il_i=0$$
• solve dual form
• weight vector: $\ \ \mathbf{w} = \sum_i a_i l_i\mathbf{x}_i$

### generalized support vectors

$$a_i \quad\to\quad \left\{\begin{array}{rcl} 0 & & \mathrm{correctly\ classified} \\ ]0,C[ & & \mathrm{support\ vector} \\ C & & \mathrm{miss-classified} \\ \end{array} \right.$$
• number of support vectors: $\ \ N_s$
• hyperplane offset
$$b=\frac{1}{N_S}\sum_{s\in S}\big(\mathbf{w}\cdot\mathbf{x}_s-l_s\big)$$
• averaging over suppoort vectors if convergence is poor

# SVM code: linear/non-seperable

• need to select free parameter $\ \ C$
: typically for machine learning
• LDA classifier removed
#include <iostream>
#include <sstream>     // for stringstream
#include <stdio.h>     // for printf
#include <string>      // c_str()
#include <cmath>       // for abs
#include <fstream>     // file streams
#include <stdlib.h>    // srand, rand
using namespace std;

/* contains
*
* double nextRandom()
* string xyValuesGnu()
* void plotViaGnuplot()
* void generateClassificationData()
*/

// ***
// *** random number in [0,1]
// ***
double nextRandom()
{ static bool firstTime = true;
if (firstTime)
{
srand( (unsigned)time( NULL ) );
firstTime = false;
}
return (rand()/(double)RAND_MAX);
} // end of nextRandom()

// ***
// *** (x,y) string for inline data input for gnuplot
// ***

template<int N>
string xyValuesGnu(double (&xyValues)[2][N], string dataName)
{
stringstream ss;
ss << dataName << " << " << "EOD\n";       // defining data block
for (int i=0; i<N; i++)
ss << xyValues[0][i] << " " << xyValues[1][i] << endl;
ss << "EOD\n";                             // terminating data block
//
return ss.str();
} // end of xyValuesGnu()

// ***
// *** plot data via gnuplot
// ***

void plotViaGnuplot(string dataNameOne,     string dataToPlotOne,
string dataNameTwo,     string dataToPlotTwo,
string dataNamePlaneSVM,string dataToPlotPlaneSVM,
string dataNameP_m1_SVM,string dataToPlotP_m1_SVM,
string dataNameP_m2_SVM,string dataToPlotP_m2_SVM,
string dataNameBasis,   string dataToPlotBasis)
{
FILE *pipeGnu = popen("gnuplot", "w");            // streaming to gnuplot
fprintf(pipeGnu, "set term qt persist size 1000,800\n");
fprintf(pipeGnu, "set nokey\n");                  // no legend
//
fprintf(pipeGnu, "%s\n",dataToPlotOne.c_str());
fprintf(pipeGnu, "%s\n",dataToPlotTwo.c_str());
fprintf(pipeGnu, "%s\n",dataToPlotPlaneSVM.c_str());
fprintf(pipeGnu, "%s\n",dataToPlotP_m1_SVM.c_str());
fprintf(pipeGnu, "%s\n",dataToPlotP_m2_SVM.c_str());
fprintf(pipeGnu, "%s\n",dataToPlotBasis.c_str());
//                                                 // plot data
fprintf(pipeGnu,   "plot \"%s\" w points pt 5 ps 4 \n", dataNameOne.c_str());
fprintf(pipeGnu, "replot \"%s\" w points pt 5 ps 4 \n", dataNameTwo.c_str());
fprintf(pipeGnu, "replot \"%s\" w lines lw 7 lt rgb \"gold\" \n", dataNamePlaneSVM.c_str());
fprintf(pipeGnu, "replot \"%s\" w lines lw 2 lt rgb \"gold\" \n", dataNameP_m1_SVM.c_str());
fprintf(pipeGnu, "replot \"%s\" w lines lw 2 lt rgb \"gold\" \n", dataNameP_m2_SVM.c_str());
fprintf(pipeGnu, "replot \"%s\" w linesp pt 7 ps 4 lw 4 \n", dataNameBasis.c_str());
//
fclose(pipeGnu);    // closing the pipe to gnuplot
} // end of plotViaGnuplot

// ***
// *** generate data to be classified; only for (dim==2)
// ***
template<int nOne, int nTwo>
void generateClassificationData(double dataClassOne[2][nOne], double dataClassTwo[2][nTwo])
{
double angleOne =  0.75*M_PI;          // angles of the large axis of the data with
double angleTwo = -0.75*M_PI;          // respect to the x-axis
double L1One[2], L1Two[2];             // large eigen-directions of data distribution
double L2One[2], L2Two[2];             // minor eigen-directions of data distribution
//
double L1OneLength = 2.0;              // respective lengths
double L2OneLength = 0.3;              // respective lengths
double L1TwoLength = 2.0;
double L2TwoLength = 0.3;
//
double rr, ss;
//
L1One[0] =  L1OneLength*cos(angleOne);
L1One[1] =  L1OneLength*sin(angleOne);
L2One[0] =  L2OneLength*sin(angleOne);   // L2 orthognal to L1
L2One[1] = -L2OneLength*cos(angleOne);
//
L1Two[0] =  L1TwoLength*cos(angleTwo);
L1Two[1] =  L1TwoLength*sin(angleTwo);
L2Two[0] =  L2TwoLength*sin(angleTwo);
L2Two[1] = -L2TwoLength*cos(angleTwo);
//
for (int iOne=0; iOne<nOne; iOne++)     // generate data for first class
{
rr = 2.0*nextRandom()-1.0;
ss = 2.0*nextRandom()-1.0;
dataClassOne[0][iOne] =  1.0 + rr*L1One[0] + ss*L2One[0];  // x component
dataClassOne[1][iOne] =  0.0 + rr*L1One[1] + ss*L2One[1];  // y component
}
//
for (int iTwo=0; iTwo<nTwo; iTwo++)     // generate data for second class
{
rr = 2.0*nextRandom()-1.0;
ss = 2.0*nextRandom()-1.0;
dataClassTwo[0][iTwo] = -1.0 + rr*L1Two[0] + ss*L2Two[0];
dataClassTwo[1][iTwo] =  0.0 + rr*L1Two[1] + ss*L2Two[1];
}
// test printing
if (1==2)
{
for (int iOne=0; iOne<nOne; iOne++)
cout <<  dataClassOne[0][iOne] << " " << dataClassOne[1][iOne] << endl;
cout << endl;
for (int iTwo=0; iTwo<nTwo; iTwo++)
cout <<  dataClassTwo[0][iTwo] << " " << dataClassTwo[1][iTwo] << endl;
}
}    // end of generateClassificationData()

// ***
// *** support vector machine optimization
// ***
template<int N>
void svmGradientAscent(double svmData[2][N], double svmL[N], double svmA[N],
double &svmB, double svmW[2], double svmDataPlane[2][2],
double svm_m1_Plane[2][2], double svm_m2_Plane[2][2])
{
int nIter      =  400000;    // fixed number of update iterations
double epsilon =    0.01;    // update rate
double svmC    =   100.0;    // upper threshold for Lagrange parameters
double rr;
//
for (int iIter=0; iIter<nIter; iIter++)
{
svmW[0] = 0.0;
svmW[1] = 0.0;
for (int i=0; i<N; i++)
for (int k=0; k<2; k++)
svmW[k] += svmA[i]*svmL[i]*svmData[k][i];
for (int i=0; i<N; i++)
{
rr = svmW[0]*svmData[0][i] + svmW[1]*svmData[1][i];
svmA[i] += epsilon*(1.0-svmL[i]*rr);
}
//                                                           // (2) orthogonalization
rr = 0.0;
for (int i=0; i<N; i++)
rr += svmA[i]*svmL[i];
for (int i=0; i<N; i++)
svmA[i] = svmA[i] - rr*svmL[i]/N;
//                                                           // (3) allowed range
for (int i=0; i<N; i++)
if (svmA[i]<0.0)
svmA[i] = 0.0;
for (int i=0; i<N; i++)
if (svmA[i]>svmC)
svmA[i] = svmC;
} // end of loop over iterations
//
//                                                           // offset
//
svmB = 0.0;
int nSupportVectors = 0;
//
for (int i=0; i<N; i++)
if ((svmA[i]>0.001)&&(svmA[i]<svmC-0.001))
{
nSupportVectors++;
svmB += svmW[0]*svmData[0][i] + svmW[1]*svmData[1][i] - svmL[i];
//
cout << "# b  " <<  svmW[0]*svmData[0][i] + svmW[1]*svmData[1][i] - svmL[i] << endl;
}
svmB = svmB / nSupportVectors;
//
//                                                           // data plane
//
rr = sqrt(svmW[0]*svmW[0]+svmW[1]*svmW[1]);
//                                         // w*b/|w|^2   on plane   w*x=b
svmDataPlane[0][0] = svmB*svmW[0]/(rr*rr) + 1.8*svmW[1]/rr;
svmDataPlane[1][0] = svmB*svmW[1]/(rr*rr) - 1.8*svmW[0]/rr;
svmDataPlane[0][1] = svmB*svmW[0]/(rr*rr) - 1.8*svmW[1]/rr;
svmDataPlane[1][1] = svmB*svmW[1]/(rr*rr) + 1.8*svmW[0]/rr;
//                                                           // margin planes
svm_m1_Plane[0][0] = (svmB-1)*svmW[0]/(rr*rr) + 1.8*svmW[1]/rr;
svm_m1_Plane[1][0] = (svmB-1)*svmW[1]/(rr*rr) - 1.8*svmW[0]/rr;
svm_m1_Plane[0][1] = (svmB-1)*svmW[0]/(rr*rr) - 1.8*svmW[1]/rr;
svm_m1_Plane[1][1] = (svmB-1)*svmW[1]/(rr*rr) + 1.8*svmW[0]/rr;
//
svm_m2_Plane[0][0] = (svmB+1)*svmW[0]/(rr*rr) + 1.8*svmW[1]/rr;
svm_m2_Plane[1][0] = (svmB+1)*svmW[1]/(rr*rr) - 1.8*svmW[0]/rr;
svm_m2_Plane[0][1] = (svmB+1)*svmW[0]/(rr*rr) - 1.8*svmW[1]/rr;
svm_m2_Plane[1][1] = (svmB+1)*svmW[1]/(rr*rr) + 1.8*svmW[0]/rr;
//
//                                                           // test printout
//
if (1==1)
{
cout << "# C  " <<  svmC << endl;
printf("#%4s %5s %10s %12s\n","", "class", "a_i", "w*x_i-b");
for (int i=0; i<N; i++)
printf("#%4d %5d %10.4f %12.4f\n",i, int(svmL[i]), svmA[i],
svmData[0][i]*svmW[0]+svmData[1][i]*svmW[1] - svmB );   // support condition
}
//

// ***
// *** main
// ***
int main()
{
const int N1 =  10;                 // number of training data per class
const int N2 =  10;
double dataClassOne[2][N1];         // class data points
double dataClassTwo[2][N2];
double dataBasis[2][2];             // connecting the center of masses
//
dataBasis[0][0] = -1.0;             // (-1,0)
dataBasis[1][0] =  0.0;
dataBasis[0][1] =  1.0;             // (1,0)
dataBasis[1][1] =  0.0;
//
generateClassificationData(dataClassOne,dataClassTwo);
//
//
// --- ------------------
// --- start code for SVM (support vector machine)
// --- ------------------
const int N = N1+N2;       // combined
double svmData[2][N];      // combined training data
double svmL[N];            // class labels
double svmA[N];            // Lagrange parameters
double svmB;               // hyperplane offset
double svmW[2];            // weight vector
double svmDataPlane[2][2]; // orthogonal to weight vector
double svm_m1_Plane[2][2]; // margin one hyperplane
double svm_m2_Plane[2][2]; // margin two hyperplane
//
for (int i=0;i<N1;i++)
{
svmData[0][i] = dataClassOne[0][i];  // merge data
svmData[1][i] = dataClassOne[1][i];
svmL[i]       = 1.0;
svmA[i]       = nextRandom();        // (positive) random initialization
}
for (int j=0;j<N2;j++)
{
svmData[0][N1+j] = dataClassTwo[0][j];
svmData[1][N1+j] = dataClassTwo[1][j];
svmL[N1+j]       = -1.0;
svmA[N1+j]       = nextRandom();
}
svmGradientAscent(svmData, svmL, svmA, svmB, svmW, svmDataPlane, svm_m1_Plane, svm_m2_Plane);
// --- ------------------
// ---  end  code for SVM
// --- ------------------
string dataNameOne      = "$dataOne"; string dataNameTwo = "$dataTwo";
string dataNamePlaneSVM = "$dataPlaneSVM"; string dataNameP_m1_SVM = "$dataP_m1_SVM";
string dataNameP_m2_SVM = "$dataP_m2_SVM"; string dataNameBasis = "$dataBasis";
string dataToPlotOne      = xyValuesGnu(dataClassOne,dataNameOne);  // data to string
string dataToPlotTwo      = xyValuesGnu(dataClassTwo,dataNameTwo);
string dataToPlotPlaneSVM = xyValuesGnu(svmDataPlane,dataNamePlaneSVM);
string dataToPlotP_m1_SVM = xyValuesGnu(svm_m1_Plane,dataNameP_m1_SVM);
string dataToPlotP_m2_SVM = xyValuesGnu(svm_m2_Plane,dataNameP_m2_SVM);
string dataToPlotBasis    = xyValuesGnu(dataBasis,dataNameBasis);
//
//
plotViaGnuplot(dataNameOne,     dataToPlotOne,
dataNameTwo,     dataToPlotTwo,
dataNamePlaneSVM,dataToPlotPlaneSVM,
dataNameP_m1_SVM,dataToPlotP_m1_SVM,
dataNameP_m2_SVM,dataToPlotP_m2_SVM,
dataNameBasis,   dataToPlotBasis);
//
return 1;
}  // end of main()


# SVM regression

### linear regression

• data points $\ \ \big(\mathbf{x}_i,y_i\big)$
• linear function $\ \ \mathbf{w}\cdot\mathbf{x}_i-b, \qquad R^d\,\to\, R$
$\displaystyle\qquad\quad \mathrm{min}\,\sum_i \big[y_i-(\mathbf{w}\cdot\mathbf{x}_i-b)\big]^2$
• least square fit

### soft SVM

• slag variables $\ \ \xi_i,\, \xi_i^*\ge 0$
• conditions
$$y_i-\mathbf{w}\cdot\mathbf{x}_i+b \le \epsilon+\xi_i, \qquad\quad \mathbf{w}\cdot\mathbf{x}_i-b-y_i \le \epsilon+\xi_i^*$$
• miminize
$$\frac{1}{2}\mathbf{w}^2 + C\,\sum_i\big(\xi_i+\xi_i^*\big)$$
• Lagrange parameters $\ \ \Rightarrow \ \$ dual form
: literature
• SVM regression involves a confidence intervall $\ \ 2\epsilon$

# non-linear classification

### overfitting

given enough parameters,
one can fit an elephant
• fitting $\ \Leftrightarrow \$ generalization dilemma of AI
• SVM parsimonious
: kernel trick

### non-linear transformation

• linear classification on transformed data
$\displaystyle\qquad\quad \mathbf{x}\ \to \mathbf{z},\qquad R^d \ \to \ R^D$
• high-dimensional $\ \ D$
: brain, neural nets, echo-state machine, $\dots$
• examples
$$\begin{array}{rcl c rcl c rcl} z_1 &=& x_1 &\quad& z_2&=& x_2 &\quad& z_3&=& x_1^2 \\ z_4 &=& \cos(x_7-x_8) &\quad& z_5&=& \tanh(x_5 x_6^2) &\quad& z_6&=& \sin(3 x_4^3) \\ \end{array}$$
• $\dots$ explosively large $\ \ D$

### classification

• parameters $\ \ \mathbf{w}_D=\big(w_1,\,\dots,\, w_{D-1}\big)$
• training data $\ \ \mathbf{x}_i \ \to\ \mathbf{z}_i$
$$\mathbf{w}_D\cdot\mathbf{z}_i-b \quad \to\quad \left\{ \begin{array}{rcl} >0 && \mbox{first class} \\ <0 && \mbox{second class} \end{array}\right.$$

# high-dimenional dual

• number of trainings data: $\ \ N$
$$\underset{\mathbf{a}}{\mathrm{max}}\left[\, \sum_{i=0}^{N-1} a_i-\frac{1}{2}\sum_{i,j=0}^{N-1} a_i H_{ij} a_j \,\right]\,, \qquad\quad H_{ij} = l_i\big(\mathbf{z}_i\cdot\mathbf{z}_j\big)l_j$$
• with slag variables
$$\mathbf{w}_D=\sum_{j=0}^N a_jl_j\mathbf{z}_j, \qquad\quad 0\le a_i\le C, \quad\qquad \sum_{i=0}^{N-1} a_il_i=0$$

### classification variable

$$\mathbf{w}_D\cdot\mathbf{z}_k-b = \sum_{j=0}^N a_jl_j\big(\mathbf{z}_j\cdot\mathbf{z}_k\big)-b$$
• depends only on D-dimensional scalar product $\ \ \mathbf{z}_j\cdot\mathbf{z}_k$
• idem for offset $$b=\mathbf{w}_D\cdot\mathbf{z}_s-l_s= \sum_{j=0}^N a_jl_j\big(\mathbf{z}_j\cdot\mathbf{z}_s\big)-l_s$$ for any $\ \ 0\le a_s\le S \ \$ (support vector $\ \mathbf{z}_s$)

# kernel trick

the large dimension D enters only implicitly
via the D-dimensional scalar products
$\ \ \mathbf{z}_i\cdot\mathbf{z}_j$
• kernel $\ \ K(\mathbf{u},\mathbf{v})$
$$\mathbf{z}_i\cdot\mathbf{z}_j\quad \to\quad K(\mathbf{x}_i,\mathbf{x}_j)$$
• rewrite everything in terms of the kernel
• Lagrangian
$$\underset{\mathbf{a}}{\mathrm{max}}\left[\, \sum_{i=0}^{N-1} a_i-\frac{1}{2}\sum_{i,j=0}^{N-1} a_i l_i K(\mathbf{x}_i,\mathbf{x}_j) l_ja_j \,\right]\,,$$
• classification argument $$\mathbf{w}_D\cdot\mathbf{z}_k-b = \sum_{j=0}^N a_jl_j K(\mathbf{x}_j,\mathbf{x}_k) -b, \qquad\quad b= \sum_j a_jl_j K(\mathbf{x}_j,\mathbf{x}_s)-l_s$$ combined
$$\mathbf{w}_D\cdot\mathbf{z}_k-b = \sum_{j=0}^N a_jl_j \Big[ K(\mathbf{x}_j,\mathbf{x}_k)-K(\mathbf{x}_j,\mathbf{x}_s) \Big] +l_s,$$
• may be averaged over support vectors

### kernel trick

• select simple kernels

# kernels

• polynomial of degeree $\ \ \alpha$
• $$K(\mathbf{u},\mathbf{v}) = \big(\mathbf{u}\cdot\mathbf{v}\big)^\alpha$$
• mixed polynomial
• $$K(\mathbf{u},\mathbf{v}) = \big(1+\mathbf{u}\cdot\mathbf{v}\big)^\alpha$$
• $$K(\mathbf{u},\mathbf{v}) = \exp\left(-\frac{\big(\mathbf{u}-\mathbf{v}\big)^2}{2\sigma^2}\right)$$
• sigmoidal (neuronal)
• $$K(\mathbf{u},\mathbf{v}) = \tanh\left(a\,\mathbf{u}\cdot\mathbf{u}-\theta\right)$$

### kernel optimization

• kernel may depend on (many) parameters
• tune kernel parameters using classification success as objective function
: stochastic search / gradient descent, $\dots$
: ML methods

# SVM kernel code

• change kernel in
double svmKernelFunction()
• classification by
double svmClassifyFunction()
• classfications curve and margins evaluated by bisection in
void findClassCurve()
• Langrange parameter optimized in
double svmKernelOpt()
: converged if all support vectors yield the same offset $\ \ b$
#include <iostream>
#include <sstream>     // for stringstream
#include <stdio.h>     // for printf
#include <string>      // c_str()
#include <cmath>       // for abs, sin, ..
#include <fstream>     // file streams
#include <stdlib.h>    // srand, rand
using namespace std;

/* contains
*
* double nextRandom()
* string xyValuesGnu()
* void plotViaGnuplot()
* void generateClassificationData()
* double svmKernelFunction()
* double svmClassifyFunction()
* double svmKernelOpt()
* void findClassCurve()
* void classifyData()
*/

// ***
// *** random number in [0,1]
// ***
double nextRandom()
{
static bool firstTime = true;
if (firstTime)
{
srand( (unsigned)time( NULL ) );
//   srand( 5450547 );                  // constant seed for testing
firstTime = false;
}
return (rand()/(double)RAND_MAX);
} // end of nextRandom()

// ***
// *** (x,y) string for inline data input for gnuplot
// ***

string xyValuesGnu(double *x, double *y, int nValues, string dataName)
{
stringstream ss;
//
ss << dataName << " << " << "EOD\n";               // defining data block
for (int i=0; i<nValues; i++)
ss << x[i] << " " << y[i] << endl;
ss << "EOD\n";                                     // terminating data block
//
return ss.str();
} // end of xyValuesGnu()

// ***
// *** plot data via gnuplot
// ***
void plotViaGnuplot(string dataNameOne,     string dataToPlotOne,
string goodNameOne,     string goodToPlotOne,
string suppNameOne,     string suppToPlotOne,
string missNameOne,     string missToPlotOne,
string dataNameTwo,     string dataToPlotTwo,
string goodNameTwo,     string goodToPlotTwo,
string suppNameTwo,     string suppToPlotTwo,
string missNameTwo,     string missToPlotTwo,
string dataNameAllCurve,string dataToPlotAllCurve,
string dataNameOneCurve,string dataToPlotOneCurve,
string dataNameTwoCurve,string dataToPlotTwoCurve
)
{
FILE *pipeGnu = popen("gnuplot", "w");            // streaming to gnuplot
fprintf(pipeGnu, "set term qt persist size 1000,800\n");
fprintf(pipeGnu, "set nokey\n");                  // no legend
//
fprintf(pipeGnu, "%s\n",dataToPlotOne.c_str());
fprintf(pipeGnu, "%s\n",goodToPlotOne.c_str());
fprintf(pipeGnu, "%s\n",suppToPlotOne.c_str());
fprintf(pipeGnu, "%s\n",missToPlotOne.c_str());
//
fprintf(pipeGnu, "%s\n",dataToPlotTwo.c_str());
fprintf(pipeGnu, "%s\n",goodToPlotTwo.c_str());
fprintf(pipeGnu, "%s\n",suppToPlotTwo.c_str());
fprintf(pipeGnu, "%s\n",missToPlotTwo.c_str());
//
fprintf(pipeGnu, "%s\n",dataToPlotAllCurve.c_str());
fprintf(pipeGnu, "%s\n",dataToPlotOneCurve.c_str());
fprintf(pipeGnu, "%s\n",dataToPlotTwoCurve.c_str());
//                                                 // plot data
fprintf(pipeGnu,   "plot \"%s\" w points pt 5 ps 4 lt rgb \"blue\"  \n", goodNameOne.c_str());
fprintf(pipeGnu, "replot \"%s\" w points pt 4 ps 4 lt rgb \"blue\"  \n", suppNameOne.c_str());
fprintf(pipeGnu, "replot \"%s\" w points pt 3 ps 4 lt rgb \"blue\"  \n", missNameOne.c_str());
//
fprintf(pipeGnu, "replot \"%s\" w points pt 5 ps 4 lt rgb \"red\"   \n", goodNameTwo.c_str());
fprintf(pipeGnu, "replot \"%s\" w points pt 4 ps 4 lt rgb \"red\"   \n", suppNameTwo.c_str());
fprintf(pipeGnu, "replot \"%s\" w points pt 3 ps 4 lt rgb \"red\"   \n", missNameTwo.c_str());
//
fprintf(pipeGnu, "replot \"%s\" w points pt 7 ps 2 lt rgb \"gold\" \n", dataNameAllCurve.c_str());
fprintf(pipeGnu, "replot \"%s\" w points pt 6 ps 2 lt rgb \"gold\" \n", dataNameOneCurve.c_str());
fprintf(pipeGnu, "replot \"%s\" w points pt 6 ps 2 lt rgb \"gold\" \n", dataNameTwoCurve.c_str());
//
fclose(pipeGnu);    // closing the pipe to gnuplot
} // end of plotViaGnuplot

// ***
// *** generate data to be classified; only for (dim==2)
// ***
template<int nOne, int nTwo>
void generateClassificationData(double dataClassOne[2][nOne], double dataClassTwo[2][nTwo])
{
double angleOne = -0.00*M_PI;          // angles of the large axis of the data with
double angleTwo =  1.10*M_PI;          // respect to the x-axis
double L1One[2], L1Two[2];             // large eigen-directions of data distribution
double L2One[2], L2Two[2];             // minor eigen-directions of data distribution
//
double L1OneLength = 2.5;              // respective lengths
double L2OneLength = 0.2;              // respective lengths
double L1TwoLength = 2.0;
double L2TwoLength = 0.3;
//
double rr, ss;
//
L1One[0] =  L1OneLength*cos(angleOne);
L1One[1] =  L1OneLength*sin(angleOne);
L2One[0] =  L2OneLength*sin(angleOne);   // L2 orthognal to L1
L2One[1] = -L2OneLength*cos(angleOne);
//
L1Two[0] =  L1TwoLength*cos(angleTwo);
L1Two[1] =  L1TwoLength*sin(angleTwo);
L2Two[0] =  L2TwoLength*sin(angleTwo);
L2Two[1] = -L2TwoLength*cos(angleTwo);
//
for (int iOne=0; iOne<nOne; iOne++)     // generate data for first class
{
rr = 2.0*nextRandom()-1.0;
ss = 2.0*nextRandom()-1.0;
dataClassOne[0][iOne] =  1.0 + rr*L1One[0] + ss*L2One[0];  // x component
dataClassOne[1][iOne] =  0.0 + rr*L1One[1] + ss*L2One[1];  // y component
}
//
for (int iTwo=0; iTwo<nTwo; iTwo++)     // generate data for second class
{
rr = 2.0*nextRandom()-1.0;
ss = 2.0*nextRandom()-1.0;
dataClassTwo[0][iTwo] = -1.0 + rr*L1Two[0] + ss*L2Two[0];
dataClassTwo[1][iTwo] =  0.0 + rr*L1Two[1] + ss*L2Two[1];
}
//
if (1==2)                               // test priting
{
for (int iOne=0; iOne<nOne; iOne++)
cout <<  dataClassOne[0][iOne] << " " << dataClassOne[1][iOne] << endl;
cout << endl;
for (int iTwo=0; iTwo<nTwo; iTwo++)
cout <<  dataClassTwo[0][iTwo] << " " << dataClassTwo[1][iTwo] << endl;
}
}    // end of generateClassificationData()

// ***
// *** kernel function
// ***
double svmKernelFunction(double uu[2], double vv[2])
{
double u_u, u_v, v_v;       // scalar prodocts
u_u = uu[0]*uu[0] + uu[1]*uu[1];
u_v = uu[0]*vv[0] + uu[1]*vv[1];
v_v = vv[0]*vv[0] + vv[1]*vv[1];
return u_v;                 // linear classfier
// return u_v+0.05*u_v*u_v;
// return u_v+0.05*u_u*v_v;
} // end of svmKernelFunction()

// ***
// *** discrimination function, returns \pm 1.0
// ***
template<int N>
double svmClassifyFunction(double svmData[2][N], double svmA[N], double svmL[N],
double kVector[2], double svmB)
{
double uu[2];
double rr = -svmB;
//
for (int j=0; j<N; j++)
{
uu[0] = svmData[0][j];
uu[1] = svmData[1][j];
rr += svmA[j]*svmL[j]*svmKernelFunction(uu,kVector);
}
return (rr>0.0)?1.0:(-1.0);
} // end of svmClassifyFunction()

// ***
// *** support vector machine optimization with kernel
// ***
template<int N>
double svmKernelOpt(double svmKernel[N][N], double svmL[N], double svmA[N])
{
int nIter      =  400000;    // fixed number of update iterations
double epsilon =    0.01;    // update rate
double svmC    =   100.0;    // upper threshold for Lagrange parameters
double rr;
double svmA_new[N];          // for synchroneous update
//
for (int iIter=0; iIter<nIter; iIter++)
{
for (int i=0; i<N; i++)
svmA_new[i] = svmA[i];
for (int i=0; i<N; i++)
{
rr = 0.0;
for (int j=0; j<N; j++)
rr += svmKernel[i][j]*svmL[j]*svmA[j];
svmA_new[i] += epsilon*(1.0-svmL[i]*rr);
}
for (int i=0; i<N; i++)
svmA[i] = svmA_new[i];
//                                                           // (2) orthogonalization
rr = 0.0;
for (int i=0; i<N; i++)
rr += svmA[i]*svmL[i];
for (int i=0; i<N; i++)
svmA[i] = svmA[i] - rr*svmL[i]/N;
//                                                           // (3) allowed range
for (int i=0; i<N; i++)
if (svmA[i]<0.0)
svmA[i] = 0.0;
for (int i=0; i<N; i++)
if (svmA[i]>svmC)
svmA[i] = svmC;
} // end of loop over iterations
//
if (1==2)                                                   // test printing
{
cout << "# C  " <<  svmC << endl;
printf("#%4s %5s %10s\n","", "class", "a_i");
for (int i=0; i<N; i++)
printf("#%4d %5d %10.4f\n",i, int(svmL[i]), svmA[i]);
}
//
return svmC;
} // end of svmKernelOpt()

// ***
// *** classification curve
// ***
template<int N, int nC>
void findClassCurve(double svmKernel[N][N], double svmData[2][N],
double svmL[N], double svmA[N], double svmB,
double svmAllCurve[2][nC], int &allCurveN,
double svmOneCurve[2][nC], int &oneCurveN,
double svmTwoCurve[2][nC], int &twoCurveN)
{
double  upper[2];                     // for bisection
double middle[2];
double  lower[2];
double upperYesNo, middleYesNo, lowerYesNo;
//
double xMax = -1000.0;   // range for separating curve
double xMin =  1000.0;
double yMax = -1000.0;
double yMin =  1000.0;
for (int i=0; i<N; i++)
{
if (svmData[0][i]>xMax) xMax = svmData[0][i];
if (svmData[1][i]>yMax) yMax = svmData[1][i];
if (svmData[0][i]<xMin) xMin = svmData[0][i];
if (svmData[1][i]<yMin) yMin = svmData[1][i];
}
//
if (1==2)                // test priniting
{
printf("# xMin, xMax : %8.2f %8.2f\n", xMin, xMax);
printf("# yMin, yMax : %8.2f %8.2f\n", yMin, yMax);
}
// -- --------------------------------------
// -- loop over x-range
// -- whichCurve = 0      : classifing curve
// -- whichCurve = \pm 1  : right/left margin
// -- --------------------------------------
for (int whichCurve=-1; whichCurve<2; whichCurve++)
for (int iC=0; iC<nC; iC++)          // loop over x-range
{
double xx  = xMin + (iC+0.5)*(xMax-xMin)/nC;
//
upper[0] = xx;
upper[1] = yMax;
//
middle[0] = xx;
middle[1] = 0.5*(yMax+yMin);
//
lower[0] = xx;
lower[1] = yMin;
//
upperYesNo = svmClassifyFunction(svmData, svmA, svmL, upper, svmB+whichCurve);
lowerYesNo = svmClassifyFunction(svmData, svmA, svmL, lower, svmB+whichCurve);
// ---
// --- bisection-loop
// ---
int nIter = 30;
if (upperYesNo*lowerYesNo<0.0)                     // of different sign
{
for (int iIter=0; iIter<nIter; iIter++)
{
middleYesNo = svmClassifyFunction(svmData, svmA, svmL, middle, svmB+whichCurve);
if (upperYesNo*middleYesNo<0.0)
{
lower[1] = middle[1];
lowerYesNo = middleYesNo;
}
else
{
upper[1] = middle[1];
upperYesNo = middleYesNo;
}
middle[1] = 0.5*(upper[1]+lower[1]);
}  // end of bisection iteration
//
if (whichCurve==-1)
{
svmOneCurve[0][oneCurveN] = middle[0];
svmOneCurve[1][oneCurveN] = middle[1];
oneCurveN++;
}
//
if (whichCurve==0)
{
svmAllCurve[0][allCurveN] = middle[0];
svmAllCurve[1][allCurveN] = middle[1];
allCurveN++;
}
//
if (whichCurve==1)
{
svmTwoCurve[0][twoCurveN] = middle[0];
svmTwoCurve[1][twoCurveN] = middle[1];
twoCurveN++;
}
}  // end if there is a curve
} // end of loop over x-range
} // end of  findClassCurve()

// ***
// *** classify data for plotting
// ***
template<int N1, int N2>
void classifyData(double dataClassOne[2][N1], double goodClassOne[2][N1],
double suppClassOne[2][N1], double missClassOne[2][N1],
double dataClassTwo[2][N2], double goodClassTwo[2][N2],
double suppClassTwo[2][N2], double missClassTwo[2][N2],
int &N1good, int &N1supp, int &N1miss,
int &N2good, int &N2supp, int &N2miss,
double svmC, double svmA[N1+N2], double svmL[N1+N2],
double svmData[2][N1+N2], double svmB
)
{
double kVector[2];
for (int i=0;i<N1;i++)
{
kVector[0] = svmData[0][i];
kVector[1] = svmData[1][i];
double yesNo = svmClassifyFunction(svmData, svmA, svmL, kVector, svmB);
//
if (yesNo*svmL[i]<0.0)                             // miss-classification
{
missClassOne[0][N1miss] = dataClassOne[0][i];
missClassOne[1][N1miss] = dataClassOne[1][i];
N1miss++;
}
else
if ((svmA[i]>0.001)&&(svmA[i]<(svmC-0.001)))     // support vector
{
suppClassOne[0][N1supp] = dataClassOne[0][i];
suppClassOne[1][N1supp] = dataClassOne[1][i];
N1supp++;
}
else                                           // correctly classified
{
goodClassOne[0][N1good] = dataClassOne[0][i];
goodClassOne[1][N1good] = dataClassOne[1][i];
N1good++;
}
}  // end of i..N1 loop
//
for (int j=0;j<N2;j++)
{
kVector[0] = svmData[0][N1+j];
kVector[1] = svmData[1][N1+j];
double yesNo = svmClassifyFunction(svmData, svmA, svmL, kVector, svmB);
//
if (yesNo*svmL[N1+j]<0.0)
{
missClassTwo[0][N2miss] = dataClassTwo[0][j];
missClassTwo[1][N2miss] = dataClassTwo[1][j];
N2miss++;
}
else
if ((svmA[N1+j]>0.001)&&(svmA[N1+j]<(svmC-0.001)))
{
suppClassTwo[0][N2supp] = dataClassTwo[0][j];
suppClassTwo[1][N2supp] = dataClassTwo[1][j];
N2supp++;
}
else
{
goodClassTwo[0][N2good] = dataClassTwo[0][j];
goodClassTwo[1][N2good] = dataClassTwo[1][j];
N2good++;
}
}  // end of j..N2 loop
} // end of classifyData()

// ***
// *** main
// ***
int main()
{
// --- ----------------------
// --- generate training data
// --- ----------------------
const int N1 = 22;                 // number of training data per class
const int N2 = 22;
double dataClassOne[2][N1];         // class data points
double dataClassTwo[2][N2];
//
generateClassificationData(dataClassOne,dataClassTwo);
// --- -------------------
// --- merge training data
// --- -------------------
const int N = N1+N2;       // combined
double svmData[2][N];      // combined training data
double svmL[N];            // class labels
double svmA[N];            // Lagrange parameters
//
for (int i=0;i<N1;i++)
{
svmData[0][i] = dataClassOne[0][i];  // merge data
svmData[1][i] = dataClassOne[1][i];
svmL[i]       = 1.0;
svmA[i]       = nextRandom();        // (positive) random initialization
}
for (int j=0;j<N2;j++)
{
svmData[0][N1+j] = dataClassTwo[0][j];
svmData[1][N1+j] = dataClassTwo[1][j];
svmL[N1+j]       = -1.0;
svmA[N1+j]       = nextRandom();
}
// --- ----------
// --- SVM kernel
// --- ----------
double svmKernel[N][N];    // the kernel
double uu[2], vv[2];
//
for (int i=0;i<N;i++)
for (int j=0;j<N;j++)
{
uu[0] = svmData[0][i];
uu[1] = svmData[1][i];
vv[0] = svmData[0][j];
vv[1] = svmData[1][j];
svmKernel[i][j] = svmKernelFunction(uu,vv);
}
// --- ----------------------------
// --- SVM (support vector machine)
// --- ----------------------------
double svmB;               // hyperplane offset
double svmC;               // upper limit for Lagrange parameters
double svmDataPlane[2][2]; // orthogonal to weight vector
double svm_m1_Plane[2][2]; // margin one hyperplane
double svm_m2_Plane[2][2]; // margin two hyperplane
//
svmC = svmKernelOpt(svmKernel, svmL, svmA);    // determine the Lagrange parameters
// --- --------------------
// --- find support indices
// --- --------------------
int svmSuppN      = 0;        // total numbe of support vectors
int svmSuppIndices[N];        // set of indices to support vectors
//
for (int i=0; i<N; i++)
if ((svmA[i]>0.001)&&(svmA[i]<(svmC-0.001)))
{
svmSuppIndices[svmSuppN] = i;
svmSuppN++;
}
// --- ----------------------
// --- calculate curve offset
// --- ----------------------
double supportVector[2];
//
for (int iSupp=0; iSupp<svmSuppN; iSupp++)
{
int ss = svmSuppIndices[iSupp];
svmB = -svmL[ss];
for (int j=0; j<N; j++)
svmB  += svmA[j]*svmL[j]*svmKernel[j][ss];
printf("# ss, svmB %6d %10.4f\n", ss, svmB);  // all svmB identical if converged
}
// --- -------
// --- testing
// --- -------
if (1==2)
for (int i=0; i<N; i++)
{
double kVector[2];
kVector[0] = svmData[0][i];
kVector[1] = svmData[1][i];
//
double yesNo = svmClassifyFunction(svmData, svmA, svmL, kVector, svmB);
printf("#%4d %5d %10.4f %6.2f %6.2f\n",
i, int(svmL[i]), svmA[i], yesNo, yesNo*svmL[i]);
}
// --- -------------------------
// --- find classification curve
// --- -------------------------
double svmAllCurve[2][100];   // location of classification curve
double svmOneCurve[2][100];   // of margins
double svmTwoCurve[2][100];
int allCurveN=0, oneCurveN=0, twoCurveN=0;  // # of non-zero entries
//
findClassCurve(svmKernel, svmData,
svmL, svmA, svmB,
svmAllCurve, allCurveN,
svmOneCurve, oneCurveN,
svmTwoCurve, twoCurveN);
// --- -----------------------------------
// --- classify training data for plotting
// --- -----------------------------------
double goodClassOne[2][N1];        // correctly classified data
double suppClassOne[2][N1];        // support vectors
double missClassOne[2][N1];        // miss-classified
double goodClassTwo[2][N2];
double suppClassTwo[2][N2];
double missClassTwo[2][N2];
int N1good=0, N1supp=0, N1miss=0;  // # of correctly classified / SV /
int N2good=0, N2supp=0, N2miss=0;  //   miss-classified
//
classifyData(dataClassOne, goodClassOne,
suppClassOne, missClassOne,
dataClassTwo, goodClassTwo,
suppClassTwo, missClassTwo,
N1good, N1supp, N1miss,
N2good, N2supp, N2miss,
svmC, svmA, svmL,
svmData, svmB);
if (1==1)                          // test printing
{
printf("# N1 good/supp/miss %6d %6d %6d \n", N1good, N1supp, N1miss);
printf("# N2 good/supp/miss %6d %6d %6d \n", N2good, N2supp, N2miss);
}
// --- --------
// --- plotting
// --- --------
string dataNameOne      = "$dataOne"; string goodNameOne = "$goodOne";
string suppNameOne      = "$suppOne"; string missNameOne = "$missOne";
///
string dataNameTwo      = "$dataTwo"; string goodNameTwo = "$goodTwo";
string suppNameTwo      = "$suppTwo"; string missNameTwo = "$missTwo";
//
string dataNameAllCurve = "$dataAllCurve"; string dataNameOneCurve = "$dataOneCurve";
string dataNameTwoCurve = "\$dataTwoCurve";
//
string dataToPlotOne      = xyValuesGnu(dataClassOne[0],
dataClassOne[1],N1    ,dataNameOne);  // data to string
string goodToPlotOne      = xyValuesGnu(goodClassOne[0],
goodClassOne[1],N1good,goodNameOne);  // data to string
string suppToPlotOne      = xyValuesGnu(suppClassOne[0],
suppClassOne[1],N1supp,suppNameOne);  // data to string
string missToPlotOne      = xyValuesGnu(missClassOne[0],
missClassOne[1],N1miss,missNameOne);  // data to string
//
string dataToPlotTwo      = xyValuesGnu(dataClassTwo[0],
dataClassTwo[1],N2    ,dataNameTwo);
string goodToPlotTwo      = xyValuesGnu(goodClassTwo[0],
goodClassTwo[1],N2good,goodNameTwo);
string suppToPlotTwo      = xyValuesGnu(suppClassTwo[0],
suppClassTwo[1],N2supp,suppNameTwo);
string missToPlotTwo      = xyValuesGnu(missClassTwo[0],
missClassTwo[1],N2miss,missNameTwo);
//
string dataToPlotAllCurve = xyValuesGnu(svmAllCurve[0],
svmAllCurve[1],allCurveN,dataNameAllCurve);
string dataToPlotOneCurve = xyValuesGnu(svmOneCurve[0],
svmOneCurve[1],oneCurveN,dataNameOneCurve);
string dataToPlotTwoCurve = xyValuesGnu(svmTwoCurve[0],
svmTwoCurve[1],oneCurveN,dataNameTwoCurve);
//
plotViaGnuplot(dataNameOne,     dataToPlotOne,
goodNameOne,     goodToPlotOne,
suppNameOne,     suppToPlotOne,
missNameOne,     missToPlotOne,
dataNameTwo,     dataToPlotTwo,
goodNameTwo,     goodToPlotTwo,
suppNameTwo,     suppToPlotTwo,
missNameTwo,     missToPlotTwo,
dataNameAllCurve,dataToPlotAllCurve,
dataNameOneCurve,dataToPlotOneCurve,
dataNameTwoCurve,dataToPlotTwoCurve
);
//
return 1;
}  // end of main()