Advanced Introduction to C++, Scientific Computing and Machine Learning




Claudius Gros, WS 2019/20

Institut für theoretische Physik
Goethe-University Frankfurt a.M.

SVM - Soft Margins & Kernel Trick

non-separable training data



soft contraint

$$ \begin{array}{rclcrcl} \mathbf{w}\cdot\mathbf{x}_i&\ge& b+1-\xi_i &\quad\mbox{for}\quad & y_i&=&+1 \\ \mathbf{w}\cdot\mathbf{x}_i&\le& b-1+\xi_i&\quad\mbox{for}\quad & y_i&=&-1 \\ \end{array} $$

optimization condition

$$ \mathrm{min}\left[ \frac{1}{2}\mathbf{w}^2 +C\,\sum_i \xi \right]\,,\qquad\quad l_i\big(\mathbf{w}\cdot\mathbf{x}_i-b\big)- 1+\xi_i\ge 0 $$

dual form

$$ L_P = \frac{\mathbf{w}^2}{2}+C\,\sum_i\xi_i -\sum_i a_i \Big[l_i\big(\mathbf{w}\cdot\mathbf{x}_i-b\big)-1+\xi_i\Big] -\sum_ib_i\xi_i $$ $$ \frac{\partial L_P}{\partial\mathbf{w}} = 0 \qquad\quad \Rightarrow \qquad\quad \fbox{$\displaystyle\phantom{\big|} \mathbf{w} = \sum_i a_i l_i\mathbf{x}_i \phantom{\big|}$} $$ $$ \frac{\partial L_P}{\partial b} = 0 \qquad\quad \Rightarrow \qquad\quad \fbox{$\displaystyle\phantom{\big|} \sum_i a_i l_i=0 \phantom{\big|}$} $$ $$ \frac{\partial L_P}{\partial \xi_i} = 0 \qquad \Rightarrow \qquad C=a_i+b_i \qquad \Rightarrow \qquad \fbox{$\displaystyle\phantom{\big|} 0\le a_i\le C \phantom{\big|}$} $$

soft margins - numerics

$$ \underset{\{a_l\}}{\mathrm{max}}\left[ \sum_i a_i-\frac{1}{2}\sum_{ij} a_iH_{ij}a_j\right]\,, \qquad 0\le a_i \le C, \qquad \sum_i a_il_i=0 $$

generalized support vectors

$$ a_i \quad\to\quad \left\{\begin{array}{rcl} 0 & & \mathrm{correctly\ classified} \\ ]0,C[ & & \mathrm{support\ vector} \\ C & & \mathrm{miss-classified} \\ \end{array} \right. $$
$$ b=\frac{1}{N_S}\sum_{s\in S}\big(\mathbf{w}\cdot\mathbf{x}_s-l_s\big) $$

SVM code: linear/non-seperable

Copy Copy to clipboad
Downlaod Download
#include <iostream>
#include <sstream>     // for stringstream 
#include <stdio.h>     // for printf
#include <string>      // c_str()
#include <cmath>       // for abs
#include <fstream>     // file streams
#include <stdlib.h>    // srand, rand
using namespace std;

/* contains
 *
 * double nextRandom()
 * string xyValuesGnu()
 * void plotViaGnuplot()
 * void generateClassificationData()
 * void svmGradientAscent() 
 */


// ***
// *** random number in [0,1] 
// ***
double nextRandom()
{ static bool firstTime = true;
 if (firstTime)
   {
   srand( (unsigned)time( NULL ) );
   firstTime = false;
   }
 return (rand()/(double)RAND_MAX); 
} // end of nextRandom()


// ***
// *** (x,y) string for inline data input for gnuplot
// ***

template<int N>
string xyValuesGnu(double (&xyValues)[2][N], string dataName)
{
 stringstream ss;
 ss << dataName << " << " << "EOD\n";       // defining data block
 for (int i=0; i<N; i++)
   ss << xyValues[0][i] << " " << xyValues[1][i] << endl;
 ss << "EOD\n";                             // terminating data block
//
 return ss.str();
} // end of xyValuesGnu()


// *** 
// *** plot data via gnuplot
// *** 

void plotViaGnuplot(string dataNameOne,     string dataToPlotOne,
                    string dataNameTwo,     string dataToPlotTwo,
                    string dataNamePlaneSVM,string dataToPlotPlaneSVM,
                    string dataNameP_m1_SVM,string dataToPlotP_m1_SVM,
                    string dataNameP_m2_SVM,string dataToPlotP_m2_SVM,
                    string dataNameBasis,   string dataToPlotBasis)
{
 FILE *pipeGnu = popen("gnuplot", "w");            // streaming to gnuplot
 fprintf(pipeGnu, "set term qt persist size 1000,800\n");
 fprintf(pipeGnu, "set nokey\n");                  // no legend
//
 fprintf(pipeGnu, "%s\n",dataToPlotOne.c_str());   
 fprintf(pipeGnu, "%s\n",dataToPlotTwo.c_str());   
 fprintf(pipeGnu, "%s\n",dataToPlotPlaneSVM.c_str());
 fprintf(pipeGnu, "%s\n",dataToPlotP_m1_SVM.c_str());
 fprintf(pipeGnu, "%s\n",dataToPlotP_m2_SVM.c_str());
 fprintf(pipeGnu, "%s\n",dataToPlotBasis.c_str());   
//                                                 // plot data
 fprintf(pipeGnu,   "plot \"%s\" w points pt 5 ps 4 \n", dataNameOne.c_str());
 fprintf(pipeGnu, "replot \"%s\" w points pt 5 ps 4 \n", dataNameTwo.c_str());
 fprintf(pipeGnu, "replot \"%s\" w lines lw 7 lt rgb \"gold\" \n", dataNamePlaneSVM.c_str());
 fprintf(pipeGnu, "replot \"%s\" w lines lw 2 lt rgb \"gold\" \n", dataNameP_m1_SVM.c_str());
 fprintf(pipeGnu, "replot \"%s\" w lines lw 2 lt rgb \"gold\" \n", dataNameP_m2_SVM.c_str());
 fprintf(pipeGnu, "replot \"%s\" w linesp pt 7 ps 4 lw 4 \n", dataNameBasis.c_str());
//
 fclose(pipeGnu);    // closing the pipe to gnuplot
} // end of plotViaGnuplot


// ***
// *** generate data to be classified; only for (dim==2)
// ***
template<int nOne, int nTwo>
void generateClassificationData(double dataClassOne[2][nOne], double dataClassTwo[2][nTwo]) 
{
 double angleOne =  0.75*M_PI;          // angles of the large axis of the data with
 double angleTwo = -0.75*M_PI;          // respect to the x-axis
 double L1One[2], L1Two[2];             // large eigen-directions of data distribution
 double L2One[2], L2Two[2];             // minor eigen-directions of data distribution
//
 double L1OneLength = 2.0;              // respective lengths
 double L2OneLength = 0.3;              // respective lengths
 double L1TwoLength = 2.0;         
 double L2TwoLength = 0.3;        
//
 double rr, ss;
//
 L1One[0] =  L1OneLength*cos(angleOne);
 L1One[1] =  L1OneLength*sin(angleOne);
 L2One[0] =  L2OneLength*sin(angleOne);   // L2 orthognal to L1
 L2One[1] = -L2OneLength*cos(angleOne);
//
 L1Two[0] =  L1TwoLength*cos(angleTwo);
 L1Two[1] =  L1TwoLength*sin(angleTwo);
 L2Two[0] =  L2TwoLength*sin(angleTwo);   
 L2Two[1] = -L2TwoLength*cos(angleTwo);
//
 for (int iOne=0; iOne<nOne; iOne++)     // generate data for first class
  {
  rr = 2.0*nextRandom()-1.0;
  ss = 2.0*nextRandom()-1.0;
  dataClassOne[0][iOne] =  1.0 + rr*L1One[0] + ss*L2One[0];  // x component
  dataClassOne[1][iOne] =  0.0 + rr*L1One[1] + ss*L2One[1];  // y component
  }
//
 for (int iTwo=0; iTwo<nTwo; iTwo++)     // generate data for second class
  {
  rr = 2.0*nextRandom()-1.0;
  ss = 2.0*nextRandom()-1.0;
  dataClassTwo[0][iTwo] = -1.0 + rr*L1Two[0] + ss*L2Two[0];
  dataClassTwo[1][iTwo] =  0.0 + rr*L1Two[1] + ss*L2Two[1]; 
  }
// test printing
 if (1==2)
   {
   for (int iOne=0; iOne<nOne; iOne++)   
     cout <<  dataClassOne[0][iOne] << " " << dataClassOne[1][iOne] << endl;
   cout << endl;
   for (int iTwo=0; iTwo<nTwo; iTwo++)   
     cout <<  dataClassTwo[0][iTwo] << " " << dataClassTwo[1][iTwo] << endl;
   }
}    // end of generateClassificationData()
 

// ***
// *** support vector machine optimization
// ***
template<int N>
void svmGradientAscent(double svmData[2][N], double svmL[N], double svmA[N], 
                       double &svmB, double svmW[2], double svmDataPlane[2][2],
                       double svm_m1_Plane[2][2], double svm_m2_Plane[2][2])
{
 int nIter      =  400000;    // fixed number of update iterations
 double epsilon =    0.01;    // update rate
 double svmC    =   100.0;    // upper threshold for Lagrange parameters
 double rr; 
//
 for (int iIter=0; iIter<nIter; iIter++)
   {
   svmW[0] = 0.0;
   svmW[1] = 0.0;
   for (int i=0; i<N; i++)
     for (int k=0; k<2; k++)
        svmW[k] += svmA[i]*svmL[i]*svmData[k][i];
//                                                           // (1) gradient
   for (int i=0; i<N; i++)
     {
     rr = svmW[0]*svmData[0][i] + svmW[1]*svmData[1][i];
     svmA[i] += epsilon*(1.0-svmL[i]*rr);
     }
//                                                           // (2) orthogonalization
   rr = 0.0;
   for (int i=0; i<N; i++)
     rr += svmA[i]*svmL[i];
   for (int i=0; i<N; i++)
     svmA[i] = svmA[i] - rr*svmL[i]/N;
//                                                           // (3) allowed range
   for (int i=0; i<N; i++)
     if (svmA[i]<0.0)
       svmA[i] = 0.0;
   for (int i=0; i<N; i++)
     if (svmA[i]>svmC)
       svmA[i] = svmC;
   } // end of loop over iterations
//
//                                                           // offset
//
 svmB = 0.0;
 int nSupportVectors = 0;
//
 for (int i=0; i<N; i++)
   if ((svmA[i]>0.001)&&(svmA[i]<svmC-0.001))
     {
     nSupportVectors++;
     svmB += svmW[0]*svmData[0][i] + svmW[1]*svmData[1][i] - svmL[i];
//
     cout << "# b  " <<  svmW[0]*svmData[0][i] + svmW[1]*svmData[1][i] - svmL[i] << endl;
     }
 svmB = svmB / nSupportVectors;
//
//                                                           // data plane
//
 rr = sqrt(svmW[0]*svmW[0]+svmW[1]*svmW[1]);
//                                         // w*b/|w|^2   on plane   w*x=b
 svmDataPlane[0][0] = svmB*svmW[0]/(rr*rr) + 1.8*svmW[1]/rr;  
 svmDataPlane[1][0] = svmB*svmW[1]/(rr*rr) - 1.8*svmW[0]/rr;
 svmDataPlane[0][1] = svmB*svmW[0]/(rr*rr) - 1.8*svmW[1]/rr; 
 svmDataPlane[1][1] = svmB*svmW[1]/(rr*rr) + 1.8*svmW[0]/rr;
//                                                           // margin planes
 svm_m1_Plane[0][0] = (svmB-1)*svmW[0]/(rr*rr) + 1.8*svmW[1]/rr;  
 svm_m1_Plane[1][0] = (svmB-1)*svmW[1]/(rr*rr) - 1.8*svmW[0]/rr;
 svm_m1_Plane[0][1] = (svmB-1)*svmW[0]/(rr*rr) - 1.8*svmW[1]/rr; 
 svm_m1_Plane[1][1] = (svmB-1)*svmW[1]/(rr*rr) + 1.8*svmW[0]/rr;
//
 svm_m2_Plane[0][0] = (svmB+1)*svmW[0]/(rr*rr) + 1.8*svmW[1]/rr;  
 svm_m2_Plane[1][0] = (svmB+1)*svmW[1]/(rr*rr) - 1.8*svmW[0]/rr;
 svm_m2_Plane[0][1] = (svmB+1)*svmW[0]/(rr*rr) - 1.8*svmW[1]/rr;
 svm_m2_Plane[1][1] = (svmB+1)*svmW[1]/(rr*rr) + 1.8*svmW[0]/rr;
//
//                                                           // test printout
//
 if (1==1)
   {
   cout << "# C  " <<  svmC << endl;
   printf("#%4s %5s %10s %12s\n","", "class", "a_i", "w*x_i-b");
   for (int i=0; i<N; i++)
     printf("#%4d %5d %10.4f %12.4f\n",i, int(svmL[i]), svmA[i], 
             svmData[0][i]*svmW[0]+svmData[1][i]*svmW[1] - svmB );   // support condition
   }
//
} // end of svmGradientAscent()


// ***
// *** main
// ***
int main() 
{
 const int N1 =  10;                 // number of training data per class
 const int N2 =  10;
 double dataClassOne[2][N1];         // class data points 
 double dataClassTwo[2][N2];
 double dataBasis[2][2];             // connecting the center of masses
//
 dataBasis[0][0] = -1.0;             // (-1,0)
 dataBasis[1][0] =  0.0;    
 dataBasis[0][1] =  1.0;             // (1,0)
 dataBasis[1][1] =  0.0;    
//
 generateClassificationData(dataClassOne,dataClassTwo); 
//
//
// --- ------------------
// --- start code for SVM (support vector machine)
// --- ------------------
 const int N = N1+N2;       // combined 
 double svmData[2][N];      // combined training data
 double svmL[N];            // class labels
 double svmA[N];            // Lagrange parameters
 double svmB;               // hyperplane offset
 double svmW[2];            // weight vector
 double svmDataPlane[2][2]; // orthogonal to weight vector
 double svm_m1_Plane[2][2]; // margin one hyperplane
 double svm_m2_Plane[2][2]; // margin two hyperplane
//
 for (int i=0;i<N1;i++)     
   {
   svmData[0][i] = dataClassOne[0][i];  // merge data
   svmData[1][i] = dataClassOne[1][i]; 
   svmL[i]       = 1.0;
   svmA[i]       = nextRandom();        // (positive) random initialization
   }
 for (int j=0;j<N2;j++)     
   {
   svmData[0][N1+j] = dataClassTwo[0][j]; 
   svmData[1][N1+j] = dataClassTwo[1][j]; 
   svmL[N1+j]       = -1.0;
   svmA[N1+j]       = nextRandom();    
   }
 svmGradientAscent(svmData, svmL, svmA, svmB, svmW, svmDataPlane, svm_m1_Plane, svm_m2_Plane);
// --- ------------------
// ---  end  code for SVM 
// --- ------------------
 string dataNameOne      = "$dataOne";     
 string dataNameTwo      = "$dataTwo";                             
 string dataNamePlaneSVM = "$dataPlaneSVM";                             
 string dataNameP_m1_SVM = "$dataP_m1_SVM";                             
 string dataNameP_m2_SVM = "$dataP_m2_SVM";                             
 string dataNameBasis    = "$dataBasis";                             
 string dataToPlotOne      = xyValuesGnu(dataClassOne,dataNameOne);  // data to string
 string dataToPlotTwo      = xyValuesGnu(dataClassTwo,dataNameTwo);  
 string dataToPlotPlaneSVM = xyValuesGnu(svmDataPlane,dataNamePlaneSVM);  
 string dataToPlotP_m1_SVM = xyValuesGnu(svm_m1_Plane,dataNameP_m1_SVM);  
 string dataToPlotP_m2_SVM = xyValuesGnu(svm_m2_Plane,dataNameP_m2_SVM);  
 string dataToPlotBasis    = xyValuesGnu(dataBasis,dataNameBasis);  
//
//
 plotViaGnuplot(dataNameOne,     dataToPlotOne,
                dataNameTwo,     dataToPlotTwo, 
                dataNamePlaneSVM,dataToPlotPlaneSVM,
                dataNameP_m1_SVM,dataToPlotP_m1_SVM,
                dataNameP_m2_SVM,dataToPlotP_m2_SVM,
                dataNameBasis,   dataToPlotBasis);
//
 return 1;
}  // end of main()

SVM regression



linear regression

$\displaystyle\qquad\quad \mathrm{min}\,\sum_i \big[y_i-(\mathbf{w}\cdot\mathbf{x}_i-b)\big]^2 $

soft SVM

$$ y_i-\mathbf{w}\cdot\mathbf{x}_i+b \le \epsilon+\xi_i, \qquad\quad \mathbf{w}\cdot\mathbf{x}_i-b-y_i \le \epsilon+\xi_i^* $$
$$ \frac{1}{2}\mathbf{w}^2 + C\,\sum_i\big(\xi_i+\xi_i^*\big) $$

non-linear classification



overfitting

given enough parameters,
one can fit an elephant

non-linear transformation

$$ \begin{array}{rcl c rcl c rcl} z_1 &=& x_1 &\quad& z_2&=& x_2 &\quad& z_3&=& x_1^2 \\ z_4 &=& \cos(x_7-x_8) &\quad& z_5&=& \tanh(x_5 x_6^2) &\quad& z_6&=& \sin(3 x_4^3) \\ \end{array} $$

classification

$$ \mathbf{w}_D\cdot\mathbf{z}_i-b \quad \to\quad \left\{ \begin{array}{rcl} >0 && \mbox{first class} \\ <0 && \mbox{second class} \end{array}\right. $$

high-dimenional dual

$$ \underset{\mathbf{a}}{\mathrm{max}}\left[\, \sum_{i=0}^{N-1} a_i-\frac{1}{2}\sum_{i,j=0}^{N-1} a_i H_{ij} a_j \,\right]\,, \qquad\quad H_{ij} = l_i\big(\mathbf{z}_i\cdot\mathbf{z}_j\big)l_j $$ $$ \mathbf{w}_D=\sum_{j=0}^N a_jl_j\mathbf{z}_j, \qquad\quad 0\le a_i\le C, \quad\qquad \sum_{i=0}^{N-1} a_il_i=0 $$

classification variable

$$ \mathbf{w}_D\cdot\mathbf{z}_k-b = \sum_{j=0}^N a_jl_j\big(\mathbf{z}_j\cdot\mathbf{z}_k\big)-b $$

kernel trick

the large dimension D enters only implicitly
via the D-dimensional scalar products
$\ \ \mathbf{z}_i\cdot\mathbf{z}_j$
$$ \mathbf{z}_i\cdot\mathbf{z}_j\quad \to\quad K(\mathbf{x}_i,\mathbf{x}_j) $$ $$ \underset{\mathbf{a}}{\mathrm{max}}\left[\, \sum_{i=0}^{N-1} a_i-\frac{1}{2}\sum_{i,j=0}^{N-1} a_i l_i K(\mathbf{x}_i,\mathbf{x}_j) l_ja_j \,\right]\,, $$
$$ \mathbf{w}_D\cdot\mathbf{z}_k-b = \sum_{j=0}^N a_jl_j \Big[ K(\mathbf{x}_j,\mathbf{x}_k)-K(\mathbf{x}_j,\mathbf{x}_s) \Big] +l_s, $$

kernel trick

kernels

kernel optimization

SVM kernel code

Copy Copy to clipboad
Downlaod Download
#include <iostream>
#include <sstream>     // for stringstream 
#include <stdio.h>     // for printf
#include <string>      // c_str()
#include <cmath>       // for abs, sin, ..
#include <fstream>     // file streams
#include <stdlib.h>    // srand, rand
using namespace std;

/* contains
 *
 * double nextRandom()
 * string xyValuesGnu()
 * void plotViaGnuplot()
 * void generateClassificationData()
 * double svmKernelFunction()
 * double svmClassifyFunction()
 * double svmKernelOpt()
 * void findClassCurve()
 * void classifyData() 
 */


// ***
// *** random number in [0,1] 
// ***
double nextRandom()
{ 
 static bool firstTime = true;
 if (firstTime)
   {
   srand( (unsigned)time( NULL ) );
//   srand( 5450547 );                  // constant seed for testing
   firstTime = false;
   }
 return (rand()/(double)RAND_MAX); 
} // end of nextRandom()


// ***
// *** (x,y) string for inline data input for gnuplot
// ***

string xyValuesGnu(double *x, double *y, int nValues, string dataName)
{
 stringstream ss;
//
 ss << dataName << " << " << "EOD\n";               // defining data block
 for (int i=0; i<nValues; i++)
   ss << x[i] << " " << y[i] << endl;
 ss << "EOD\n";                                     // terminating data block
//
 return ss.str();
} // end of xyValuesGnu()


// *** 
// *** plot data via gnuplot
// *** 
void plotViaGnuplot(string dataNameOne,     string dataToPlotOne,
                    string goodNameOne,     string goodToPlotOne,
                    string suppNameOne,     string suppToPlotOne,
                    string missNameOne,     string missToPlotOne,
                    string dataNameTwo,     string dataToPlotTwo,
                    string goodNameTwo,     string goodToPlotTwo,
                    string suppNameTwo,     string suppToPlotTwo,
                    string missNameTwo,     string missToPlotTwo,
                    string dataNameAllCurve,string dataToPlotAllCurve,
                    string dataNameOneCurve,string dataToPlotOneCurve,
                    string dataNameTwoCurve,string dataToPlotTwoCurve
                   )
{
 FILE *pipeGnu = popen("gnuplot", "w");            // streaming to gnuplot
 fprintf(pipeGnu, "set term qt persist size 1000,800\n");
 fprintf(pipeGnu, "set nokey\n");                  // no legend
//
 fprintf(pipeGnu, "%s\n",dataToPlotOne.c_str());   
 fprintf(pipeGnu, "%s\n",goodToPlotOne.c_str());   
 fprintf(pipeGnu, "%s\n",suppToPlotOne.c_str());   
 fprintf(pipeGnu, "%s\n",missToPlotOne.c_str());   
//
 fprintf(pipeGnu, "%s\n",dataToPlotTwo.c_str());   
 fprintf(pipeGnu, "%s\n",goodToPlotTwo.c_str());   
 fprintf(pipeGnu, "%s\n",suppToPlotTwo.c_str());   
 fprintf(pipeGnu, "%s\n",missToPlotTwo.c_str());   
//
 fprintf(pipeGnu, "%s\n",dataToPlotAllCurve.c_str());   
 fprintf(pipeGnu, "%s\n",dataToPlotOneCurve.c_str());   
 fprintf(pipeGnu, "%s\n",dataToPlotTwoCurve.c_str());   
//                                                 // plot data
 fprintf(pipeGnu,   "plot \"%s\" w points pt 5 ps 4 lt rgb \"blue\"  \n", goodNameOne.c_str());
 fprintf(pipeGnu, "replot \"%s\" w points pt 4 ps 4 lt rgb \"blue\"  \n", suppNameOne.c_str());
 fprintf(pipeGnu, "replot \"%s\" w points pt 3 ps 4 lt rgb \"blue\"  \n", missNameOne.c_str());
//
 fprintf(pipeGnu, "replot \"%s\" w points pt 5 ps 4 lt rgb \"red\"   \n", goodNameTwo.c_str());
 fprintf(pipeGnu, "replot \"%s\" w points pt 4 ps 4 lt rgb \"red\"   \n", suppNameTwo.c_str());
 fprintf(pipeGnu, "replot \"%s\" w points pt 3 ps 4 lt rgb \"red\"   \n", missNameTwo.c_str());
//
 fprintf(pipeGnu, "replot \"%s\" w points pt 7 ps 2 lt rgb \"gold\" \n", dataNameAllCurve.c_str());
 fprintf(pipeGnu, "replot \"%s\" w points pt 6 ps 2 lt rgb \"gold\" \n", dataNameOneCurve.c_str());
 fprintf(pipeGnu, "replot \"%s\" w points pt 6 ps 2 lt rgb \"gold\" \n", dataNameTwoCurve.c_str());
//
 fclose(pipeGnu);    // closing the pipe to gnuplot
} // end of plotViaGnuplot


// ***
// *** generate data to be classified; only for (dim==2)
// ***
template<int nOne, int nTwo>
void generateClassificationData(double dataClassOne[2][nOne], double dataClassTwo[2][nTwo]) 
{
 double angleOne = -0.00*M_PI;          // angles of the large axis of the data with
 double angleTwo =  1.10*M_PI;          // respect to the x-axis
 double L1One[2], L1Two[2];             // large eigen-directions of data distribution
 double L2One[2], L2Two[2];             // minor eigen-directions of data distribution
//
 double L1OneLength = 2.5;              // respective lengths
 double L2OneLength = 0.2;              // respective lengths
 double L1TwoLength = 2.0;         
 double L2TwoLength = 0.3;        
//
 double rr, ss;
//
 L1One[0] =  L1OneLength*cos(angleOne);
 L1One[1] =  L1OneLength*sin(angleOne);
 L2One[0] =  L2OneLength*sin(angleOne);   // L2 orthognal to L1
 L2One[1] = -L2OneLength*cos(angleOne);
//
 L1Two[0] =  L1TwoLength*cos(angleTwo);
 L1Two[1] =  L1TwoLength*sin(angleTwo);
 L2Two[0] =  L2TwoLength*sin(angleTwo);   
 L2Two[1] = -L2TwoLength*cos(angleTwo);
//
 for (int iOne=0; iOne<nOne; iOne++)     // generate data for first class
  {
  rr = 2.0*nextRandom()-1.0;
  ss = 2.0*nextRandom()-1.0;
  dataClassOne[0][iOne] =  1.0 + rr*L1One[0] + ss*L2One[0];  // x component
  dataClassOne[1][iOne] =  0.0 + rr*L1One[1] + ss*L2One[1];  // y component
  }
//
 for (int iTwo=0; iTwo<nTwo; iTwo++)     // generate data for second class
  {
  rr = 2.0*nextRandom()-1.0;
  ss = 2.0*nextRandom()-1.0;
  dataClassTwo[0][iTwo] = -1.0 + rr*L1Two[0] + ss*L2Two[0];
  dataClassTwo[1][iTwo] =  0.0 + rr*L1Two[1] + ss*L2Two[1]; 
  }
// 
 if (1==2)                               // test priting
   {
   for (int iOne=0; iOne<nOne; iOne++)   
     cout <<  dataClassOne[0][iOne] << " " << dataClassOne[1][iOne] << endl;
   cout << endl;
   for (int iTwo=0; iTwo<nTwo; iTwo++)   
     cout <<  dataClassTwo[0][iTwo] << " " << dataClassTwo[1][iTwo] << endl;
   }
}    // end of generateClassificationData()
 

// ***
// *** kernel function
// ***
double svmKernelFunction(double uu[2], double vv[2])
{
 double u_u, u_v, v_v;       // scalar prodocts
 u_u = uu[0]*uu[0] + uu[1]*uu[1];
 u_v = uu[0]*vv[0] + uu[1]*vv[1];
 v_v = vv[0]*vv[0] + vv[1]*vv[1];
 return u_v;                 // linear classfier
// return u_v+0.05*u_v*u_v;
// return u_v+0.05*u_u*v_v;
} // end of svmKernelFunction()


// ***
// *** discrimination function, returns \pm 1.0
// ***
template<int N>
double svmClassifyFunction(double svmData[2][N], double svmA[N], double svmL[N],
                           double kVector[2], double svmB)
{
 double uu[2];
 double rr = -svmB;
// 
 for (int j=0; j<N; j++)
   {
   uu[0] = svmData[0][j];
   uu[1] = svmData[1][j];
   rr += svmA[j]*svmL[j]*svmKernelFunction(uu,kVector);
   }
 return (rr>0.0)?1.0:(-1.0);
} // end of svmClassifyFunction()


// ***
// *** support vector machine optimization with kernel
// ***
template<int N>
double svmKernelOpt(double svmKernel[N][N], double svmL[N], double svmA[N])
{
 int nIter      =  400000;    // fixed number of update iterations
 double epsilon =    0.01;    // update rate
 double svmC    =   100.0;    // upper threshold for Lagrange parameters
 double rr; 
 double svmA_new[N];          // for synchroneous update
//
 for (int iIter=0; iIter<nIter; iIter++)
   {
//                                                           // (1) gradient
   for (int i=0; i<N; i++)
     svmA_new[i] = svmA[i];
   for (int i=0; i<N; i++)
     {
     rr = 0.0;
     for (int j=0; j<N; j++)
       rr += svmKernel[i][j]*svmL[j]*svmA[j];
     svmA_new[i] += epsilon*(1.0-svmL[i]*rr);
     }
   for (int i=0; i<N; i++)
     svmA[i] = svmA_new[i];
//                                                           // (2) orthogonalization
   rr = 0.0;
   for (int i=0; i<N; i++)
     rr += svmA[i]*svmL[i];
   for (int i=0; i<N; i++)
     svmA[i] = svmA[i] - rr*svmL[i]/N;
//                                                           // (3) allowed range
   for (int i=0; i<N; i++)
     if (svmA[i]<0.0)
       svmA[i] = 0.0;
   for (int i=0; i<N; i++)
     if (svmA[i]>svmC)
       svmA[i] = svmC;
   } // end of loop over iterations
//
 if (1==2)                                                   // test printing
   {
   cout << "# C  " <<  svmC << endl;
   printf("#%4s %5s %10s\n","", "class", "a_i");
   for (int i=0; i<N; i++)
     printf("#%4d %5d %10.4f\n",i, int(svmL[i]), svmA[i]);
   }
//
 return svmC;
} // end of svmKernelOpt()


// ***
// *** classification curve
// ***
template<int N, int nC>
void findClassCurve(double svmKernel[N][N], double svmData[2][N], 
                    double svmL[N], double svmA[N], double svmB,
                    double svmAllCurve[2][nC], int &allCurveN,
                    double svmOneCurve[2][nC], int &oneCurveN,
                    double svmTwoCurve[2][nC], int &twoCurveN)
{
 double  upper[2];                     // for bisection
 double middle[2]; 
 double  lower[2]; 
 double upperYesNo, middleYesNo, lowerYesNo;
//
 double xMax = -1000.0;   // range for separating curve
 double xMin =  1000.0;
 double yMax = -1000.0;
 double yMin =  1000.0;
 for (int i=0; i<N; i++)
   {
   if (svmData[0][i]>xMax) xMax = svmData[0][i];
   if (svmData[1][i]>yMax) yMax = svmData[1][i];
   if (svmData[0][i]<xMin) xMin = svmData[0][i];
   if (svmData[1][i]<yMin) yMin = svmData[1][i];
   }
//
 if (1==2)                // test priniting
   {
   printf("# xMin, xMax : %8.2f %8.2f\n", xMin, xMax);
   printf("# yMin, yMax : %8.2f %8.2f\n", yMin, yMax);
   }
// -- --------------------------------------
// -- loop over x-range 
// -- whichCurve = 0      : classifing curve
// -- whichCurve = \pm 1  : right/left margin
// -- --------------------------------------
 for (int whichCurve=-1; whichCurve<2; whichCurve++)
 for (int iC=0; iC<nC; iC++)          // loop over x-range
   {
   double xx  = xMin + (iC+0.5)*(xMax-xMin)/nC;
//
   upper[0] = xx;
   upper[1] = yMax;
//
   middle[0] = xx;
   middle[1] = 0.5*(yMax+yMin);
//
   lower[0] = xx;
   lower[1] = yMin;
//
   upperYesNo = svmClassifyFunction(svmData, svmA, svmL, upper, svmB+whichCurve);
   lowerYesNo = svmClassifyFunction(svmData, svmA, svmL, lower, svmB+whichCurve);
// ---
// --- bisection-loop
// ---
   int nIter = 30;
   if (upperYesNo*lowerYesNo<0.0)                     // of different sign
     {
     for (int iIter=0; iIter<nIter; iIter++)
       {
       middleYesNo = svmClassifyFunction(svmData, svmA, svmL, middle, svmB+whichCurve);
       if (upperYesNo*middleYesNo<0.0)
         {
         lower[1] = middle[1];
         lowerYesNo = middleYesNo;
         }
       else
         {
         upper[1] = middle[1];
         upperYesNo = middleYesNo;
         }
       middle[1] = 0.5*(upper[1]+lower[1]);
       }  // end of bisection iteration
//
     if (whichCurve==-1)
       {
       svmOneCurve[0][oneCurveN] = middle[0];
       svmOneCurve[1][oneCurveN] = middle[1];
       oneCurveN++;
       }
//
     if (whichCurve==0)
       {
       svmAllCurve[0][allCurveN] = middle[0];
       svmAllCurve[1][allCurveN] = middle[1];
       allCurveN++;
       }
//
     if (whichCurve==1)
       {
       svmTwoCurve[0][twoCurveN] = middle[0];
       svmTwoCurve[1][twoCurveN] = middle[1];
       twoCurveN++;
       }
     }  // end if there is a curve
   } // end of loop over x-range
} // end of  findClassCurve()


// ***
// *** classify data for plotting
// ***
template<int N1, int N2>
void classifyData(double dataClassOne[2][N1], double goodClassOne[2][N1],
                  double suppClassOne[2][N1], double missClassOne[2][N1], 
                  double dataClassTwo[2][N2], double goodClassTwo[2][N2], 
                  double suppClassTwo[2][N2], double missClassTwo[2][N2],
                  int &N1good, int &N1supp, int &N1miss,
                  int &N2good, int &N2supp, int &N2miss,
                  double svmC, double svmA[N1+N2], double svmL[N1+N2],
                  double svmData[2][N1+N2], double svmB
                  )
{
 double kVector[2];
 for (int i=0;i<N1;i++)     
   {
   kVector[0] = svmData[0][i];
   kVector[1] = svmData[1][i];
   double yesNo = svmClassifyFunction(svmData, svmA, svmL, kVector, svmB);
//
   if (yesNo*svmL[i]<0.0)                             // miss-classification
       {
       missClassOne[0][N1miss] = dataClassOne[0][i]; 
       missClassOne[1][N1miss] = dataClassOne[1][i]; 
       N1miss++;
       }
   else
     if ((svmA[i]>0.001)&&(svmA[i]<(svmC-0.001)))     // support vector
       {
       suppClassOne[0][N1supp] = dataClassOne[0][i]; 
       suppClassOne[1][N1supp] = dataClassOne[1][i]; 
       N1supp++;
       }
       else                                           // correctly classified
       {
       goodClassOne[0][N1good] = dataClassOne[0][i]; 
       goodClassOne[1][N1good] = dataClassOne[1][i]; 
       N1good++;
       }
   }  // end of i..N1 loop
//
 for (int j=0;j<N2;j++)     
   {
   kVector[0] = svmData[0][N1+j];
   kVector[1] = svmData[1][N1+j];
   double yesNo = svmClassifyFunction(svmData, svmA, svmL, kVector, svmB);
//
   if (yesNo*svmL[N1+j]<0.0)
       {
       missClassTwo[0][N2miss] = dataClassTwo[0][j]; 
       missClassTwo[1][N2miss] = dataClassTwo[1][j]; 
       N2miss++;
       }
   else
     if ((svmA[N1+j]>0.001)&&(svmA[N1+j]<(svmC-0.001)))
       {
       suppClassTwo[0][N2supp] = dataClassTwo[0][j]; 
       suppClassTwo[1][N2supp] = dataClassTwo[1][j]; 
       N2supp++;
       }
       else
       {
       goodClassTwo[0][N2good] = dataClassTwo[0][j]; 
       goodClassTwo[1][N2good] = dataClassTwo[1][j]; 
       N2good++;
       }
   }  // end of j..N2 loop
} // end of classifyData()


// ***
// *** main
// ***
int main() 
{
// --- ----------------------
// --- generate training data
// --- ----------------------
 const int N1 = 22;                 // number of training data per class
 const int N2 = 22;
 double dataClassOne[2][N1];         // class data points 
 double dataClassTwo[2][N2];
//
 generateClassificationData(dataClassOne,dataClassTwo); 
// --- -------------------
// --- merge training data
// --- -------------------
 const int N = N1+N2;       // combined 
 double svmData[2][N];      // combined training data
 double svmL[N];            // class labels
 double svmA[N];            // Lagrange parameters
//
 for (int i=0;i<N1;i++)     
   {
   svmData[0][i] = dataClassOne[0][i];  // merge data
   svmData[1][i] = dataClassOne[1][i]; 
   svmL[i]       = 1.0;
   svmA[i]       = nextRandom();        // (positive) random initialization
   }
 for (int j=0;j<N2;j++)     
   {
   svmData[0][N1+j] = dataClassTwo[0][j]; 
   svmData[1][N1+j] = dataClassTwo[1][j]; 
   svmL[N1+j]       = -1.0;
   svmA[N1+j]       = nextRandom();    
   }
// --- ----------
// --- SVM kernel
// --- ----------
 double svmKernel[N][N];    // the kernel
 double uu[2], vv[2];
//
 for (int i=0;i<N;i++)     
   for (int j=0;j<N;j++)     
     {
     uu[0] = svmData[0][i];
     uu[1] = svmData[1][i];
     vv[0] = svmData[0][j];
     vv[1] = svmData[1][j];
     svmKernel[i][j] = svmKernelFunction(uu,vv);
     }
// --- ----------------------------
// --- SVM (support vector machine)
// --- ----------------------------
 double svmB;               // hyperplane offset
 double svmC;               // upper limit for Lagrange parameters
 double svmDataPlane[2][2]; // orthogonal to weight vector
 double svm_m1_Plane[2][2]; // margin one hyperplane
 double svm_m2_Plane[2][2]; // margin two hyperplane
//
 svmC = svmKernelOpt(svmKernel, svmL, svmA);    // determine the Lagrange parameters
// --- --------------------
// --- find support indices
// --- --------------------
 int svmSuppN      = 0;        // total numbe of support vectors
 int svmSuppIndices[N];        // set of indices to support vectors
//
 for (int i=0; i<N; i++)
   if ((svmA[i]>0.001)&&(svmA[i]<(svmC-0.001)))
     {
     svmSuppIndices[svmSuppN] = i;
     svmSuppN++;
     }
// --- ----------------------
// --- calculate curve offset
// --- ----------------------
 double supportVector[2];
// 
 for (int iSupp=0; iSupp<svmSuppN; iSupp++)
   {
   int ss = svmSuppIndices[iSupp];
   svmB = -svmL[ss];
   for (int j=0; j<N; j++)
     svmB  += svmA[j]*svmL[j]*svmKernel[j][ss];
   printf("# ss, svmB %6d %10.4f\n", ss, svmB);  // all svmB identical if converged
   }
// --- -------
// --- testing
// --- -------
 if (1==2)
   for (int i=0; i<N; i++)
     {
     double kVector[2];
     kVector[0] = svmData[0][i];
     kVector[1] = svmData[1][i];
//
     double yesNo = svmClassifyFunction(svmData, svmA, svmL, kVector, svmB);
     printf("#%4d %5d %10.4f %6.2f %6.2f\n",  
             i, int(svmL[i]), svmA[i], yesNo, yesNo*svmL[i]);
     }
// --- -------------------------
// --- find classification curve
// --- -------------------------
 double svmAllCurve[2][100];   // location of classification curve
 double svmOneCurve[2][100];   // of margins
 double svmTwoCurve[2][100];
 int allCurveN=0, oneCurveN=0, twoCurveN=0;  // # of non-zero entries
//
 findClassCurve(svmKernel, svmData,
                svmL, svmA, svmB,
                svmAllCurve, allCurveN,
                svmOneCurve, oneCurveN,
                svmTwoCurve, twoCurveN);
// --- -----------------------------------
// --- classify training data for plotting
// --- -----------------------------------
 double goodClassOne[2][N1];        // correctly classified data
 double suppClassOne[2][N1];        // support vectors
 double missClassOne[2][N1];        // miss-classified 
 double goodClassTwo[2][N2];     
 double suppClassTwo[2][N2];    
 double missClassTwo[2][N2];   
 int N1good=0, N1supp=0, N1miss=0;  // # of correctly classified / SV / 
 int N2good=0, N2supp=0, N2miss=0;  //   miss-classified 
//
 classifyData(dataClassOne, goodClassOne,
              suppClassOne, missClassOne,
              dataClassTwo, goodClassTwo,
              suppClassTwo, missClassTwo,
              N1good, N1supp, N1miss,
              N2good, N2supp, N2miss,
              svmC, svmA, svmL, 
              svmData, svmB);
 if (1==1)                          // test printing
   {
   printf("# N1 good/supp/miss %6d %6d %6d \n", N1good, N1supp, N1miss);
   printf("# N2 good/supp/miss %6d %6d %6d \n", N2good, N2supp, N2miss);
   }
// --- --------
// --- plotting
// --- --------
 string dataNameOne      = "$dataOne";     
 string goodNameOne      = "$goodOne";     
 string suppNameOne      = "$suppOne";     
 string missNameOne      = "$missOne";     
///
 string dataNameTwo      = "$dataTwo";                             
 string goodNameTwo      = "$goodTwo";                             
 string suppNameTwo      = "$suppTwo";                             
 string missNameTwo      = "$missTwo";                             
//
 string dataNameAllCurve = "$dataAllCurve";                             
 string dataNameOneCurve = "$dataOneCurve";                             
 string dataNameTwoCurve = "$dataTwoCurve";                             
//
 string dataToPlotOne      = xyValuesGnu(dataClassOne[0],
                                         dataClassOne[1],N1    ,dataNameOne);  // data to string
 string goodToPlotOne      = xyValuesGnu(goodClassOne[0],
                                         goodClassOne[1],N1good,goodNameOne);  // data to string
 string suppToPlotOne      = xyValuesGnu(suppClassOne[0],
                                         suppClassOne[1],N1supp,suppNameOne);  // data to string
 string missToPlotOne      = xyValuesGnu(missClassOne[0],
                                         missClassOne[1],N1miss,missNameOne);  // data to string
//
 string dataToPlotTwo      = xyValuesGnu(dataClassTwo[0],
                                         dataClassTwo[1],N2    ,dataNameTwo);  
 string goodToPlotTwo      = xyValuesGnu(goodClassTwo[0],
                                         goodClassTwo[1],N2good,goodNameTwo);  
 string suppToPlotTwo      = xyValuesGnu(suppClassTwo[0],
                                         suppClassTwo[1],N2supp,suppNameTwo);  
 string missToPlotTwo      = xyValuesGnu(missClassTwo[0],
                                         missClassTwo[1],N2miss,missNameTwo);  
//
 string dataToPlotAllCurve = xyValuesGnu(svmAllCurve[0],
                                         svmAllCurve[1],allCurveN,dataNameAllCurve);  
 string dataToPlotOneCurve = xyValuesGnu(svmOneCurve[0],
                                         svmOneCurve[1],oneCurveN,dataNameOneCurve);  
 string dataToPlotTwoCurve = xyValuesGnu(svmTwoCurve[0],
                                         svmTwoCurve[1],oneCurveN,dataNameTwoCurve);  
//
 plotViaGnuplot(dataNameOne,     dataToPlotOne,
                goodNameOne,     goodToPlotOne,
                suppNameOne,     suppToPlotOne,
                missNameOne,     missToPlotOne,
                dataNameTwo,     dataToPlotTwo, 
                goodNameTwo,     goodToPlotTwo, 
                suppNameTwo,     suppToPlotTwo, 
                missNameTwo,     missToPlotTwo, 
                dataNameAllCurve,dataToPlotAllCurve,
                dataNameOneCurve,dataToPlotOneCurve,
                dataNameTwoCurve,dataToPlotTwoCurve
               );
//
 return 1;
}  // end of main()