unsupervised learning
clustering
dimensionality reduction
supervised learning
classification
regression
#include <iostream>
#include <sstream> // for stringstream
#include <stdio.h> // for printf
#include <string> // c_str()
#include <cmath> // for abs
#include <fstream> // file streams
#include <stdlib.h> // srand, rand
using namespace std;
/* contains
*
* double nextRandom()
* string xyValuesGnu()
* void plotViaGnuplot()
* void gaussElimination()
* void generateClassificationData()
* void calcCoVarMatrices()
* void evaluatePlane()
*/
// ***
// *** random number in [0,1]
// ***
double nextRandom()
{ static bool firstTime = true;
if (firstTime)
{
srand( (unsigned)time( NULL ) );
firstTime = false;
}
return (rand()/(double)RAND_MAX);
} // end of nextRandom()
// ***
// *** (x,y) string for inline data input for gnuplot
// ***
template<int N>
string xyValuesGnu(double (&xyValues)[2][N], string dataName)
{
stringstream ss;
ss << dataName << " << " << "EOD\n"; // defining data block
for (int i=0; i<N; i++)
ss << xyValues[0][i] << " " << xyValues[1][i] << endl;
ss << "EOD\n"; // terminating data block
//
return ss.str();
} // end of xyValuesGnu()
// ***
// *** plot data via gnuplot
// ***
void plotViaGnuplot(string dataNameOne, string dataToPlotOne,
string dataNameTwo, string dataToPlotTwo,
string dataNamePlane,string dataToPlotPlane,
string dataNameBasis,string dataToPlotBasis)
{
FILE *pipeGnu = popen("gnuplot", "w"); // streaming to gnuplot
fprintf(pipeGnu, "set term qt persist size 1000,800\n");
fprintf(pipeGnu, "set nokey\n"); // no legend
//
fprintf(pipeGnu, "%s\n",dataToPlotOne.c_str());
fprintf(pipeGnu, "%s\n",dataToPlotTwo.c_str());
fprintf(pipeGnu, "%s\n",dataToPlotPlane.c_str()); // send data block
fprintf(pipeGnu, "%s\n",dataToPlotBasis.c_str()); // send data block
// // plot data
fprintf(pipeGnu, "plot \"%s\" w points pt 5 ps 2 \n", dataNameOne.c_str());
fprintf(pipeGnu, "replot \"%s\" w points pt 5 ps 2 \n", dataNameTwo.c_str());
fprintf(pipeGnu, "replot \"%s\" w lines lw 7 \n", dataNamePlane.c_str());
fprintf(pipeGnu, "replot \"%s\" w linesp pt 7 ps 4 lw 4 \n", dataNameBasis.c_str());
//
fclose(pipeGnu); // closing the pipe to gnuplot
} // end of plotViaGnuplot
// *** Gauss elimination of Ax=b with pivoting.
// *** On input quadratic matrix A and vector b.
// *** On ouput solution x.
template<int N>
void gaussElimination(double (&A)[N][N], double (&b)[N], double (&x)[N])
{
double temp; // temp variable
const double smallNumber = 1e-10; // a small number
//
//--- loop over all columns A[][col]; A[][col]
for (int col=0; col<N; col++)
{
//--- find pivot row; swap pivot with actualy row
int pivotRow = col;
for (int row=col+1;row<N;row++)
if ( abs(A[row][col]) > abs(A[pivotRow][col]) )
pivotRow = row;
//--- swap row A[col][] with A[pivotRow][]
for (int i=0;i<N;i++)
{
temp = A[col][i];
A[col][i] = A[pivotRow][i];
A[pivotRow][i] = temp;
}
//--- swap elements b[col] with b[pivotRow]
temp = b[col];
b[col] = b[pivotRow];
b[pivotRow] = temp;
//--- throw a warning if matrix is singular or nearly singular
if (abs(A[col][col]) <= smallNumber)
cerr << "Matrix is singular or nearly singular" << endl;
//--- Gauss with pivot within A and b
for (int row=col+1;row<N;row++)
{
double alpha = A[row][col] / A[col][col]; // divide by pivot
b[row] -= alpha*b[col];
for (int j=col;j<N;j++)
A[row][j] -= alpha * A[col][j];
}
} // end of loop over all columns
//--- back substitution
for (int i=N-1;i>=0;i--)
{
double sum = 0.0;
for (int j=i+1;j<N;j++)
sum += A[i][j] * x[j];
x[i] = (b[i] - sum) / A[i][i]; // pivots on diagonal after swapping
}
} // end of gaussElimination()
// ***
// *** generate data to be classified; only for (dim==2)
// ***
template<int nOne, int nTwo>
void generateClassificationData(double dataClassOne[2][nOne], double dataClassTwo[2][nTwo])
{
double angleOne = 0.75*M_PI; // angles of the large axis of the data with
double angleTwo = 0.75*M_PI; // respect to the x-axis
double L1One[2], L1Two[2]; // large eigen-directions of data distribution
double L2One[2], L2Two[2]; // minor eigen-directions of data distribution
//
double L1OneLength = 1.5; // respective lengths
double L2OneLength = 0.2; // respective lengths
double L1TwoLength = 1.0;
double L2TwoLength = 0.2;
//
double rr, ss;
//
L1One[0] = L1OneLength*cos(angleOne);
L1One[1] = L1OneLength*sin(angleOne);
L2One[0] = L2OneLength*sin(angleOne); // L2 orthognal to L1
L2One[1] = -L2OneLength*cos(angleOne);
//
L1Two[0] = L1TwoLength*cos(angleTwo);
L1Two[1] = L1TwoLength*sin(angleTwo);
L2Two[0] = L2TwoLength*sin(angleTwo);
L2Two[1] = -L2TwoLength*cos(angleTwo);
//
for (int iOne=0; iOne<nOne; iOne++) // generate data for first class
{
rr = 2.0*nextRandom()-1.0;
ss = 2.0*nextRandom()-1.0;
dataClassOne[0][iOne] = 1.0 + rr*L1One[0] + ss*L2One[0]; // x component
dataClassOne[1][iOne] = 0.0 + rr*L1One[1] + ss*L2One[1]; // y component
}
//
for (int iTwo=0; iTwo<nTwo; iTwo++) // generate data for second class
{
rr = 2.0*nextRandom()-1.0;
ss = 2.0*nextRandom()-1.0;
dataClassTwo[0][iTwo] = -1.0 + rr*L1Two[0] + ss*L2Two[0];
dataClassTwo[1][iTwo] = 0.0 + rr*L1Two[1] + ss*L2Two[1];
}
// test printing
if (1==2)
{
for (int iOne=0; iOne<nOne; iOne++)
cout << dataClassOne[0][iOne] << " " << dataClassOne[1][iOne] << endl;
cout << endl;
for (int iTwo=0; iTwo<nTwo; iTwo++)
cout << dataClassTwo[0][iTwo] << " " << dataClassTwo[1][iTwo] << endl;
}
} // end of generateClassificationData()
// ***
// *** calculate the covariance matrices
// ***
template<int nOne, int nTwo>
void calcCoVarMatrices(double dataClassOne[2][nOne], double dataClassTwo[2][nTwo],
double coVarOne[2][2], double coVarTwo[2][2],
double mOne[2], double mTwo[2],
double S[2][2], double deltaM[2])
{
mOne[0] = 0;
mOne[1] = 0;
mTwo[0] = 0;
mTwo[1] = 0;
//
for (int iOne=0; iOne<nOne; iOne++)
{
mOne[0] += dataClassOne[0][iOne]/nOne;
mOne[1] += dataClassOne[1][iOne]/nOne;
}
for (int iTwo=0; iTwo<nTwo; iTwo++)
{
mTwo[0] += dataClassTwo[0][iTwo]/nTwo;
mTwo[1] += dataClassTwo[1][iTwo]/nTwo;
}
//
deltaM[0] = mTwo[0] - mOne[0];
deltaM[1] = mTwo[1] - mOne[1];
//
for (int iOne=0; iOne<nOne; iOne++)
for (int i=0; i<2; i++)
for (int j=0; j<2; j++)
coVarOne[i][j] += (dataClassOne[i][iOne]-mOne[i])*
(dataClassOne[j][iOne]-mOne[j])/nOne;
for (int iTwo=0; iTwo<nTwo; iTwo++)
for (int i=0; i<2; i++)
for (int j=0; j<2; j++)
coVarTwo[i][j] += (dataClassTwo[i][iTwo]-mTwo[i])*
(dataClassTwo[j][iTwo]-mTwo[j])/nTwo;
//
for (int i=0; i<2; i++)
for (int j=0; j<2; j++)
S[i][j] = coVarOne[i][j] + coVarTwo[i][j];
//
} // end of calcCoVarMatrices()
// ***
// *** evaluate the plane
// ***
void evaluatePlane(double w[2], double dataPlane[2][2])
{
double rr = sqrt(w[0]*w[0]+w[1]*w[1]);
w[0] = w[0]/rr;
w[1] = w[1]/rr; // normalization
//
dataPlane[0][0] = 1.8*w[1]; // orthogonal
dataPlane[1][0] = -1.8*w[0];
dataPlane[0][1] = -dataPlane[0][0];
dataPlane[1][1] = -dataPlane[1][0];
//
// test printing
if (1==1)
cout << w[0] << " " << w[1] << endl;
} // end of evaluatePlane()
// ***
// *** main
// ***
int main()
{
double dataClassOne[2][900]; // the data points
double dataClassTwo[2][800];
double dataBasis[2][2]; // connecting the center of masses
//
dataBasis[0][0] = -1.0; // (-1,0)
dataBasis[1][0] = 0.0;
dataBasis[0][1] = 1.0; // (1,0)
dataBasis[1][1] = 0.0;
//
double mOne[2]; // center of masses
double mTwo[2];
double deltaM[2]; // mTwo-mOne
double w[2]; // feature vector
double dataPlane[2][2]; // orthogonal to the feature vector
//
double coVarOne[2][2]; // covariance matrices
double coVarTwo[2][2];
double S[2][2]; // sum of covariance matrices
//
generateClassificationData(dataClassOne,dataClassTwo);
//
calcCoVarMatrices(dataClassOne, dataClassTwo, coVarOne, coVarTwo,
mOne, mTwo, S, deltaM);
//
gaussElimination(S,deltaM,w); // solve "S deltaM = w"
evaluatePlane(w, dataPlane); // w defines the plane
//
string dataNameOne = "$dataOne"; // dataName
string dataNameTwo = "$dataTwo";
string dataNamePlane = "$dataPlane";
string dataNameBasis = "$dataBasis";
string dataToPlotOne = xyValuesGnu(dataClassOne,dataNameOne); // data to string
string dataToPlotTwo = xyValuesGnu(dataClassTwo,dataNameTwo);
string dataToPlotPlane = xyValuesGnu(dataPlane,dataNamePlane);
string dataToPlotBasis = xyValuesGnu(dataBasis,dataNameBasis);
//
plotViaGnuplot(dataNameOne ,dataToPlotOne,
dataNameTwo ,dataToPlotTwo,
dataNamePlane,dataToPlotPlane,
dataNameBasis,dataToPlotBasis);
return 1;
} // end of main()