unsupervised learning
clustering
dimensionality reduction
supervised learning
classification
regression
#include <iostream> #include <sstream> // for stringstream #include <stdio.h> // for printf #include <string> // c_str() #include <cmath> // for abs #include <fstream> // file streams #include <stdlib.h> // srand, rand using namespace std; /* contains * * double nextRandom() * string xyValuesGnu() * void plotViaGnuplot() * void gaussElimination() * void generateClassificationData() * void calcCoVarMatrices() * void evaluatePlane() */ // *** // *** random number in [0,1] // *** double nextRandom() { static bool firstTime = true; if (firstTime) { srand( (unsigned)time( NULL ) ); firstTime = false; } return (rand()/(double)RAND_MAX); } // end of nextRandom() // *** // *** (x,y) string for inline data input for gnuplot // *** template<int N> string xyValuesGnu(double (&xyValues)[2][N], string dataName) { stringstream ss; ss << dataName << " << " << "EOD\n"; // defining data block for (int i=0; i<N; i++) ss << xyValues[0][i] << " " << xyValues[1][i] << endl; ss << "EOD\n"; // terminating data block // return ss.str(); } // end of xyValuesGnu() // *** // *** plot data via gnuplot // *** void plotViaGnuplot(string dataNameOne, string dataToPlotOne, string dataNameTwo, string dataToPlotTwo, string dataNamePlane,string dataToPlotPlane, string dataNameBasis,string dataToPlotBasis) { FILE *pipeGnu = popen("gnuplot", "w"); // streaming to gnuplot fprintf(pipeGnu, "set term qt persist size 1000,800\n"); fprintf(pipeGnu, "set nokey\n"); // no legend // fprintf(pipeGnu, "%s\n",dataToPlotOne.c_str()); fprintf(pipeGnu, "%s\n",dataToPlotTwo.c_str()); fprintf(pipeGnu, "%s\n",dataToPlotPlane.c_str()); // send data block fprintf(pipeGnu, "%s\n",dataToPlotBasis.c_str()); // send data block // // plot data fprintf(pipeGnu, "plot \"%s\" w points pt 5 ps 2 \n", dataNameOne.c_str()); fprintf(pipeGnu, "replot \"%s\" w points pt 5 ps 2 \n", dataNameTwo.c_str()); fprintf(pipeGnu, "replot \"%s\" w lines lw 7 \n", dataNamePlane.c_str()); fprintf(pipeGnu, "replot \"%s\" w linesp pt 7 ps 4 lw 4 \n", dataNameBasis.c_str()); // fclose(pipeGnu); // closing the pipe to gnuplot } // end of plotViaGnuplot // *** Gauss elimination of Ax=b with pivoting. // *** On input quadratic matrix A and vector b. // *** On ouput solution x. template<int N> void gaussElimination(double (&A)[N][N], double (&b)[N], double (&x)[N]) { double temp; // temp variable const double smallNumber = 1e-10; // a small number // //--- loop over all columns A[][col]; A[][col] for (int col=0; col<N; col++) { //--- find pivot row; swap pivot with actualy row int pivotRow = col; for (int row=col+1;row<N;row++) if ( abs(A[row][col]) > abs(A[pivotRow][col]) ) pivotRow = row; //--- swap row A[col][] with A[pivotRow][] for (int i=0;i<N;i++) { temp = A[col][i]; A[col][i] = A[pivotRow][i]; A[pivotRow][i] = temp; } //--- swap elements b[col] with b[pivotRow] temp = b[col]; b[col] = b[pivotRow]; b[pivotRow] = temp; //--- throw a warning if matrix is singular or nearly singular if (abs(A[col][col]) <= smallNumber) cerr << "Matrix is singular or nearly singular" << endl; //--- Gauss with pivot within A and b for (int row=col+1;row<N;row++) { double alpha = A[row][col] / A[col][col]; // divide by pivot b[row] -= alpha*b[col]; for (int j=col;j<N;j++) A[row][j] -= alpha * A[col][j]; } } // end of loop over all columns //--- back substitution for (int i=N-1;i>=0;i--) { double sum = 0.0; for (int j=i+1;j<N;j++) sum += A[i][j] * x[j]; x[i] = (b[i] - sum) / A[i][i]; // pivots on diagonal after swapping } } // end of gaussElimination() // *** // *** generate data to be classified; only for (dim==2) // *** template<int nOne, int nTwo> void generateClassificationData(double dataClassOne[2][nOne], double dataClassTwo[2][nTwo]) { double angleOne = 0.75*M_PI; // angles of the large axis of the data with double angleTwo = 0.75*M_PI; // respect to the x-axis double L1One[2], L1Two[2]; // large eigen-directions of data distribution double L2One[2], L2Two[2]; // minor eigen-directions of data distribution // double L1OneLength = 1.5; // respective lengths double L2OneLength = 0.2; // respective lengths double L1TwoLength = 1.0; double L2TwoLength = 0.2; // double rr, ss; // L1One[0] = L1OneLength*cos(angleOne); L1One[1] = L1OneLength*sin(angleOne); L2One[0] = L2OneLength*sin(angleOne); // L2 orthognal to L1 L2One[1] = -L2OneLength*cos(angleOne); // L1Two[0] = L1TwoLength*cos(angleTwo); L1Two[1] = L1TwoLength*sin(angleTwo); L2Two[0] = L2TwoLength*sin(angleTwo); L2Two[1] = -L2TwoLength*cos(angleTwo); // for (int iOne=0; iOne<nOne; iOne++) // generate data for first class { rr = 2.0*nextRandom()-1.0; ss = 2.0*nextRandom()-1.0; dataClassOne[0][iOne] = 1.0 + rr*L1One[0] + ss*L2One[0]; // x component dataClassOne[1][iOne] = 0.0 + rr*L1One[1] + ss*L2One[1]; // y component } // for (int iTwo=0; iTwo<nTwo; iTwo++) // generate data for second class { rr = 2.0*nextRandom()-1.0; ss = 2.0*nextRandom()-1.0; dataClassTwo[0][iTwo] = -1.0 + rr*L1Two[0] + ss*L2Two[0]; dataClassTwo[1][iTwo] = 0.0 + rr*L1Two[1] + ss*L2Two[1]; } // test printing if (1==2) { for (int iOne=0; iOne<nOne; iOne++) cout << dataClassOne[0][iOne] << " " << dataClassOne[1][iOne] << endl; cout << endl; for (int iTwo=0; iTwo<nTwo; iTwo++) cout << dataClassTwo[0][iTwo] << " " << dataClassTwo[1][iTwo] << endl; } } // end of generateClassificationData() // *** // *** calculate the covariance matrices // *** template<int nOne, int nTwo> void calcCoVarMatrices(double dataClassOne[2][nOne], double dataClassTwo[2][nTwo], double coVarOne[2][2], double coVarTwo[2][2], double mOne[2], double mTwo[2], double S[2][2], double deltaM[2]) { mOne[0] = 0; mOne[1] = 0; mTwo[0] = 0; mTwo[1] = 0; // for (int iOne=0; iOne<nOne; iOne++) { mOne[0] += dataClassOne[0][iOne]/nOne; mOne[1] += dataClassOne[1][iOne]/nOne; } for (int iTwo=0; iTwo<nTwo; iTwo++) { mTwo[0] += dataClassTwo[0][iTwo]/nTwo; mTwo[1] += dataClassTwo[1][iTwo]/nTwo; } // deltaM[0] = mTwo[0] - mOne[0]; deltaM[1] = mTwo[1] - mOne[1]; // for (int iOne=0; iOne<nOne; iOne++) for (int i=0; i<2; i++) for (int j=0; j<2; j++) coVarOne[i][j] += (dataClassOne[i][iOne]-mOne[i])* (dataClassOne[j][iOne]-mOne[j])/nOne; for (int iTwo=0; iTwo<nTwo; iTwo++) for (int i=0; i<2; i++) for (int j=0; j<2; j++) coVarTwo[i][j] += (dataClassTwo[i][iTwo]-mTwo[i])* (dataClassTwo[j][iTwo]-mTwo[j])/nTwo; // for (int i=0; i<2; i++) for (int j=0; j<2; j++) S[i][j] = coVarOne[i][j] + coVarTwo[i][j]; // } // end of calcCoVarMatrices() // *** // *** evaluate the plane // *** void evaluatePlane(double w[2], double dataPlane[2][2]) { double rr = sqrt(w[0]*w[0]+w[1]*w[1]); w[0] = w[0]/rr; w[1] = w[1]/rr; // normalization // dataPlane[0][0] = 1.8*w[1]; // orthogonal dataPlane[1][0] = -1.8*w[0]; dataPlane[0][1] = -dataPlane[0][0]; dataPlane[1][1] = -dataPlane[1][0]; // // test printing if (1==1) cout << w[0] << " " << w[1] << endl; } // end of evaluatePlane() // *** // *** main // *** int main() { double dataClassOne[2][900]; // the data points double dataClassTwo[2][800]; double dataBasis[2][2]; // connecting the center of masses // dataBasis[0][0] = -1.0; // (-1,0) dataBasis[1][0] = 0.0; dataBasis[0][1] = 1.0; // (1,0) dataBasis[1][1] = 0.0; // double mOne[2]; // center of masses double mTwo[2]; double deltaM[2]; // mTwo-mOne double w[2]; // feature vector double dataPlane[2][2]; // orthogonal to the feature vector // double coVarOne[2][2]; // covariance matrices double coVarTwo[2][2]; double S[2][2]; // sum of covariance matrices // generateClassificationData(dataClassOne,dataClassTwo); // calcCoVarMatrices(dataClassOne, dataClassTwo, coVarOne, coVarTwo, mOne, mTwo, S, deltaM); // gaussElimination(S,deltaM,w); // solve "S deltaM = w" evaluatePlane(w, dataPlane); // w defines the plane // string dataNameOne = "$dataOne"; // dataName string dataNameTwo = "$dataTwo"; string dataNamePlane = "$dataPlane"; string dataNameBasis = "$dataBasis"; string dataToPlotOne = xyValuesGnu(dataClassOne,dataNameOne); // data to string string dataToPlotTwo = xyValuesGnu(dataClassTwo,dataNameTwo); string dataToPlotPlane = xyValuesGnu(dataPlane,dataNamePlane); string dataToPlotBasis = xyValuesGnu(dataBasis,dataNameBasis); // plotViaGnuplot(dataNameOne ,dataToPlotOne, dataNameTwo ,dataToPlotTwo, dataNamePlane,dataToPlotPlane, dataNameBasis,dataToPlotBasis); return 1; } // end of main()