ECOCPAK: fn_sparse_random_ecoc.hpp Source File

ECOCPAK v0.9
00001 // Copyright (C) 2011 the authors listed below
00002 // http://ecocpak.sourceforge.net
00003 //
00004 // Authors:
00005 // - Dimitrios Bouzas (bouzas at ieee dot org)
00006 // - Nikolaos Arvanitopoulos (niarvani at ieee dot org)
00007 // - Anastasios Tefas (tefas at aiia dot csd dot auth dot gr)
00008 //
00009 // This file is part of the ECOC PAK C++ library. It is
00010 // provided without any warranty of fitness for any purpose.
00011 //
00012 // You can redistribute this file and/or modify it under
00013 // the terms of the GNU Lesser General Public License (LGPL)
00014 // as published by the Free Software Foundation, either
00015 // version 3 of the License or (at your option) any later
00016 // version.
00017 // (see http://www.opensource.org/licenses for more info)
00018 
00019 
00022 
00023 
00024 
00042 imat
00043 create_sparse_matrix_from_rand
00044   (
00045   const double* random_matrix,
00046   const u32 n_rows,
00047   const u32 n_cols
00048   )
00049   {
00050   // number of matrix elements to be processed
00051   const u32 n_elem = n_rows * n_cols;
00052 
00053   // allocate coding matrix
00054   imat coding_matrix = zeros<imat>(n_rows, n_cols);
00055 
00056   // pointer to coding_matrix
00057   int* coding_matrix_ptr = coding_matrix.memptr();
00058 
00059   // iterate through elements of input matrix column wise
00060   for(u32 i = 0; i < n_elem; i++)
00061     {
00062     // 0.5 probability for zero
00063     if(random_matrix[i] >= 0.5)
00064       {
00065       // 0.25 probability for 1 and -1
00066       (random_matrix[i] >= 0.75)?
00067         coding_matrix_ptr[i] = 1
00068         :
00069         coding_matrix_ptr[i] = -1;
00070       }
00071 
00072     }
00073 
00074   return coding_matrix;
00075   }
00076 
00077 
00078 
00094 bool
00095 identical_columns_sparse
00096   (
00097   const imat& coding_matrix
00098   )
00099   {
00100   u32 counter = 0;
00101 
00102   for(u32 i = 0; i < coding_matrix.n_cols - 1; i++)
00103     {
00104     u32 n_zeros_i = accu(coding_matrix.col(i) == 0);
00105 
00106     for(u32 j = i + 1; j < coding_matrix.n_cols; j++)
00107       {
00108       u32 n_zeros_j = accu(coding_matrix.col(j) == 0);
00109 
00110       u32 matches = accu(coding_matrix.col(i) == coding_matrix.col(j));
00111 
00112       if(
00113         (matches == coding_matrix.n_rows) ||
00114         (matches == 0 && n_zeros_i == 0 && n_zeros_j == 0)
00115         )
00116         {
00117         return true;
00118         }
00119       else
00120         {
00121         counter = 0;
00122         for(u32 k = 0; k < coding_matrix.n_rows; k++)
00123           {
00124           if(
00125             (coding_matrix(k, i) != coding_matrix(k, j)) &&
00126             (coding_matrix(k, i) != 0) && (coding_matrix(k, j) != 0)
00127             )
00128             {
00129             counter++;
00130             }
00131 
00132           if(coding_matrix(k, i) == 0 && coding_matrix(k, j) == 0)
00133             {
00134             counter++;
00135             }
00136 
00137           }
00138 
00139         if(counter == coding_matrix.n_rows)
00140           {
00141           return true;
00142           }
00143 
00144         }
00145 
00146       }
00147 
00148     }
00149 
00150   return false;
00151   }
00152 
00153 
00154 
00171 imat
00172 create_sparse_random_matrix
00173   (
00174   const u32 n_classes,
00175   const u32 n_classifiers,
00176   const u32 n_matrices
00177   )
00178   {
00179   // declare coding matrix to be filled
00180   imat coding_matrix;
00181 
00182   // distance between pairs of rows of the coding matrix
00183   double dist = 0.0;
00184 
00185   int n_iterations = n_matrices;
00186   while(n_iterations > 0)
00187     {
00188     // temporary random matrix auxiliary for the creation of the coding
00189     // matrix
00190     mat tmp_rand_matrix = randu<mat>(n_classes, n_classifiers);
00191 
00192     // create current sparse random coding matrix
00193     imat current_coding_matrix = create_sparse_matrix_from_rand
00194                                    (
00195                                    tmp_rand_matrix.memptr(),
00196                                    n_classes,
00197                                    n_classifiers
00198                                    );
00199 
00200     // denotes if current coding matrix is valid
00201     bool is_valid = true;
00202 
00203     // distance between pairs of rows of the current examined matrix
00204     double current_dist = 0.0;
00205 
00206     // iterate through number of columns
00207     for(u32 i = 0; i < n_classifiers; i++)
00208       {
00209       const u32 n_plus_ones  = accu(current_coding_matrix.col(i) ==  1);
00210       const u32 n_minus_ones = accu(current_coding_matrix.col(i) == -1);
00211       const u32 n_zeros      = accu(current_coding_matrix.col(i) ==  0);
00212 
00213       // check if the current coding matrix is valid (i.e., does not
00214       // contain columns with only +1 or only -1 only zeros e.t.c)
00215       if(    n_plus_ones  == n_classes // column contains all plus ones
00216           || n_minus_ones == n_classes // column contains all minus ones
00217           || n_zeros      == n_classes // column contains all zeros
00218           || n_plus_ones  == 0         // column doesn't contain not
00219                                        // even one plus one
00220           || n_minus_ones == 0         // column doesn't contain not
00221                                        // even one minus one
00222           // column is identical with some other column of the coding
00223           // matrix
00224           || identical_columns_sparse(current_coding_matrix) == true
00225           )
00226         {
00227         // if something of the above is true set the is_valid centintel
00228         // to false (i.e., signal that current coding matrix is not
00229         // valid)
00230         is_valid = false;
00231 
00232         // break from the for loop
00233         break;
00234         }
00235 
00236       }
00237 
00238     // if the current coding matrix is valid
00239     if(is_valid == true)
00240       {
00241       //reduce number of matrices examined of iterations by one
00242       n_iterations--;
00243 
00244       // for each pair of rows of the current coding matrix
00245       for(u32 j = 0; j < n_classes - 1; j++)
00246         {
00247         for(u32 k = j + 1; k < n_classes; k++)
00248           {
00249           // compute current distance between rows j and k
00250           current_dist =
00251           norm
00252           (
00253           conv_to<rowvec>::from(current_coding_matrix.row(j))
00254           - conv_to<rowvec>::from(current_coding_matrix.row(k)),
00255           2
00256           );
00257 
00258           // if current distance is greater than the distance of any
00259           // pair of rows of the previously encountered coding matrices
00260           if(current_dist > dist)
00261             {
00262             dist = current_dist;
00263             coding_matrix = current_coding_matrix;
00264             }
00265 
00266           }
00267 
00268         }
00269 
00270       }
00271 
00272     }
00273 
00274   return coding_matrix;
00275   }
00276 
00277 
00278 
00307 u32
00308 sparse_random_ecoc
00309   (
00310   const mat& training_samples,
00311   const icolvec& training_labels,
00312   const mat& testing_samples,
00313   const icolvec& testing_labels,
00314   const int decoding_strategy,
00315   const int classifiers_type,
00316   const u32 n_matrices,
00317   const u32 n_desired_classifiers,
00318   const bool verbose,
00319   ofstream& verbose_output,
00320   double& execution_time
00321   )
00322   {
00323   // timer object to count execution times
00324   wall_clock timer;
00325 
00326   // start timer
00327   timer.tic();
00328 
00329   // number of training samples
00330   const u32 n_training_samples = training_samples.n_rows;
00331 
00332   // number of samples attributes
00333   const u32 n_attributes = training_samples.n_cols;
00334 
00335   // number of testing samples
00336   const u32 n_testing_samples = testing_samples.n_rows;
00337 
00338   // variable to hold the number of classes
00339   u32 n_classes = 0;
00340 
00341   // vector to hold number of samples per class
00342   ucolvec n_samples_per_class;
00343 
00344   // adjust the training samples class labels to start from one
00345   // and count number of classes
00346   const ucolvec tmp_training_labels = process_labels
00347                                         (
00348                                         training_labels,
00349                                         n_classes
00350                                         );
00351 
00352   // adjust the testing samples class labels to start from one
00353   const ucolvec tmp_testing_labels = process_labels(testing_labels);
00354 
00355   // decompose the training samples matrix into ClassData object
00356   vector<ClassData> classes_vector =
00357     create_class_vector
00358       (
00359       training_samples,
00360       conv_to<icolvec>::from(tmp_training_labels)
00361       );
00362 
00363   // declare empty classifiers vector
00364   // vector<Classifier*> classifiers_vector;
00365 
00366   // compute coding matrix for sparse random ecoc design
00367   imat coding_matrix = create_sparse_random_matrix
00368                          (
00369                          n_classes,
00370                          n_desired_classifiers,
00371                          n_matrices
00372                          );
00373 
00374   // classifiers vector
00375   vector<Classifier*> classifiers_vector;
00376 
00377   // ================================================================ //
00378   // ||                        Training Step                       || //
00379   // ================================================================ //
00380 
00381     // start training
00382     for(u32 i = 0; i < coding_matrix.n_cols; i++)
00383       {
00384       // data matrix of positive classes for current column of coding
00385       // matrix
00386       mat first_bipartition;
00387 
00388       // data matrix of positive classes for current column of coding
00389       // matrix
00390       mat second_bipartition;
00391 
00392       // temporary vector to store pointers of positive classes
00393       vector<ClassData*> pos_classes;
00394 
00395       // temporary vector to store pointers of negative classes
00396       vector<ClassData*> neg_classes;
00397 
00398       // temporary number of possitive samples
00399       u32 n_pos = 0;
00400 
00401       // temporary number of negative samples
00402       u32 n_neg = 0;
00403 
00404       // iterate through number of classes
00405       for(u32 j = 0; j < n_classes; j++)
00406         {
00407         // if current class is considered positive
00408         if(coding_matrix(j, i) == 1)
00409           {
00410           // append samples of current class to the positive classes
00411           // data matrix
00412           first_bipartition = join_cols
00413                                 (
00414                                 first_bipartition,
00415                                 classes_vector[j].Data()
00416                                 );
00417 
00418           // add pointer of current class to temporary vector of
00419           // positive classes
00420           pos_classes.push_back(&(classes_vector[j]));
00421 
00422           // update number of positive samples
00423           n_pos += classes_vector[j].Samples();
00424           }
00425         else
00426           {
00427           // if current class is considered negative
00428           if(coding_matrix(j, i) == -1)
00429             {
00430             // append samples of current class to the negative classe
00431             // data matrix
00432             second_bipartition = join_cols
00433                                    (
00434                                    second_bipartition,
00435                                    classes_vector[j].Data()
00436                                    );
00437 
00438             // add pointer of current class to temporary vector of
00439             // negative classes
00440             neg_classes.push_back(&(classes_vector[j]));
00441 
00442             // update number of positive samples
00443             n_neg += classes_vector[j].Samples();
00444             }
00445 
00446           // otherwise current class is not considered
00447 
00448           }
00449 
00450         }
00451 
00452       // according to user specified classifier
00453       switch(classifiers_type)
00454         {
00455          // Nearest Class Centroid Classifier
00456         case NCC:
00457           {
00458           Classifier_ncc* tmp = new Classifier_ncc
00459                                       (
00460                                       first_bipartition,
00461                                       second_bipartition
00462                                       );
00463 
00464           // update classifier classes
00465           tmp->pos = pos_classes;
00466           tmp->neg = neg_classes;
00467           tmp->n_pos = n_pos;
00468           tmp->n_neg = n_neg;
00469 
00470           // store classifier
00471           classifiers_vector.push_back(tmp);
00472 
00473           break;
00474           }
00475 
00476         // Fisher Linear Discriminant followed by NCC
00477         case FLDA:
00478           {
00479           Classifier_flda* tmp = new Classifier_flda
00480                                        (
00481                                        first_bipartition,
00482                                        second_bipartition
00483                                        );
00484 
00485           // update classifier classes
00486           tmp->pos = pos_classes;
00487           tmp->neg = neg_classes;
00488           tmp->n_pos = n_pos;
00489           tmp->n_neg = n_neg;
00490 
00491           // store classifier
00492           classifiers_vector.push_back(tmp);
00493 
00494           break;
00495           }
00496 
00497         // Support Vector Machine Classifier
00498         case SVM:
00499           {
00500           Classifier_svm* tmp = new Classifier_svm
00501                                       (
00502                                       first_bipartition,
00503                                       second_bipartition
00504                                       );
00505 
00506           // update classifier classes
00507           tmp->pos = pos_classes;
00508           tmp->neg = neg_classes;
00509           tmp->n_pos = n_pos;
00510           tmp->n_neg = n_neg;
00511 
00512           // store classifier
00513           classifiers_vector.push_back(tmp);
00514 
00515           break;
00516           }
00517 
00518         // AdaBoost Classifier
00519         case ADABOOST:
00520           {
00521           Classifier_adaBoost* tmp = new Classifier_adaBoost
00522                                            (
00523                                            first_bipartition,
00524                                            second_bipartition
00525                                            );
00526 
00527           // update classifier classes
00528           tmp->pos = pos_classes;
00529           tmp->neg = neg_classes;
00530           tmp->n_pos = n_pos;
00531           tmp->n_neg = n_neg;
00532 
00533           // store classifier
00534           classifiers_vector.push_back(tmp);
00535 
00536           break;
00537           }
00538 
00539         // Sum of Error Squares Classifier
00540         case LEAST_SQUARES:
00541           {
00542           Classifier_ls* tmp = new Classifier_ls
00543                                      (
00544                                      first_bipartition,
00545                                      second_bipartition
00546                                      );
00547 
00548           // update classifier classes
00549           tmp->pos = pos_classes;
00550           tmp->neg = neg_classes;
00551           tmp->n_pos = n_pos;
00552           tmp->n_neg = n_neg;
00553 
00554           // store classifier
00555           classifiers_vector.push_back(tmp);
00556 
00557           break;
00558           }
00559 
00560         // Custom Classifier
00561         case CUSTOM_CLASSIFIER:
00562           {
00563           Classifier_custom* tmp = new Classifier_custom
00564                                          (
00565                                          first_bipartition,
00566                                          second_bipartition
00567                                          );
00568 
00569           // update classifier classes
00570           tmp->pos = pos_classes;
00571           tmp->neg = neg_classes;
00572           tmp->n_pos = n_pos;
00573           tmp->n_neg = n_neg;
00574 
00575           // store classifier
00576           classifiers_vector.push_back(tmp);
00577 
00578           break;
00579           }
00580 
00581         default:
00582           {
00583           arma_debug_print
00584             (
00585             "sparse_random_ecoc(): Unknown classifier's option"
00586             );
00587 
00588           }
00589 
00590         }
00591 
00592       }
00593 
00594   // ================================================================ //
00595   // ||                        Testing Step                        || //
00596   // ================================================================ //
00597 
00598     // classification error
00599     double error = 0.0;
00600 
00601     // predictions for each sample
00602     uvec predictions;
00603 
00604     // confussion matrix
00605     umat confussion;
00606 
00607     // number of misclassified samples
00608     u32 n_missed = 0;
00609 
00610     // used to hold the number of missclassified testing samples
00611     decode
00612       (
00613       testing_samples,
00614       tmp_testing_labels,
00615       coding_matrix,
00616       classifiers_vector,
00617       classes_vector,
00618       decoding_strategy,
00619       predictions,
00620       n_missed,
00621       error,
00622       confussion
00623       );
00624 
00625   // if verbose output is activated
00626   if(verbose == true)
00627     {
00628     predictions = join_rows(predictions, tmp_testing_labels);
00629     verbose_output << "* Predictions vs Labels: " << endl << predictions << endl << endl;
00630     verbose_output << "* Coding Matrix: " << endl << coding_matrix << endl << endl;
00631     verbose_output << "* Confusion Matrix: " << endl << confussion << endl;
00632     }
00633 
00634   // clean up classifiers vector
00635   for(u32 i = 0; i < classifiers_vector.size(); i++)
00636     {
00637     delete classifiers_vector[i];
00638     }
00639 
00640   // stop timer
00641   execution_time = timer.toc();
00642 
00643   // reset class counter
00644   ClassData::globalIndex = 0;
00645 
00646   // return number of misclassified samples
00647   return n_missed;
00648   }
00649 
00650