ECOCPAK: fn_dense_random_ecoc.hpp Source File

ECOCPAK v0.9
00001 // Copyright (C) 2011 the authors listed below
00002 // http://ecocpak.sourceforge.net
00003 //
00004 // Authors:
00005 // - Dimitrios Bouzas (bouzas at ieee dot org)
00006 // - Nikolaos Arvanitopoulos (niarvani at ieee dot org)
00007 // - Anastasios Tefas (tefas at aiia dot csd dot auth dot gr)
00008 //
00009 // This file is part of the ECOC PAK C++ library. It is
00010 // provided without any warranty of fitness for any purpose.
00011 //
00012 // You can redistribute this file and/or modify it under
00013 // the terms of the GNU Lesser General Public License (LGPL)
00014 // as published by the Free Software Foundation, either
00015 // version 3 of the License or (at your option) any later
00016 // version.
00017 // (see http://www.opensource.org/licenses for more info)
00018 
00019 
00022 
00023 
00024 
00042 imat
00043 create_dense_matrix_from_rand
00044   (
00045   const double* random_matrix,
00046   const u32 n_rows,
00047   const u32 n_cols
00048   )
00049   {
00050   // number of elements
00051   const u32 n_elem = n_rows * n_cols;
00052 
00053   // allocate coding matrix
00054   imat coding_matrix = zeros<imat>(n_rows, n_cols);
00055 
00056   // pointer to coding_matrix
00057   int* coding_matrix_ptr = coding_matrix.memptr();
00058 
00059   // iterate through elements of input matrix column wise
00060   for(u32 i = 0; i < n_elem; i++)
00061     {
00062 
00063     (random_matrix[i] >= 0.5)?
00064       coding_matrix_ptr[i] = 1 : coding_matrix_ptr[i] = -1;
00065 
00066     }
00067 
00068   return coding_matrix;
00069   }
00070 
00071 
00072 
00088 bool
00089 identical_columns_dense
00090   (
00091   const imat& coding_matrix
00092   )
00093   {
00094 
00095   for(u32 i = 0; i < coding_matrix.n_cols - 1; i++)
00096     {
00097 
00098     for(u32 j = i + 1; j < coding_matrix.n_cols; j++)
00099       {
00100 
00101       u32 matches = accu(coding_matrix.col(i) == coding_matrix.col(j));
00102 
00103       if(matches == coding_matrix.n_rows || matches == 0)
00104         {
00105           return true;
00106         }
00107 
00108       }
00109 
00110     }
00111 
00112   return false;
00113   }
00114 
00115 
00116 
00133 imat
00134 create_dense_random_matrix
00135   (
00136   const u32 n_classes,
00137   const u32 n_classifiers,
00138   const u32 n_matrices
00139   )
00140   {
00141   imat coding_matrix;
00142 
00143   // distance between pairs of rows of the coding matrix
00144   double dist = 0.0;
00145 
00146   int n_iterations = n_matrices;
00147   while(n_iterations > 0)
00148     {
00149     // temporary random matrix auxiliary for the creation of the coding
00150     // matrix
00151     mat tmp_rand_matrix = randu<mat>(n_classes, n_classifiers);
00152 
00153     // create current dense random coding matrix
00154     imat current_coding_matrix = create_dense_matrix_from_rand
00155                                    (
00156                                    tmp_rand_matrix.memptr(),
00157                                    n_classes,
00158                                    n_classifiers
00159                                    );
00160 
00161     // denotes if current coding matrix is valid
00162     bool is_valid = true;
00163 
00164     // distance between pairs of rows of the current examined matrix
00165     double current_dist = 0.0;
00166 
00167     // iterate through number of columns
00168     for(u32 i = 0; i < n_classifiers; i++)
00169       {
00170 
00171       // check wether the current coding matrix is valid (i.e., does not
00172       // contain columns with only +1 or only -1)
00173       if(
00174           (
00175           (accu(current_coding_matrix.col(i) == 1) == 0)
00176           ||
00177           (accu(current_coding_matrix.col(i) == -1) == 0)
00178           )
00179 
00180         || identical_columns_dense(current_coding_matrix)
00181         )
00182         {
00183         is_valid = false;
00184         break;
00185         }
00186 
00187       }
00188 
00189       // if the current coding matrix is valid
00190       if(is_valid == true)
00191         {
00192 
00193         // reduce the number of matrices examined by one
00194         n_iterations--;
00195 
00196         // for each pair of rows of the current coding matrix
00197         for(u32 j = 0; j < n_classes - 1; j++)
00198           {
00199 
00200           for(u32 k = j + 1; k < n_classes; k++)
00201             {
00202 
00203             // compute current distance between rows j and k
00204             current_dist =
00205               norm
00206                 (
00207                 conv_to<rowvec>::from(current_coding_matrix.row(j))
00208                 - conv_to<rowvec>::from(current_coding_matrix.row(k)),
00209                 2
00210                 );
00211 
00212             // if current distance is greater than the distance of any
00213             // pair of rows of the previously encountered coding
00214             // matrices
00215             if(current_dist > dist)
00216               {
00217               dist = current_dist;
00218               coding_matrix = current_coding_matrix;
00219               }
00220 
00221             }
00222 
00223           }
00224 
00225         }
00226 
00227       }
00228 
00229   return coding_matrix;
00230   }
00231 
00232 
00233 
00262 u32
00263 dense_random_ecoc
00264   (
00265   const mat& training_samples,
00266   const icolvec& training_labels,
00267   const mat& testing_samples,
00268   const icolvec& testing_labels,
00269   const int decoding_strategy,
00270   const int classifiers_type,
00271   const u32 n_matrices,
00272   const u32 n_desired_classifiers,
00273   const bool verbose,
00274   ofstream& verbose_output,
00275   double& execution_time
00276   )
00277   {
00278   // timer object to count execution times
00279   wall_clock timer;
00280 
00281   // start timer
00282   timer.tic();
00283 
00284   // number of training samples
00285   const u32 n_training_samples = training_samples.n_rows;
00286 
00287   // number of samples attributes
00288   const u32 n_attributes = training_samples.n_cols;
00289 
00290   // number of testing samples
00291   const u32 n_testing_samples = testing_samples.n_rows;
00292 
00293   // variable to hold the number of classes
00294   u32 n_classes = 0;
00295 
00296   // vector to hold number of samples per class
00297   ucolvec n_samples_per_class;
00298 
00299   // adjust the training samples class labels to start from one
00300   // and count number of classes
00301   const ucolvec tmp_training_labels = process_labels
00302                                         (
00303                                         training_labels,
00304                                         n_classes
00305                                         );
00306 
00307   // adjust the testing samples class labels to start from one
00308   const ucolvec tmp_testing_labels = process_labels(testing_labels);
00309 
00310   // decompose the training samples matrix into ClassData object
00311   vector<ClassData>
00312   classes_vector = create_class_vector
00313                      (
00314                      training_samples,
00315                      conv_to<icolvec>::from(tmp_training_labels)
00316                      );
00317 
00318   // compute coding matrix for dense random ecoc design
00319   imat coding_matrix = create_dense_random_matrix
00320                          (
00321                          n_classes,
00322                          n_desired_classifiers,
00323                          n_matrices
00324                          );
00325 
00326   // classifiers vector
00327   vector<Classifier*> classifiers_vector;
00328 
00329   // ================================================================ //
00330   // ||                        Training Step                       || //
00331   // ================================================================ //
00332 
00333     // start training
00334     for(u32 i = 0; i < coding_matrix.n_cols; i++)
00335       {
00336       // data matrix of positive classes for current column of coding
00337       // matrix
00338       mat first_bipartition;
00339 
00340       // data matrix of positive classes for current column of coding
00341       // matrix
00342       mat second_bipartition;
00343 
00344       // temporary vector to store pointers of positive classes
00345       vector<ClassData*> pos_classes;
00346 
00347       // temporary vector to store pointers of negative classes
00348       vector<ClassData*> neg_classes;
00349 
00350       // temporary number of possitive samples
00351       u32 n_pos = 0;
00352 
00353       // temporary number of negative samples
00354       u32 n_neg = 0;
00355 
00356       // iterate through number of classes
00357       for(u32 j = 0; j < n_classes; j++)
00358         {
00359         // if current class is considered positive
00360         if(coding_matrix(j, i) == 1)
00361           {
00362           // append samples of current class to the positive classes data
00363           // matrix
00364           first_bipartition = join_cols
00365                                 (
00366                                 first_bipartition,
00367                                 classes_vector[j].Data()
00368                                 );
00369 
00370           // add pointer of current class to temporary vector of positive
00371           // classes
00372           pos_classes.push_back(&(classes_vector[j]));
00373 
00374           // update number of positive samples
00375           n_pos += classes_vector[j].Samples();
00376           }
00377         else
00378           {
00379           // append samples of current class to the negative classe data
00380           // matrix
00381           second_bipartition = join_cols
00382                                  (
00383                                  second_bipartition,
00384                                  classes_vector[j].Data()
00385                                  );
00386 
00387           // add pointer of current class to temporary vector of
00388           // negative classes
00389           neg_classes.push_back(&(classes_vector[j]));
00390 
00391           // update number of positive samples
00392           n_neg += classes_vector[j].Samples();
00393           }
00394 
00395         }
00396 
00397       // according to user specified classifier
00398       switch(classifiers_type)
00399         {
00400         // Nearest Class Centroid Classifier
00401         case NCC:
00402           {
00403           Classifier_ncc* tmp = new Classifier_ncc
00404                                       (
00405                                       first_bipartition,
00406                                       second_bipartition
00407                                       );
00408 
00409           // update classifier classes
00410           tmp->pos = pos_classes;
00411           tmp->neg = neg_classes;
00412           tmp->n_pos = n_pos;
00413           tmp->n_neg = n_neg;
00414 
00415           // store classifier
00416           classifiers_vector.push_back(tmp);
00417 
00418           break;
00419           }
00420 
00421         // Fisher Linear Discriminant followed by NCC
00422         case FLDA:
00423           {
00424           Classifier_flda* tmp = new Classifier_flda
00425                                        (
00426                                        first_bipartition,
00427                                        second_bipartition
00428                                        );
00429 
00430           // update classifier classes
00431           tmp->pos = pos_classes;
00432           tmp->neg = neg_classes;
00433           tmp->n_pos = n_pos;
00434           tmp->n_neg = n_neg;
00435 
00436           // store classifier
00437           classifiers_vector.push_back(tmp);
00438 
00439           break;
00440           }
00441 
00442         // Support Vector Machine Classifier
00443         case SVM:
00444           {
00445           Classifier_svm* tmp = new Classifier_svm
00446                                       (
00447                                       first_bipartition,
00448                                       second_bipartition
00449                                       );
00450 
00451           // update classifier classes
00452           tmp->pos = pos_classes;
00453           tmp->neg = neg_classes;
00454           tmp->n_pos = n_pos;
00455           tmp->n_neg = n_neg;
00456 
00457           // store classifier
00458           classifiers_vector.push_back(tmp);
00459 
00460           break;
00461           }
00462 
00463         // AdaBoost Classifier
00464         case ADABOOST:
00465           {
00466           Classifier_adaBoost* tmp = new Classifier_adaBoost
00467                                            (
00468                                            first_bipartition,
00469                                            second_bipartition
00470                                            );
00471 
00472           // update classifier classes
00473           tmp->pos = pos_classes;
00474           tmp->neg = neg_classes;
00475           tmp->n_pos = n_pos;
00476           tmp->n_neg = n_neg;
00477 
00478           // store classifier
00479           classifiers_vector.push_back(tmp);
00480 
00481           break;
00482           }
00483 
00484         // Sum of Error Squares Classifier
00485         case LEAST_SQUARES:
00486           {
00487           Classifier_ls* tmp = new Classifier_ls
00488                                      (
00489                                      first_bipartition,
00490                                      second_bipartition
00491                                      );
00492 
00493           // update classifier classes
00494           tmp->pos = pos_classes;
00495           tmp->neg = neg_classes;
00496           tmp->n_pos = n_pos;
00497           tmp->n_neg = n_neg;
00498 
00499           // store classifier
00500           classifiers_vector.push_back(tmp);
00501 
00502           break;
00503           }
00504 
00505         // Custom Classifier
00506         case CUSTOM_CLASSIFIER:
00507           {
00508           Classifier_custom* tmp = new Classifier_custom
00509                                          (
00510                                          first_bipartition,
00511                                          second_bipartition
00512                                          );
00513 
00514           // update classifier classes
00515           tmp->pos = pos_classes;
00516           tmp->neg = neg_classes;
00517           tmp->n_pos = n_pos;
00518           tmp->n_neg = n_neg;
00519 
00520           // store classifier
00521           classifiers_vector.push_back(tmp);
00522 
00523           break;
00524           }
00525 
00526         default:
00527           {
00528           arma_debug_print
00529             (
00530             "dense_random_ecoc(): Unknown classifier's option"
00531             );
00532 
00533           }
00534 
00535         }
00536 
00537       }
00538 
00539   // ================================================================ //
00540   // ||                        Testing Step                        || //
00541   // ================================================================ //
00542 
00543     // classification error
00544     double error = 0.0;
00545 
00546     // predictions for each sample
00547     uvec predictions;
00548 
00549     // confussion matrix
00550     umat confussion;
00551 
00552     // number of misclassified samples
00553     u32 n_missed = 0;
00554 
00555     // used to hold the number of missclassified testing samples
00556     decode
00557       (
00558       testing_samples,
00559       tmp_testing_labels,
00560       coding_matrix,
00561       classifiers_vector,
00562       classes_vector,
00563       decoding_strategy,
00564       predictions,
00565       n_missed,
00566       error,
00567       confussion
00568       );
00569 
00570   // if verbose output is activated
00571   if(verbose == true)
00572     {
00573     predictions = join_rows(predictions, tmp_testing_labels);
00574     verbose_output << "* Predictions vs Labels: " << endl << predictions << endl << endl;
00575     verbose_output << "* Coding Matrix: " << endl << coding_matrix << endl << endl;
00576     verbose_output << "* Confusion Matrix: " << endl << confussion << endl;
00577     }
00578 
00579   // clean up classifiers vector
00580   for(u32 i = 0; i < classifiers_vector.size(); i++)
00581     {
00582     delete classifiers_vector[i];
00583     }
00584 
00585   // stop timer
00586   execution_time = timer.toc();
00587 
00588   // reset class counter
00589   ClassData::globalIndex = 0;
00590 
00591   // return number of misclassified samples
00592   return n_missed;
00593   }
00594 
00595