ECOCPAK v0.9
|
00001 // Copyright (C) 2011 the authors listed below 00002 // http://ecocpak.sourceforge.net 00003 // 00004 // Authors: 00005 // - Dimitrios Bouzas (bouzas at ieee dot org) 00006 // - Nikolaos Arvanitopoulos (niarvani at ieee dot org) 00007 // - Anastasios Tefas (tefas at aiia dot csd dot auth dot gr) 00008 // 00009 // This file is part of the ECOC PAK C++ library. It is 00010 // provided without any warranty of fitness for any purpose. 00011 // 00012 // You can redistribute this file and/or modify it under 00013 // the terms of the GNU Lesser General Public License (LGPL) 00014 // as published by the Free Software Foundation, either 00015 // version 3 of the License or (at your option) any later 00016 // version. 00017 // (see http://www.opensource.org/licenses for more info) 00018 00019 00022 00023 00024 00042 imat 00043 create_sparse_matrix_from_rand 00044 ( 00045 const double* random_matrix, 00046 const u32 n_rows, 00047 const u32 n_cols 00048 ) 00049 { 00050 // number of matrix elements to be processed 00051 const u32 n_elem = n_rows * n_cols; 00052 00053 // allocate coding matrix 00054 imat coding_matrix = zeros<imat>(n_rows, n_cols); 00055 00056 // pointer to coding_matrix 00057 int* coding_matrix_ptr = coding_matrix.memptr(); 00058 00059 // iterate through elements of input matrix column wise 00060 for(u32 i = 0; i < n_elem; i++) 00061 { 00062 // 0.5 probability for zero 00063 if(random_matrix[i] >= 0.5) 00064 { 00065 // 0.25 probability for 1 and -1 00066 (random_matrix[i] >= 0.75)? 00067 coding_matrix_ptr[i] = 1 00068 : 00069 coding_matrix_ptr[i] = -1; 00070 } 00071 00072 } 00073 00074 return coding_matrix; 00075 } 00076 00077 00078 00094 bool 00095 identical_columns_sparse 00096 ( 00097 const imat& coding_matrix 00098 ) 00099 { 00100 u32 counter = 0; 00101 00102 for(u32 i = 0; i < coding_matrix.n_cols - 1; i++) 00103 { 00104 u32 n_zeros_i = accu(coding_matrix.col(i) == 0); 00105 00106 for(u32 j = i + 1; j < coding_matrix.n_cols; j++) 00107 { 00108 u32 n_zeros_j = accu(coding_matrix.col(j) == 0); 00109 00110 u32 matches = accu(coding_matrix.col(i) == coding_matrix.col(j)); 00111 00112 if( 00113 (matches == coding_matrix.n_rows) || 00114 (matches == 0 && n_zeros_i == 0 && n_zeros_j == 0) 00115 ) 00116 { 00117 return true; 00118 } 00119 else 00120 { 00121 counter = 0; 00122 for(u32 k = 0; k < coding_matrix.n_rows; k++) 00123 { 00124 if( 00125 (coding_matrix(k, i) != coding_matrix(k, j)) && 00126 (coding_matrix(k, i) != 0) && (coding_matrix(k, j) != 0) 00127 ) 00128 { 00129 counter++; 00130 } 00131 00132 if(coding_matrix(k, i) == 0 && coding_matrix(k, j) == 0) 00133 { 00134 counter++; 00135 } 00136 00137 } 00138 00139 if(counter == coding_matrix.n_rows) 00140 { 00141 return true; 00142 } 00143 00144 } 00145 00146 } 00147 00148 } 00149 00150 return false; 00151 } 00152 00153 00154 00171 imat 00172 create_sparse_random_matrix 00173 ( 00174 const u32 n_classes, 00175 const u32 n_classifiers, 00176 const u32 n_matrices 00177 ) 00178 { 00179 // declare coding matrix to be filled 00180 imat coding_matrix; 00181 00182 // distance between pairs of rows of the coding matrix 00183 double dist = 0.0; 00184 00185 int n_iterations = n_matrices; 00186 while(n_iterations > 0) 00187 { 00188 // temporary random matrix auxiliary for the creation of the coding 00189 // matrix 00190 mat tmp_rand_matrix = randu<mat>(n_classes, n_classifiers); 00191 00192 // create current sparse random coding matrix 00193 imat current_coding_matrix = create_sparse_matrix_from_rand 00194 ( 00195 tmp_rand_matrix.memptr(), 00196 n_classes, 00197 n_classifiers 00198 ); 00199 00200 // denotes if current coding matrix is valid 00201 bool is_valid = true; 00202 00203 // distance between pairs of rows of the current examined matrix 00204 double current_dist = 0.0; 00205 00206 // iterate through number of columns 00207 for(u32 i = 0; i < n_classifiers; i++) 00208 { 00209 const u32 n_plus_ones = accu(current_coding_matrix.col(i) == 1); 00210 const u32 n_minus_ones = accu(current_coding_matrix.col(i) == -1); 00211 const u32 n_zeros = accu(current_coding_matrix.col(i) == 0); 00212 00213 // check if the current coding matrix is valid (i.e., does not 00214 // contain columns with only +1 or only -1 only zeros e.t.c) 00215 if( n_plus_ones == n_classes // column contains all plus ones 00216 || n_minus_ones == n_classes // column contains all minus ones 00217 || n_zeros == n_classes // column contains all zeros 00218 || n_plus_ones == 0 // column doesn't contain not 00219 // even one plus one 00220 || n_minus_ones == 0 // column doesn't contain not 00221 // even one minus one 00222 // column is identical with some other column of the coding 00223 // matrix 00224 || identical_columns_sparse(current_coding_matrix) == true 00225 ) 00226 { 00227 // if something of the above is true set the is_valid centintel 00228 // to false (i.e., signal that current coding matrix is not 00229 // valid) 00230 is_valid = false; 00231 00232 // break from the for loop 00233 break; 00234 } 00235 00236 } 00237 00238 // if the current coding matrix is valid 00239 if(is_valid == true) 00240 { 00241 //reduce number of matrices examined of iterations by one 00242 n_iterations--; 00243 00244 // for each pair of rows of the current coding matrix 00245 for(u32 j = 0; j < n_classes - 1; j++) 00246 { 00247 for(u32 k = j + 1; k < n_classes; k++) 00248 { 00249 // compute current distance between rows j and k 00250 current_dist = 00251 norm 00252 ( 00253 conv_to<rowvec>::from(current_coding_matrix.row(j)) 00254 - conv_to<rowvec>::from(current_coding_matrix.row(k)), 00255 2 00256 ); 00257 00258 // if current distance is greater than the distance of any 00259 // pair of rows of the previously encountered coding matrices 00260 if(current_dist > dist) 00261 { 00262 dist = current_dist; 00263 coding_matrix = current_coding_matrix; 00264 } 00265 00266 } 00267 00268 } 00269 00270 } 00271 00272 } 00273 00274 return coding_matrix; 00275 } 00276 00277 00278 00307 u32 00308 sparse_random_ecoc 00309 ( 00310 const mat& training_samples, 00311 const icolvec& training_labels, 00312 const mat& testing_samples, 00313 const icolvec& testing_labels, 00314 const int decoding_strategy, 00315 const int classifiers_type, 00316 const u32 n_matrices, 00317 const u32 n_desired_classifiers, 00318 const bool verbose, 00319 ofstream& verbose_output, 00320 double& execution_time 00321 ) 00322 { 00323 // timer object to count execution times 00324 wall_clock timer; 00325 00326 // start timer 00327 timer.tic(); 00328 00329 // number of training samples 00330 const u32 n_training_samples = training_samples.n_rows; 00331 00332 // number of samples attributes 00333 const u32 n_attributes = training_samples.n_cols; 00334 00335 // number of testing samples 00336 const u32 n_testing_samples = testing_samples.n_rows; 00337 00338 // variable to hold the number of classes 00339 u32 n_classes = 0; 00340 00341 // vector to hold number of samples per class 00342 ucolvec n_samples_per_class; 00343 00344 // adjust the training samples class labels to start from one 00345 // and count number of classes 00346 const ucolvec tmp_training_labels = process_labels 00347 ( 00348 training_labels, 00349 n_classes 00350 ); 00351 00352 // adjust the testing samples class labels to start from one 00353 const ucolvec tmp_testing_labels = process_labels(testing_labels); 00354 00355 // decompose the training samples matrix into ClassData object 00356 vector<ClassData> classes_vector = 00357 create_class_vector 00358 ( 00359 training_samples, 00360 conv_to<icolvec>::from(tmp_training_labels) 00361 ); 00362 00363 // declare empty classifiers vector 00364 // vector<Classifier*> classifiers_vector; 00365 00366 // compute coding matrix for sparse random ecoc design 00367 imat coding_matrix = create_sparse_random_matrix 00368 ( 00369 n_classes, 00370 n_desired_classifiers, 00371 n_matrices 00372 ); 00373 00374 // classifiers vector 00375 vector<Classifier*> classifiers_vector; 00376 00377 // ================================================================ // 00378 // || Training Step || // 00379 // ================================================================ // 00380 00381 // start training 00382 for(u32 i = 0; i < coding_matrix.n_cols; i++) 00383 { 00384 // data matrix of positive classes for current column of coding 00385 // matrix 00386 mat first_bipartition; 00387 00388 // data matrix of positive classes for current column of coding 00389 // matrix 00390 mat second_bipartition; 00391 00392 // temporary vector to store pointers of positive classes 00393 vector<ClassData*> pos_classes; 00394 00395 // temporary vector to store pointers of negative classes 00396 vector<ClassData*> neg_classes; 00397 00398 // temporary number of possitive samples 00399 u32 n_pos = 0; 00400 00401 // temporary number of negative samples 00402 u32 n_neg = 0; 00403 00404 // iterate through number of classes 00405 for(u32 j = 0; j < n_classes; j++) 00406 { 00407 // if current class is considered positive 00408 if(coding_matrix(j, i) == 1) 00409 { 00410 // append samples of current class to the positive classes 00411 // data matrix 00412 first_bipartition = join_cols 00413 ( 00414 first_bipartition, 00415 classes_vector[j].Data() 00416 ); 00417 00418 // add pointer of current class to temporary vector of 00419 // positive classes 00420 pos_classes.push_back(&(classes_vector[j])); 00421 00422 // update number of positive samples 00423 n_pos += classes_vector[j].Samples(); 00424 } 00425 else 00426 { 00427 // if current class is considered negative 00428 if(coding_matrix(j, i) == -1) 00429 { 00430 // append samples of current class to the negative classe 00431 // data matrix 00432 second_bipartition = join_cols 00433 ( 00434 second_bipartition, 00435 classes_vector[j].Data() 00436 ); 00437 00438 // add pointer of current class to temporary vector of 00439 // negative classes 00440 neg_classes.push_back(&(classes_vector[j])); 00441 00442 // update number of positive samples 00443 n_neg += classes_vector[j].Samples(); 00444 } 00445 00446 // otherwise current class is not considered 00447 00448 } 00449 00450 } 00451 00452 // according to user specified classifier 00453 switch(classifiers_type) 00454 { 00455 // Nearest Class Centroid Classifier 00456 case NCC: 00457 { 00458 Classifier_ncc* tmp = new Classifier_ncc 00459 ( 00460 first_bipartition, 00461 second_bipartition 00462 ); 00463 00464 // update classifier classes 00465 tmp->pos = pos_classes; 00466 tmp->neg = neg_classes; 00467 tmp->n_pos = n_pos; 00468 tmp->n_neg = n_neg; 00469 00470 // store classifier 00471 classifiers_vector.push_back(tmp); 00472 00473 break; 00474 } 00475 00476 // Fisher Linear Discriminant followed by NCC 00477 case FLDA: 00478 { 00479 Classifier_flda* tmp = new Classifier_flda 00480 ( 00481 first_bipartition, 00482 second_bipartition 00483 ); 00484 00485 // update classifier classes 00486 tmp->pos = pos_classes; 00487 tmp->neg = neg_classes; 00488 tmp->n_pos = n_pos; 00489 tmp->n_neg = n_neg; 00490 00491 // store classifier 00492 classifiers_vector.push_back(tmp); 00493 00494 break; 00495 } 00496 00497 // Support Vector Machine Classifier 00498 case SVM: 00499 { 00500 Classifier_svm* tmp = new Classifier_svm 00501 ( 00502 first_bipartition, 00503 second_bipartition 00504 ); 00505 00506 // update classifier classes 00507 tmp->pos = pos_classes; 00508 tmp->neg = neg_classes; 00509 tmp->n_pos = n_pos; 00510 tmp->n_neg = n_neg; 00511 00512 // store classifier 00513 classifiers_vector.push_back(tmp); 00514 00515 break; 00516 } 00517 00518 // AdaBoost Classifier 00519 case ADABOOST: 00520 { 00521 Classifier_adaBoost* tmp = new Classifier_adaBoost 00522 ( 00523 first_bipartition, 00524 second_bipartition 00525 ); 00526 00527 // update classifier classes 00528 tmp->pos = pos_classes; 00529 tmp->neg = neg_classes; 00530 tmp->n_pos = n_pos; 00531 tmp->n_neg = n_neg; 00532 00533 // store classifier 00534 classifiers_vector.push_back(tmp); 00535 00536 break; 00537 } 00538 00539 // Sum of Error Squares Classifier 00540 case LEAST_SQUARES: 00541 { 00542 Classifier_ls* tmp = new Classifier_ls 00543 ( 00544 first_bipartition, 00545 second_bipartition 00546 ); 00547 00548 // update classifier classes 00549 tmp->pos = pos_classes; 00550 tmp->neg = neg_classes; 00551 tmp->n_pos = n_pos; 00552 tmp->n_neg = n_neg; 00553 00554 // store classifier 00555 classifiers_vector.push_back(tmp); 00556 00557 break; 00558 } 00559 00560 // Custom Classifier 00561 case CUSTOM_CLASSIFIER: 00562 { 00563 Classifier_custom* tmp = new Classifier_custom 00564 ( 00565 first_bipartition, 00566 second_bipartition 00567 ); 00568 00569 // update classifier classes 00570 tmp->pos = pos_classes; 00571 tmp->neg = neg_classes; 00572 tmp->n_pos = n_pos; 00573 tmp->n_neg = n_neg; 00574 00575 // store classifier 00576 classifiers_vector.push_back(tmp); 00577 00578 break; 00579 } 00580 00581 default: 00582 { 00583 arma_debug_print 00584 ( 00585 "sparse_random_ecoc(): Unknown classifier's option" 00586 ); 00587 00588 } 00589 00590 } 00591 00592 } 00593 00594 // ================================================================ // 00595 // || Testing Step || // 00596 // ================================================================ // 00597 00598 // classification error 00599 double error = 0.0; 00600 00601 // predictions for each sample 00602 uvec predictions; 00603 00604 // confussion matrix 00605 umat confussion; 00606 00607 // number of misclassified samples 00608 u32 n_missed = 0; 00609 00610 // used to hold the number of missclassified testing samples 00611 decode 00612 ( 00613 testing_samples, 00614 tmp_testing_labels, 00615 coding_matrix, 00616 classifiers_vector, 00617 classes_vector, 00618 decoding_strategy, 00619 predictions, 00620 n_missed, 00621 error, 00622 confussion 00623 ); 00624 00625 // if verbose output is activated 00626 if(verbose == true) 00627 { 00628 predictions = join_rows(predictions, tmp_testing_labels); 00629 verbose_output << "* Predictions vs Labels: " << endl << predictions << endl << endl; 00630 verbose_output << "* Coding Matrix: " << endl << coding_matrix << endl << endl; 00631 verbose_output << "* Confusion Matrix: " << endl << confussion << endl; 00632 } 00633 00634 // clean up classifiers vector 00635 for(u32 i = 0; i < classifiers_vector.size(); i++) 00636 { 00637 delete classifiers_vector[i]; 00638 } 00639 00640 // stop timer 00641 execution_time = timer.toc(); 00642 00643 // reset class counter 00644 ClassData::globalIndex = 0; 00645 00646 // return number of misclassified samples 00647 return n_missed; 00648 } 00649 00650