ECOCPAK v0.9
|
00001 // Copyright (C) 2011 the authors listed below 00002 // http://ecocpak.sourceforge.net 00003 // 00004 // Authors: 00005 // - Dimitrios Bouzas (bouzas at ieee dot org) 00006 // - Nikolaos Arvanitopoulos (niarvani at ieee dot org) 00007 // - Anastasios Tefas (tefas at aiia dot csd dot auth dot gr) 00008 // 00009 // This file is part of the ECOC PAK C++ library. It is 00010 // provided without any warranty of fitness for any purpose. 00011 // 00012 // You can redistribute this file and/or modify it under 00013 // the terms of the GNU Lesser General Public License (LGPL) 00014 // as published by the Free Software Foundation, either 00015 // version 3 of the License or (at your option) any later 00016 // version. 00017 // (see http://www.opensource.org/licenses for more info) 00018 00019 00022 00023 00024 00042 imat 00043 create_dense_matrix_from_rand 00044 ( 00045 const double* random_matrix, 00046 const u32 n_rows, 00047 const u32 n_cols 00048 ) 00049 { 00050 // number of elements 00051 const u32 n_elem = n_rows * n_cols; 00052 00053 // allocate coding matrix 00054 imat coding_matrix = zeros<imat>(n_rows, n_cols); 00055 00056 // pointer to coding_matrix 00057 int* coding_matrix_ptr = coding_matrix.memptr(); 00058 00059 // iterate through elements of input matrix column wise 00060 for(u32 i = 0; i < n_elem; i++) 00061 { 00062 00063 (random_matrix[i] >= 0.5)? 00064 coding_matrix_ptr[i] = 1 : coding_matrix_ptr[i] = -1; 00065 00066 } 00067 00068 return coding_matrix; 00069 } 00070 00071 00072 00088 bool 00089 identical_columns_dense 00090 ( 00091 const imat& coding_matrix 00092 ) 00093 { 00094 00095 for(u32 i = 0; i < coding_matrix.n_cols - 1; i++) 00096 { 00097 00098 for(u32 j = i + 1; j < coding_matrix.n_cols; j++) 00099 { 00100 00101 u32 matches = accu(coding_matrix.col(i) == coding_matrix.col(j)); 00102 00103 if(matches == coding_matrix.n_rows || matches == 0) 00104 { 00105 return true; 00106 } 00107 00108 } 00109 00110 } 00111 00112 return false; 00113 } 00114 00115 00116 00133 imat 00134 create_dense_random_matrix 00135 ( 00136 const u32 n_classes, 00137 const u32 n_classifiers, 00138 const u32 n_matrices 00139 ) 00140 { 00141 imat coding_matrix; 00142 00143 // distance between pairs of rows of the coding matrix 00144 double dist = 0.0; 00145 00146 int n_iterations = n_matrices; 00147 while(n_iterations > 0) 00148 { 00149 // temporary random matrix auxiliary for the creation of the coding 00150 // matrix 00151 mat tmp_rand_matrix = randu<mat>(n_classes, n_classifiers); 00152 00153 // create current dense random coding matrix 00154 imat current_coding_matrix = create_dense_matrix_from_rand 00155 ( 00156 tmp_rand_matrix.memptr(), 00157 n_classes, 00158 n_classifiers 00159 ); 00160 00161 // denotes if current coding matrix is valid 00162 bool is_valid = true; 00163 00164 // distance between pairs of rows of the current examined matrix 00165 double current_dist = 0.0; 00166 00167 // iterate through number of columns 00168 for(u32 i = 0; i < n_classifiers; i++) 00169 { 00170 00171 // check wether the current coding matrix is valid (i.e., does not 00172 // contain columns with only +1 or only -1) 00173 if( 00174 ( 00175 (accu(current_coding_matrix.col(i) == 1) == 0) 00176 || 00177 (accu(current_coding_matrix.col(i) == -1) == 0) 00178 ) 00179 00180 || identical_columns_dense(current_coding_matrix) 00181 ) 00182 { 00183 is_valid = false; 00184 break; 00185 } 00186 00187 } 00188 00189 // if the current coding matrix is valid 00190 if(is_valid == true) 00191 { 00192 00193 // reduce the number of matrices examined by one 00194 n_iterations--; 00195 00196 // for each pair of rows of the current coding matrix 00197 for(u32 j = 0; j < n_classes - 1; j++) 00198 { 00199 00200 for(u32 k = j + 1; k < n_classes; k++) 00201 { 00202 00203 // compute current distance between rows j and k 00204 current_dist = 00205 norm 00206 ( 00207 conv_to<rowvec>::from(current_coding_matrix.row(j)) 00208 - conv_to<rowvec>::from(current_coding_matrix.row(k)), 00209 2 00210 ); 00211 00212 // if current distance is greater than the distance of any 00213 // pair of rows of the previously encountered coding 00214 // matrices 00215 if(current_dist > dist) 00216 { 00217 dist = current_dist; 00218 coding_matrix = current_coding_matrix; 00219 } 00220 00221 } 00222 00223 } 00224 00225 } 00226 00227 } 00228 00229 return coding_matrix; 00230 } 00231 00232 00233 00262 u32 00263 dense_random_ecoc 00264 ( 00265 const mat& training_samples, 00266 const icolvec& training_labels, 00267 const mat& testing_samples, 00268 const icolvec& testing_labels, 00269 const int decoding_strategy, 00270 const int classifiers_type, 00271 const u32 n_matrices, 00272 const u32 n_desired_classifiers, 00273 const bool verbose, 00274 ofstream& verbose_output, 00275 double& execution_time 00276 ) 00277 { 00278 // timer object to count execution times 00279 wall_clock timer; 00280 00281 // start timer 00282 timer.tic(); 00283 00284 // number of training samples 00285 const u32 n_training_samples = training_samples.n_rows; 00286 00287 // number of samples attributes 00288 const u32 n_attributes = training_samples.n_cols; 00289 00290 // number of testing samples 00291 const u32 n_testing_samples = testing_samples.n_rows; 00292 00293 // variable to hold the number of classes 00294 u32 n_classes = 0; 00295 00296 // vector to hold number of samples per class 00297 ucolvec n_samples_per_class; 00298 00299 // adjust the training samples class labels to start from one 00300 // and count number of classes 00301 const ucolvec tmp_training_labels = process_labels 00302 ( 00303 training_labels, 00304 n_classes 00305 ); 00306 00307 // adjust the testing samples class labels to start from one 00308 const ucolvec tmp_testing_labels = process_labels(testing_labels); 00309 00310 // decompose the training samples matrix into ClassData object 00311 vector<ClassData> 00312 classes_vector = create_class_vector 00313 ( 00314 training_samples, 00315 conv_to<icolvec>::from(tmp_training_labels) 00316 ); 00317 00318 // compute coding matrix for dense random ecoc design 00319 imat coding_matrix = create_dense_random_matrix 00320 ( 00321 n_classes, 00322 n_desired_classifiers, 00323 n_matrices 00324 ); 00325 00326 // classifiers vector 00327 vector<Classifier*> classifiers_vector; 00328 00329 // ================================================================ // 00330 // || Training Step || // 00331 // ================================================================ // 00332 00333 // start training 00334 for(u32 i = 0; i < coding_matrix.n_cols; i++) 00335 { 00336 // data matrix of positive classes for current column of coding 00337 // matrix 00338 mat first_bipartition; 00339 00340 // data matrix of positive classes for current column of coding 00341 // matrix 00342 mat second_bipartition; 00343 00344 // temporary vector to store pointers of positive classes 00345 vector<ClassData*> pos_classes; 00346 00347 // temporary vector to store pointers of negative classes 00348 vector<ClassData*> neg_classes; 00349 00350 // temporary number of possitive samples 00351 u32 n_pos = 0; 00352 00353 // temporary number of negative samples 00354 u32 n_neg = 0; 00355 00356 // iterate through number of classes 00357 for(u32 j = 0; j < n_classes; j++) 00358 { 00359 // if current class is considered positive 00360 if(coding_matrix(j, i) == 1) 00361 { 00362 // append samples of current class to the positive classes data 00363 // matrix 00364 first_bipartition = join_cols 00365 ( 00366 first_bipartition, 00367 classes_vector[j].Data() 00368 ); 00369 00370 // add pointer of current class to temporary vector of positive 00371 // classes 00372 pos_classes.push_back(&(classes_vector[j])); 00373 00374 // update number of positive samples 00375 n_pos += classes_vector[j].Samples(); 00376 } 00377 else 00378 { 00379 // append samples of current class to the negative classe data 00380 // matrix 00381 second_bipartition = join_cols 00382 ( 00383 second_bipartition, 00384 classes_vector[j].Data() 00385 ); 00386 00387 // add pointer of current class to temporary vector of 00388 // negative classes 00389 neg_classes.push_back(&(classes_vector[j])); 00390 00391 // update number of positive samples 00392 n_neg += classes_vector[j].Samples(); 00393 } 00394 00395 } 00396 00397 // according to user specified classifier 00398 switch(classifiers_type) 00399 { 00400 // Nearest Class Centroid Classifier 00401 case NCC: 00402 { 00403 Classifier_ncc* tmp = new Classifier_ncc 00404 ( 00405 first_bipartition, 00406 second_bipartition 00407 ); 00408 00409 // update classifier classes 00410 tmp->pos = pos_classes; 00411 tmp->neg = neg_classes; 00412 tmp->n_pos = n_pos; 00413 tmp->n_neg = n_neg; 00414 00415 // store classifier 00416 classifiers_vector.push_back(tmp); 00417 00418 break; 00419 } 00420 00421 // Fisher Linear Discriminant followed by NCC 00422 case FLDA: 00423 { 00424 Classifier_flda* tmp = new Classifier_flda 00425 ( 00426 first_bipartition, 00427 second_bipartition 00428 ); 00429 00430 // update classifier classes 00431 tmp->pos = pos_classes; 00432 tmp->neg = neg_classes; 00433 tmp->n_pos = n_pos; 00434 tmp->n_neg = n_neg; 00435 00436 // store classifier 00437 classifiers_vector.push_back(tmp); 00438 00439 break; 00440 } 00441 00442 // Support Vector Machine Classifier 00443 case SVM: 00444 { 00445 Classifier_svm* tmp = new Classifier_svm 00446 ( 00447 first_bipartition, 00448 second_bipartition 00449 ); 00450 00451 // update classifier classes 00452 tmp->pos = pos_classes; 00453 tmp->neg = neg_classes; 00454 tmp->n_pos = n_pos; 00455 tmp->n_neg = n_neg; 00456 00457 // store classifier 00458 classifiers_vector.push_back(tmp); 00459 00460 break; 00461 } 00462 00463 // AdaBoost Classifier 00464 case ADABOOST: 00465 { 00466 Classifier_adaBoost* tmp = new Classifier_adaBoost 00467 ( 00468 first_bipartition, 00469 second_bipartition 00470 ); 00471 00472 // update classifier classes 00473 tmp->pos = pos_classes; 00474 tmp->neg = neg_classes; 00475 tmp->n_pos = n_pos; 00476 tmp->n_neg = n_neg; 00477 00478 // store classifier 00479 classifiers_vector.push_back(tmp); 00480 00481 break; 00482 } 00483 00484 // Sum of Error Squares Classifier 00485 case LEAST_SQUARES: 00486 { 00487 Classifier_ls* tmp = new Classifier_ls 00488 ( 00489 first_bipartition, 00490 second_bipartition 00491 ); 00492 00493 // update classifier classes 00494 tmp->pos = pos_classes; 00495 tmp->neg = neg_classes; 00496 tmp->n_pos = n_pos; 00497 tmp->n_neg = n_neg; 00498 00499 // store classifier 00500 classifiers_vector.push_back(tmp); 00501 00502 break; 00503 } 00504 00505 // Custom Classifier 00506 case CUSTOM_CLASSIFIER: 00507 { 00508 Classifier_custom* tmp = new Classifier_custom 00509 ( 00510 first_bipartition, 00511 second_bipartition 00512 ); 00513 00514 // update classifier classes 00515 tmp->pos = pos_classes; 00516 tmp->neg = neg_classes; 00517 tmp->n_pos = n_pos; 00518 tmp->n_neg = n_neg; 00519 00520 // store classifier 00521 classifiers_vector.push_back(tmp); 00522 00523 break; 00524 } 00525 00526 default: 00527 { 00528 arma_debug_print 00529 ( 00530 "dense_random_ecoc(): Unknown classifier's option" 00531 ); 00532 00533 } 00534 00535 } 00536 00537 } 00538 00539 // ================================================================ // 00540 // || Testing Step || // 00541 // ================================================================ // 00542 00543 // classification error 00544 double error = 0.0; 00545 00546 // predictions for each sample 00547 uvec predictions; 00548 00549 // confussion matrix 00550 umat confussion; 00551 00552 // number of misclassified samples 00553 u32 n_missed = 0; 00554 00555 // used to hold the number of missclassified testing samples 00556 decode 00557 ( 00558 testing_samples, 00559 tmp_testing_labels, 00560 coding_matrix, 00561 classifiers_vector, 00562 classes_vector, 00563 decoding_strategy, 00564 predictions, 00565 n_missed, 00566 error, 00567 confussion 00568 ); 00569 00570 // if verbose output is activated 00571 if(verbose == true) 00572 { 00573 predictions = join_rows(predictions, tmp_testing_labels); 00574 verbose_output << "* Predictions vs Labels: " << endl << predictions << endl << endl; 00575 verbose_output << "* Coding Matrix: " << endl << coding_matrix << endl << endl; 00576 verbose_output << "* Confusion Matrix: " << endl << confussion << endl; 00577 } 00578 00579 // clean up classifiers vector 00580 for(u32 i = 0; i < classifiers_vector.size(); i++) 00581 { 00582 delete classifiers_vector[i]; 00583 } 00584 00585 // stop timer 00586 execution_time = timer.toc(); 00587 00588 // reset class counter 00589 ClassData::globalIndex = 0; 00590 00591 // return number of misclassified samples 00592 return n_missed; 00593 } 00594 00595