// ECOCPAK v0.9
00001 // Copyright (C) 2011 the authors listed below 00002 // http://ecocpak.sourceforge.net 00003 // 00004 // Authors: 00005 // - Dimitrios Bouzas (bouzas at ieee dot org) 00006 // - Nikolaos Arvanitopoulos (niarvani at ieee dot org) 00007 // - Anastasios Tefas (tefas at aiia dot csd dot auth dot gr) 00008 // 00009 // This file is part of the ECOC PAK C++ library. It is 00010 // provided without any warranty of fitness for any purpose. 00011 // 00012 // You can redistribute this file and/or modify it under 00013 // the terms of the GNU Lesser General Public License (LGPL) 00014 // as published by the Free Software Foundation, either 00015 // version 3 of the License or (at your option) any later 00016 // version. 00017 // (see http://www.opensource.org/licenses for more info) 00018 00019 00022 00023 00035 imat 00036 permvec 00037 ( 00038 const ivec& A 00039 ) 00040 { 00041 // output binary matrix where its columns will be the permutation 00042 // vectors of A 00043 imat out; 00044 00045 // number of possible permutations 00046 u32 n = pow(2, A.n_elem); 00047 00048 // index counters 00049 u32 k = 0; 00050 u32 l = 0; 00051 00052 // clone input vector 00053 ivec tmp1 = A; 00054 00055 // iterate through the available number of permutations 00056 for(u32 i = 0; i < n; i++) 00057 { 00058 if(k == A.n_elem) 00059 { 00060 k = 0; 00061 00062 if(l == A.n_elem) 00063 { 00064 return out; 00065 } 00066 else 00067 { 00068 tmp1(l) = -tmp1(l); 00069 l++; 00070 k++; 00071 } 00072 00073 } 00074 00075 ivec tmp = tmp1; 00076 00077 tmp(k) = -tmp(k); 00078 00079 bool exists = false; 00080 00081 if 00082 ( 00083 tmp.n_elem == accu(tmp) || 00084 tmp.n_elem == accu(-tmp) || 00085 tmp.n_elem == accu(tmp == A) || 00086 tmp.n_elem == accu(-tmp == A) 00087 ) 00088 { 00089 exists = true; 00090 } 00091 else 00092 { 00093 for(u32 j = 0; j < out.n_cols; j++) 00094 { 00095 if 00096 ( 00097 out.n_rows == accu(tmp == out.col(j)) || 00098 out.n_rows == accu(-tmp == out.col(j)) 00099 ) 00100 { 00101 
exists = true; 00102 break; 00103 } 00104 00105 } 00106 00107 } 00108 00109 if(exists == false) 00110 { 00111 out = join_rows(out, tmp); 00112 } 00113 00114 k++; 00115 } 00116 00117 return out; 00118 } 00119 00120 00121 00138 void 00139 append_tree 00140 ( 00141 const vector<ClassData>& classes_vector, 00142 const ivec& root, 00143 const int criterion_option, 00144 const int classifiers_type, 00145 imat& M, 00146 vector<Classifier*>& classifiers_vector 00147 ) 00148 { 00149 // number of active classes 00150 uvec actinds = find(root != 0); 00151 u32 n_classes = actinds.n_elem; 00152 00153 // trivial recursion step 00154 if(n_classes < 2) 00155 { 00156 // exit 00157 return; 00158 } 00159 00160 // case classes are two 00161 if(n_classes == 2) 00162 { 00163 // create column to be appended in the coding matrix 00164 ivec c = zeros<ivec>(M.n_rows); 00165 00166 c[classes_vector[actinds[0]].ClassLabel() - 1] = 1; 00167 c[classes_vector[actinds[1]].ClassLabel() - 1] = -1; 00168 00169 // update coding matrix 00170 M = join_rows(M, c); 00171 00172 // create binary classifier 00173 Classifier* tmpcl = create_classifier 00174 ( 00175 classes_vector[actinds[0]].Data(), 00176 classes_vector[actinds[1]].Data(), 00177 classifiers_type 00178 ); 00179 00180 // update classifier's info 00181 tmpcl->pos.push_back(const_cast<ClassData*>(&(classes_vector[actinds[0]]))); 00182 tmpcl->neg.push_back(const_cast<ClassData*>(&(classes_vector[actinds[1]]))); 00183 00184 tmpcl->n_pos = classes_vector[actinds[0]].Samples(); 00185 tmpcl->n_neg = classes_vector[actinds[1]].Samples(); 00186 00187 // create and push back classifier 00188 classifiers_vector.push_back(tmpcl); 00189 00190 // exit 00191 return; 00192 } 00193 else 00194 { 00195 // first partition of classes 00196 vector<ClassData> part1; 00197 00198 // second partition of classes 00199 vector<ClassData> part2; 00200 00201 // datamatrix of possitive classes 00202 mat A; 00203 00204 // datamatrix of negative classes 00205 mat B; 00206 00207 
// create partitions for root of tree 00208 for(u32 i = 0; i < root.n_elem; i++) 00209 { 00210 if(root[i] == 1) 00211 { 00212 part1.push_back(classes_vector[i]); 00213 A = join_cols(A, classes_vector[i].Data()); 00214 } 00215 else 00216 { 00217 if(root[i] == -1) 00218 { 00219 part2.push_back(classes_vector[i]); 00220 B = join_cols(B, classes_vector[i].Data()); 00221 } 00222 00223 } 00224 00225 } 00226 00227 // append column to coding matrix 00228 M = join_rows(M, root); 00229 00230 // create and push classifier for root of tree 00231 Classifier* tmpcl = create_classifier(A, B, classifiers_type); 00232 00233 // update classifier's data for positive classes 00234 for(u32 i = 0; i < part1.size(); i++) 00235 { 00236 tmpcl->pos.push_back 00237 ( 00238 const_cast<ClassData*>(&(classes_vector[part1[i].ClassLabel() - 1])) 00239 ); 00240 00241 tmpcl->n_pos += part1[i].Samples(); 00242 } 00243 00244 // updata classifier's data for negative classes 00245 for(u32 i = 0; i < part2.size(); i++) 00246 { 00247 tmpcl->neg.push_back 00248 ( 00249 const_cast<ClassData*>(&(classes_vector[part2[i].ClassLabel() - 1])) 00250 ); 00251 00252 tmpcl->n_neg += part2[i].Samples(); 00253 } 00254 00255 // push back classifier 00256 classifiers_vector.push_back(tmpcl); 00257 00258 // create root for first partition 00259 ivec root1 = zeros<ivec>(M.n_rows); 00260 00261 // create root for second partion 00262 ivec root2 = zeros<ivec>(M.n_rows); 00263 00264 // positive classes are more than 2 00265 if(part1.size() > 2) 00266 { 00267 // find new bipartions for current positive classes 00268 uvec indsbest1 = sffs(part1, criterion_option); 00269 uvec indscomp1 = complement(indsbest1, part1.size()); 00270 00271 for(u32 i = 0; i < indsbest1.n_elem; i++) 00272 { 00273 root1[part1[indsbest1[i]].ClassLabel() - 1] = 1; 00274 } 00275 00276 for(u32 i = 0; i < indscomp1.n_elem; i++) 00277 { 00278 root1[part1[indscomp1[i]].ClassLabel() - 1] = -1; 00279 } 00280 00281 } 00282 else 00283 { 00284 if(part1.size() == 
2) 00285 { 00286 root1[part1[0].ClassLabel() - 1] = 1; 00287 root1[part1[1].ClassLabel() - 1] = -1; 00288 } 00289 else 00290 { 00291 root1[part1[0].ClassLabel() - 1] = 1; 00292 } 00293 00294 } 00295 00296 // if negative classes are more than 2 00297 if(part2.size() > 2) 00298 { 00299 // find new bipartions for current negative classes 00300 uvec indsbest2 = sffs(part2, criterion_option); 00301 uvec indscomp2 = complement(indsbest2, part2.size()); 00302 00303 for(u32 i = 0; i < indsbest2.n_elem; i++) 00304 { 00305 root2[part2[indsbest2[i]].ClassLabel() - 1] = 1; 00306 } 00307 00308 for(u32 i = 0; i < indscomp2.n_elem; i++) 00309 { 00310 root2[part2[indscomp2[i]].ClassLabel() - 1] = -1; 00311 } 00312 00313 } 00314 else 00315 { 00316 if(part2.size() == 2) 00317 { 00318 root2[part2[0].ClassLabel() - 1] = 1; 00319 root2[part2[1].ClassLabel() - 1] = -1; 00320 } 00321 else 00322 { 00323 root2[part2[0].ClassLabel() - 1] = 1; 00324 } 00325 00326 } 00327 00328 // recursion step for positive classes 00329 append_tree 00330 ( 00331 classes_vector, 00332 root1, 00333 criterion_option, 00334 classifiers_type, 00335 M, 00336 classifiers_vector 00337 ); 00338 00339 // recursion step for negative classes 00340 append_tree 00341 ( 00342 classes_vector, 00343 root2, 00344 criterion_option, 00345 classifiers_type, 00346 M, 00347 classifiers_vector 00348 ); 00349 00350 } 00351 00352 return; 00353 } 00354 00355 00356 00382 u32 00383 forest_ecoc 00384 ( 00385 const mat& training_samples, 00386 const icolvec& training_labels, 00387 const mat& testing_samples, 00388 const icolvec& testing_labels, 00389 const int decoding_strategy, 00390 const int classifiers_type, 00391 const int criterion_option, 00392 const u32 n_forests, 00393 const bool verbose, 00394 ofstream& verbose_output, 00395 double& elapsed_time 00396 ) 00397 { 00398 // timer object to count execution times 00399 wall_clock timer; 00400 00401 // start timer 00402 timer.tic(); 00403 00404 // number of training samples 00405 
const u32 n_training_samples = training_samples.n_rows; 00406 00407 // number of samples attributes 00408 const u32 n_attributes = training_samples.n_cols; 00409 00410 // number of testing samples 00411 const u32 n_testing_samples = testing_samples.n_rows; 00412 00413 // variable to hold the number of classes 00414 u32 n_classes = 0; 00415 00416 // adjust the training samples class labels to start from one 00417 // and count number of classes 00418 const icolvec tmp_training_labels = 00419 conv_to<icolvec>::from(process_labels(training_labels, n_classes)); 00420 00421 // adjust the testing samples class labels to start from one 00422 const uvec tmp_testing_labels = process_labels(testing_labels); 00423 00424 // create classes vector 00425 vector<ClassData> classes_vector = create_class_vector 00426 ( 00427 training_samples, 00428 tmp_training_labels 00429 ); 00430 00431 // coding matrix 00432 imat coding_matrix; 00433 00434 // classifiers vector 00435 vector<Classifier*> classifiers_vector; 00436 00437 // classification error 00438 double error = 0.0; 00439 00440 // predictions for each sample 00441 uvec predictions; 00442 00443 // confussion matrix 00444 umat confussion; 00445 00446 // number of misclassified samples 00447 u32 n_missed = 0; 00448 00449 // ================================================================ // 00450 // || Training Step || // 00451 // ================================================================ // 00452 00453 // create initial tree 00454 decoc_ecocone 00455 ( 00456 classes_vector, 00457 classifiers_type, 00458 criterion_option, 00459 coding_matrix, 00460 classifiers_vector 00461 ); 00462 00463 // create all valide roots matrix 00464 imat vroots = permvec(coding_matrix.col(0)); 00465 00466 u32 n_iter = n_forests; 00467 00468 if(n_forests > vroots.n_cols) 00469 { 00470 n_iter = vroots.n_cols; 00471 } 00472 00473 // create random permutation of indices 00474 vec rv = randu<vec>(vroots.n_cols); 00475 uvec rinds = sort_index(rv); 00476 
00477 // iterate through number of forests 00478 for(u32 i = 0; i < n_iter; i++) 00479 { 00480 append_tree 00481 ( 00482 classes_vector, 00483 vroots.col(rinds[i]), 00484 criterion_option, 00485 classifiers_type, 00486 coding_matrix, 00487 classifiers_vector 00488 ); 00489 00490 } 00491 00492 // ================================================================ // 00493 // || Testing Step || // 00494 // ================================================================ // 00495 00496 // decode coding matrix and compute generalization error 00497 decode 00498 ( 00499 testing_samples, 00500 tmp_testing_labels, 00501 coding_matrix, 00502 classifiers_vector, 00503 classes_vector, 00504 decoding_strategy, 00505 predictions, 00506 n_missed, 00507 error, 00508 confussion 00509 ); 00510 00511 // if verbose output is activated 00512 if(verbose == true) 00513 { 00514 predictions = join_rows(predictions, tmp_testing_labels); 00515 verbose_output << "* Predictions vs Labels: " << endl << predictions << endl << endl; 00516 verbose_output << "* Coding Matrix: " << endl << coding_matrix << endl << endl; 00517 verbose_output << "* Confusion Matrix: " << endl << confussion << endl; 00518 } 00519 00520 // clean up classifiers vector 00521 for(u32 i = 0; i < classifiers_vector.size(); i++) 00522 { 00523 delete classifiers_vector[i]; 00524 } 00525 00526 // stop timer 00527 elapsed_time = timer.toc(); 00528 00529 // reset class counter 00530 ClassData::globalIndex = 0; 00531 00532 // return number of misclassified samples 00533 return n_missed; 00534 } 00535 00536 00537