ECOCPAK v0.9
|
00001 // Copyright (C) 2011 the authors listed below 00002 // http://ecocpak.sourceforge.net 00003 // 00004 // Authors: 00005 // - Dimitrios Bouzas (bouzas at ieee dot org) 00006 // - Nikolaos Arvanitopoulos (niarvani at ieee dot org) 00007 // - Anastasios Tefas (tefas at aiia dot csd dot auth dot gr) 00008 // 00009 // This file is part of the ECOC PAK C++ library. It is 00010 // provided without any warranty of fitness for any purpose. 00011 // 00012 // You can redistribute this file and/or modify it under 00013 // the terms of the GNU Lesser General Public License (LGPL) 00014 // as published by the Free Software Foundation, either 00015 // version 3 of the License or (at your option) any later 00016 // version. 00017 // (see http://www.opensource.org/licenses for more info) 00018 00019 00022 00023 00024 00042 Classifier* 00043 create_classifier 00044 ( 00045 const ClassData& A, 00046 const ClassData& B, 00047 const u32 classifiers_option, 00048 double& training_error 00049 ) 00050 { 00051 switch(classifiers_option) 00052 { 00053 // Nearest Class Centroid Classifier 00054 case NCC: 00055 { 00056 Classifier_ncc* tmp = new Classifier_ncc(A.Data(), B.Data()); 00057 00058 // number of feature vectors in first class 00059 const u32 n_samplesA = A.Samples(); 00060 00061 // number of feature vectors in second class 00062 const u32 n_samplesB = B.Samples(); 00063 00064 // test classifier 00065 u32 n_missed = 0; 00066 00067 // find misclassified for first class 00068 for(u32 i = 0; i < n_samplesA; i++) 00069 { 00070 if(tmp->predict(A.Data().row(i)) < 0) 00071 { 00072 n_missed++; 00073 } 00074 } 00075 00076 // find misclassified for second class 00077 for(u32 i = 0; i < n_samplesB; i++) 00078 { 00079 if(tmp->predict(B.Data().row(i)) > 0) 00080 { 00081 n_missed++; 00082 } 00083 } 00084 00085 // compute training error 00086 training_error = 00087 (double(n_missed) / double(n_samplesA + n_samplesB)); 00088 00089 return tmp; 00090 } 00091 00092 // Fisher Linear Discriminant 
Analysis followed by NCC 00093 case FLDA: 00094 { 00095 Classifier_flda* tmp = new Classifier_flda(A.Data(), B.Data()); 00096 00097 // number of feature vectors in first class 00098 const u32 n_samplesA = A.Samples(); 00099 00100 // number of feature vectors in second class 00101 const u32 n_samplesB = B.Samples(); 00102 00103 // test classifier 00104 u32 n_missed = 0; 00105 00106 // find misclassified for first class 00107 for(u32 i = 0; i < n_samplesA; i++) 00108 { 00109 if(tmp->predict(A.Data().row(i)) < 0) 00110 { 00111 n_missed++; 00112 } 00113 } 00114 00115 // find misclassified for second class 00116 for(u32 i = 0; i < n_samplesB; i++) 00117 { 00118 if(tmp->predict(B.Data().row(i)) > 0) 00119 { 00120 n_missed++; 00121 } 00122 } 00123 00124 // compute training error 00125 training_error = 00126 (double(n_missed) / double(n_samplesA + n_samplesB)); 00127 00128 return tmp; 00129 } 00130 00131 // Support Vector Machine 00132 case SVM: 00133 { 00134 Classifier_svm* tmp = new Classifier_svm(A.Data(), B.Data()); 00135 00136 // number of feature vectors in first class 00137 const u32 n_samplesA = A.Samples(); 00138 00139 // number of feature vectors in second class 00140 const u32 n_samplesB = B.Samples(); 00141 00142 // test classifier 00143 u32 n_missed = 0; 00144 00145 // find misclassified for first class 00146 for(u32 i = 0; i < n_samplesA; i++) 00147 { 00148 if(tmp->predict(A.Data().row(i)) < 0) 00149 { 00150 n_missed++; 00151 } 00152 } 00153 00154 // find misclassified for second class 00155 for(u32 i = 0; i < n_samplesB; i++) 00156 { 00157 if(tmp->predict(B.Data().row(i)) > 0) 00158 { 00159 n_missed++; 00160 } 00161 } 00162 00163 // compute training error 00164 training_error = 00165 (double(n_missed) / double(n_samplesA + n_samplesB)); 00166 00167 return tmp; 00168 } 00169 00170 // AdaBoost 00171 case ADABOOST: 00172 { 00173 Classifier_adaBoost* tmp = 00174 new Classifier_adaBoost(A.Data(), B.Data()); 00175 00176 // number of feature vectors in first class 
00177 const u32 n_samplesA = A.Samples(); 00178 00179 // number of feature vectors in second class 00180 const u32 n_samplesB = B.Samples(); 00181 00182 // test classifier 00183 u32 n_missed = 0; 00184 00185 // find misclassified for first class 00186 for(u32 i = 0; i < n_samplesA; i++) 00187 { 00188 if(tmp->predict(A.Data().row(i)) < 0) 00189 { 00190 n_missed++; 00191 } 00192 } 00193 00194 // find misclassified for second class 00195 for(u32 i = 0; i < n_samplesB; i++) 00196 { 00197 if(tmp->predict(B.Data().row(i)) > 0) 00198 { 00199 n_missed++; 00200 } 00201 } 00202 00203 // compute training error 00204 training_error = 00205 (double(n_missed) / double(n_samplesA + n_samplesB)); 00206 00207 return tmp; 00208 } 00209 00210 // Sum of Error Squares Classifier 00211 case LEAST_SQUARES: 00212 { 00213 Classifier_ls* tmp = 00214 new Classifier_ls(A.Data(), B.Data()); 00215 00216 // number of feature vectors in first class 00217 const u32 n_samplesA = A.Samples(); 00218 00219 // number of feature vectors in second class 00220 const u32 n_samplesB = B.Samples(); 00221 00222 // test classifier 00223 u32 n_missed = 0; 00224 00225 // find misclassified for first class 00226 for(u32 i = 0; i < n_samplesA; i++) 00227 { 00228 if(tmp->predict(A.Data().row(i)) < 0) 00229 { 00230 n_missed++; 00231 } 00232 } 00233 00234 // find misclassified for second class 00235 for(u32 i = 0; i < n_samplesB; i++) 00236 { 00237 if(tmp->predict(B.Data().row(i)) > 0) 00238 { 00239 n_missed++; 00240 } 00241 } 00242 00243 // compute training error 00244 training_error = 00245 (double(n_missed) / double(n_samplesA + n_samplesB)); 00246 00247 return tmp; 00248 } 00249 00250 // Custom Classifier 00251 case CUSTOM_CLASSIFIER: 00252 { 00253 Classifier_custom* tmp = 00254 new Classifier_custom(A.Data(), B.Data()); 00255 00256 // number of feature vectors in first class 00257 const u32 n_samplesA = A.Samples(); 00258 00259 // number of feature vectors in second class 00260 const u32 n_samplesB = 
B.Samples(); 00261 00262 // test classifier 00263 u32 n_missed = 0; 00264 00265 // find misclassified for first class 00266 for(u32 i = 0; i < n_samplesA; i++) 00267 { 00268 if(tmp->predict(A.Data().row(i)) < 0) 00269 { 00270 n_missed++; 00271 } 00272 } 00273 00274 // find misclassified for second class 00275 for(u32 i = 0; i < n_samplesB; i++) 00276 { 00277 if(tmp->predict(B.Data().row(i)) > 0) 00278 { 00279 n_missed++; 00280 } 00281 } 00282 00283 // compute training error 00284 training_error = 00285 (double(n_missed) / double(n_samplesA + n_samplesB)); 00286 00287 return tmp; 00288 } 00289 00290 default: 00291 { 00292 arma_debug_print 00293 ( 00294 "create_classifier(): Unknown classifier's option" 00295 ); 00296 00297 } 00298 00299 } 00300 00301 } 00302 00303 00304 00322 Classifier* 00323 create_classifier 00324 ( 00325 const ClassData& A, 00326 const ClassData& B, 00327 const u32 classifiers_option 00328 ) 00329 { 00330 switch(classifiers_option) 00331 { 00332 // Nearest Class Centroid Classifier 00333 case NCC: 00334 { 00335 return new Classifier_ncc(A.Data(), B.Data()); 00336 } 00337 00338 // Fisher Linear Discriminant Analysis followed by NCC 00339 case FLDA: 00340 { 00341 return new Classifier_flda(A.Data(), B.Data()); 00342 } 00343 00344 // Support Vector Machine 00345 case SVM: 00346 { 00347 return new Classifier_svm(A.Data(), B.Data()); 00348 } 00349 00350 // AdaBoost 00351 case ADABOOST: 00352 { 00353 return new Classifier_adaBoost(A.Data(), B.Data()); 00354 } 00355 00356 // Sum of Error Squares Classifier 00357 case LEAST_SQUARES: 00358 { 00359 return new Classifier_ls(A.Data(), B.Data()); 00360 } 00361 00362 // Custom Classifier 00363 case CUSTOM_CLASSIFIER: 00364 { 00365 return new Classifier_custom(A.Data(), B.Data()); 00366 } 00367 00368 default: 00369 { 00370 arma_debug_print 00371 ( 00372 "create_classifier(): Unknown classifier's option" 00373 ); 00374 00375 } 00376 00377 } 00378 00379 } 00380 00381 00382 00398 Classifier* 00399 
create_classifier 00400 ( 00401 const mat& A, 00402 const mat& B, 00403 const u32 classifiers_option 00404 ) 00405 { 00406 switch(classifiers_option) 00407 { 00408 // Nearest Class Centroid Classifier 00409 case NCC: 00410 { 00411 return new Classifier_ncc(A, B); 00412 } 00413 00414 // Fisher Linear Discriminant Analysis followed by NCC 00415 case FLDA: 00416 { 00417 return new Classifier_flda(A, B); 00418 } 00419 00420 // Support Vector Machine 00421 case SVM: 00422 { 00423 return new Classifier_svm(A, B); 00424 } 00425 00426 // AdaBoost 00427 case ADABOOST: 00428 { 00429 return new Classifier_adaBoost(A, B); 00430 } 00431 00432 // Sum of Error Squares Classifier 00433 case LEAST_SQUARES: 00434 { 00435 return new Classifier_ls(A, B); 00436 } 00437 00438 // Custom Classifier 00439 case CUSTOM_CLASSIFIER: 00440 { 00441 return new Classifier_custom(A, B); 00442 } 00443 00444 default: 00445 { 00446 arma_debug_print 00447 ( 00448 "create_classifier(): Unknown classifier's option" 00449 ); 00450 00451 } 00452 00453 } 00454 00455 } 00456 00457 00458 00476 void 00477 coding_matrix_update 00478 ( 00479 imat& coding_matrix, 00480 const vector<ClassData>& class_vector, 00481 vector<ClassData>& split_classes_vector, 00482 u32 indx 00483 ) 00484 { 00485 const u32 n_rows = coding_matrix.n_rows; 00486 bool found = false; 00487 imat tmp; 00488 00489 // for all the rows of the coding matrix (== number of subclasses 00490 // created so far) 00491 for(u32 i = 0; i < n_rows; i++) 00492 { 00493 // class index == class vector index 00494 if(coding_matrix(i, 1) == class_vector[indx].ClassIndex()) 00495 { 00496 // the new tmp matrix will have one more row than the previous 00497 // coding matrix 00498 tmp = zeros<imat>(coding_matrix.n_rows + 1, coding_matrix.n_cols); 00499 00500 // until row i the matrix remains the same 00501 tmp.rows(0, i) = coding_matrix.rows(0, i); 00502 00503 // in row i we assign the class label and class index of 00504 // the first element in the new split classes 
vector 00505 tmp(i, 1) = split_classes_vector[0].ClassIndex(); 00506 tmp(i, 0) = split_classes_vector[0].ClassLabel(); 00507 00508 // row i is copied to row i + 1 00509 tmp.row(i + 1) = tmp.row(i); 00510 00511 // in row i + 1 we assign the class label and class index 00512 // of the second element in the new split classes vector 00513 tmp(i + 1, 1) = split_classes_vector[1].ClassIndex(); 00514 tmp(i + 1, 0) = split_classes_vector[1].ClassLabel(); 00515 00516 // i is not the last class index 00517 if(i < (coding_matrix.n_rows - 1)) 00518 { 00519 // the remaining rows of the coding matrix do not change 00520 tmp.rows(i + 2, tmp.n_rows - 1) = 00521 coding_matrix.rows(i + 1, coding_matrix.n_rows - 1); 00522 } 00523 00524 // update the coding matrix with the new changes 00525 coding_matrix = tmp; 00526 found = true; 00527 break; 00528 } 00529 00530 } 00531 00532 // the class index of the class specified by indx is not equal to any 00533 // class index in the second column of the coding matrix 00534 if(found == false) 00535 { 00536 // the new coding matrix will have two more rows 00537 tmp = zeros<imat>(coding_matrix.n_rows + 2, coding_matrix.n_cols); 00538 00539 // while i runs through the rows of the coding matrix 00540 // and the class label of each row of the matrix is not equal 00541 // to the class label of the class specified by the index indx 00542 u32 i = 0; 00543 while 00544 ( 00545 i < coding_matrix.n_rows && coding_matrix(i, 0) != 00546 (class_vector[indx].ClassLabel() + 1) 00547 ) 00548 { 00549 // increment the row counter 00550 i++; 00551 } 00552 00553 // i is not the last row 00554 if(i < coding_matrix.n_rows) 00555 { 00556 // the first i - 1 rows of the coding matrix remain the same 00557 tmp.rows(0, i - 1) = coding_matrix.rows(0, i - 1); 00558 00559 // row i - 1 is copied to row i 00560 tmp.row(i) = tmp.row(i - 1); 00561 00562 // in row i we assign the class label and class index of 00563 // the first element in the new split classes vector 00564 tmp(i, 
0) = split_classes_vector[0].ClassLabel(); 00565 tmp(i, 1) = split_classes_vector[0].ClassIndex(); 00566 00567 // row i - 1 is copied to row i + 1 00568 tmp.row(i + 1) = tmp.row(i - 1); 00569 00570 // in row i + 1 we assign the class label and class index 00571 // of the second element in the new split classes vector 00572 tmp(i + 1, 0) = split_classes_vector[1].ClassLabel(); 00573 tmp(i + 1, 1) = split_classes_vector[1].ClassIndex(); 00574 00575 // the remaining rows are the same until the end 00576 tmp.rows(i + 2, tmp.n_rows - 1) = 00577 coding_matrix.rows(i, coding_matrix.n_rows - 1); 00578 } 00579 else // i is the last row 00580 { 00581 // the first (n - 1) rows are the same 00582 tmp.rows(0, coding_matrix.n_rows - 1) = coding_matrix; 00583 00584 // row i - 1 is copied to row i 00585 tmp.row(i) = tmp.row(i - 1); 00586 00587 // in row i we assign the class label and class index of 00588 // the first element in the new split classes vector 00589 tmp(i, 0) = split_classes_vector[0].ClassLabel(); 00590 tmp(i, 1) = split_classes_vector[0].ClassIndex(); 00591 00592 // row i - 1 is copied to row i + 1 00593 tmp.row(i + 1) = tmp.row(i - 1); 00594 00595 // in row i + 1 we assign the class label and class index 00596 // of the second element in the new split classes vector 00597 tmp(i + 1, 0) = split_classes_vector[1].ClassLabel(); 00598 tmp(i + 1, 1) = split_classes_vector[1].ClassIndex(); 00599 } 00600 00601 // update the coding matrix 00602 coding_matrix = tmp; 00603 } 00604 00605 } 00606 00607 00608 00631 void 00632 update_class_tracker 00633 ( 00634 vector<colvec>& class_tracker, 00635 const vector<ClassData>& class_vector, 00636 const u32 indx, 00637 vector<ClassData>& split_classes_vector, 00638 vector<ClassData>& classes_created_vector 00639 ) 00640 { 00641 // vector which will update the class tracker 00642 colvec newVec; 00643 00644 // size of class tracker 00645 const u32 sz = class_tracker.size(); 00646 00647 // iterate through class tracker size 00648 
for(u32 i = 0; i < sz; i++) 00649 { 00650 // for each row of class tracker i 00651 for(u32 j = 0; j < class_tracker[i].n_rows; j++) 00652 { 00653 // class index of class specified by indx equal the respective 00654 // class tracker value 00655 if(class_vector[indx].ClassIndex() == (u32)(class_tracker[i](j))) 00656 { 00657 // the new vector will contain two more rows than class 00658 // tracker i 00659 newVec = zeros< colvec >(class_tracker[i].n_rows + 2); 00660 00661 // until row n - 1 the class tracker i remains the same 00662 newVec.rows(0, newVec.n_rows - 3) = class_tracker[i]; 00663 00664 // rows n - 1 and n are filled with the indices of the 00665 // subclasses 00666 newVec(newVec.n_rows - 2) = 00667 split_classes_vector[0].ClassIndex(); 00668 00669 newVec(newVec.n_rows - 1) = 00670 split_classes_vector[1].ClassIndex(); 00671 00672 // update class tracker i with the new vector and free memory 00673 class_tracker[i].reset(); 00674 class_tracker[i] = newVec; 00675 newVec.reset(); 00676 break; 00677 } 00678 00679 } 00680 00681 } 00682 00683 // create two values with the class indices of the two subclasses 00684 colvec vec1 = zeros<colvec>(1); 00685 vec1(0) = split_classes_vector[0].ClassIndex(); 00686 00687 colvec vec2 = zeros<colvec>(1); 00688 vec2(0) = split_classes_vector[1].ClassIndex(); 00689 00690 // push them back to the class tracker vector 00691 class_tracker.push_back(vec1); 00692 class_tracker.push_back(vec2); 00693 00694 // Update classes vector 00695 classes_created_vector.push_back(split_classes_vector[0]); 00696 classes_created_vector.push_back(split_classes_vector[1]); 00697 } 00698 00699 00700 00723 bool 00724 save_classifier 00725 ( 00726 Classifier* c, 00727 const vector<ClassData>& class_vector, 00728 const ucolvec& best_set_indices, 00729 const ucolvec& complement_set_indices, 00730 vector<Classifier*>& classifiers_vector 00731 ) 00732 { 00733 const u32 class_vector_size = class_vector.size(); 00734 00735 // for the total size of the classes 
//! @brief Attach the positive/negative class memberships to classifier
//!        c according to the best-set / complement-set partition, then
//!        store c in classifiers_vector unless an equal classifier is
//!        already present.
//!
//! @param c classifier to annotate and (possibly) store
//! @param class_vector all classes of the current problem
//! @param best_set_indices indices into class_vector of the classes on
//!        the positive side of c
//! @param complement_set_indices indices into class_vector of the
//!        classes on the negative side of c
//! @param classifiers_vector collection of saved classifiers
//! @return true if c was appended, false if an equal classifier was
//!         already stored
//!
//! NOTE(review): c is mutated (heap-allocated ClassData copies pushed
//! into c->pos / c->neg) BEFORE the duplicate check, so on a false
//! return c has still been modified and nothing allocated here is
//! freed -- presumably the caller disposes of a rejected classifier;
//! TODO confirm.
bool
save_classifier
  (
  Classifier* c,
  const vector<ClassData>& class_vector,
  const ucolvec& best_set_indices,
  const ucolvec& complement_set_indices,
  vector<Classifier*>& classifiers_vector
  )
  {
  const u32 class_vector_size = class_vector.size();

  // for the total size of the classes vector
  for(u32 i = 0; i < class_vector_size; i++)
    {
    // iterate through the best set
    for(u32 j = 0; j < best_set_indices.n_rows; j++)
      {
      // class indices of class i and class j of the best set agree
      if(
         class_vector[i].ClassIndex() ==
         class_vector[best_set_indices[j]].ClassIndex()
        )
        {
        // the plus label of the classifier is updated with the class
        // index of the i-th class (c takes ownership of the copy)
        ClassData* tmp = new ClassData(class_vector[i]);
        c->pos.push_back(tmp);

        // accumulate the positive-side sample count
        c->n_pos += class_vector[i].Samples();

        }

      }

    // iterate through the complement set
    // (its size is the number of classes not in the best set)
    for(u32 j = 0; j < class_vector_size - best_set_indices.n_rows; j++)
      {
      // class indices of class i and class j of the complement set
      // agree
      if(
         class_vector[i].ClassIndex() ==
         class_vector[complement_set_indices[j]].ClassIndex()
        )
        {
        // the minus label of the classifier is updated with the class
        // index of the i-th class (c takes ownership of the copy)

        ClassData* tmp = new ClassData(class_vector[i]);
        c->neg.push_back(tmp);

        // accumulate the negative-side sample count
        c->n_neg += class_vector[i].Samples();
        }

      }

    }

  // check if c is already in the initial vector of classifiers
  for(u32 i = 0; i < classifiers_vector.size(); i++)
    {
    if(*classifiers_vector[i] == *c)
      {
      // duplicate found: c is NOT stored (see NOTE above)
      return false;
      }

    }

  // push classifier object to the vector of classifiers
  classifiers_vector.push_back(c);

  return true;
  }
00830 00831 ClassData* tmpB = new ClassData(class_vector[1]); 00832 c->neg.push_back(tmpB); 00833 00834 // update number of positive and negative samples of classifier 00835 c->n_pos = class_vector[0].Samples(); 00836 c->n_neg = class_vector[1].Samples(); 00837 00838 // size of the classifiers vector 00839 const u32 classifiers_vector_size = classifiers_vector.size(); 00840 00841 // iterate through number of classifiers 00842 for(u32 i = 0; i < classifiers_vector_size; i++) 00843 { 00844 // classifier already in the vector 00845 if(*classifiers_vector[i] == *c) 00846 { 00847 return false; 00848 } 00849 00850 } 00851 00852 // push classifier object to the vector of classifiers 00853 classifiers_vector.push_back(c); 00854 00855 return true; 00856 } 00857 00858 00859 00877 bool 00878 contain_same_classes 00879 ( 00880 const ucolvec& A, 00881 const vector<ClassData>& B 00882 ) 00883 { 00884 const u32 n_rows = A.n_rows; 00885 00886 // if the sizes disagree return false 00887 if(n_rows != B.size()) 00888 { 00889 return false; 00890 } 00891 00892 for(u32 i = 0; i < n_rows; i++) 00893 { 00894 // class indices disagree return false 00895 if(A[i] != B[i].ClassIndex()) 00896 { 00897 return false; 00898 } 00899 00900 } 00901 00902 return true; 00903 } 00904 00905 00906 00933 void 00934 direct_subclass_encoding 00935 ( 00936 const vector<ClassData>& class_vector, 00937 const Threshold& thres, 00938 const int criterion_option, 00939 const int classifiers_option, 00940 imat& coding_matrix, 00941 vector<Classifier*>& classifiers_vector, 00942 vector<ClassData>& classes_created_vector, 00943 vector<colvec>& class_tracker, 00944 vector<ucolvec>& problem_tracker 00945 ) 00946 { 00947 // search through problem_tracker to check if current 00948 // problem has already been examined 00949 for(u32 i = 0; i < problem_tracker.size(); i++) 00950 { 00951 00952 // initial class vector contains the same class indices 00953 // as the current problem_tracker's vector the exit 00954 // recursion 
branch because problem has been already 00955 // examined in another recursion branch 00956 if(contain_same_classes(problem_tracker[i], class_vector) == true) 00957 { 00958 return; 00959 } 00960 00961 } 00962 00963 // number of initial classes 00964 const u32 class_vector_size = class_vector.size(); 00965 00966 // current problem's class indices 00967 ucolvec cur_class_indices = zeros<ucolvec>(class_vector_size); 00968 00969 // copy current class indices 00970 for(u32 i = 0; i < class_vector_size; i++) 00971 { 00972 cur_class_indices[i] = class_vector[i].ClassIndex(); 00973 } 00974 00975 // push current class indices into the problem tracker 00976 problem_tracker.push_back(cur_class_indices); 00977 00978 // Trivial recursion condition just return 00979 if(class_vector_size == 1) 00980 { 00981 return; 00982 } 00983 else // Else class_vector vector contains more than one class 00984 { 00985 // temporary samples matrix and labels vector for on spot problem 00986 // construction 00987 mat current_samples; 00988 icolvec current_labels; 00989 00990 // If class_vector vector contains only two classes 00991 if(class_vector_size == 2) 00992 { 00993 // evaluate training error of specific problem 00994 double error; 00995 Classifier* first_level_classifier = 00996 create_classifier 00997 ( 00998 class_vector[0], 00999 class_vector[1], 01000 classifiers_option, 01001 error 01002 ); 01003 01004 // if error of classification is larger than specified threshold 01005 if(error > thres.Performance()) 01006 { 01007 // --- Split both classes to find clusters with maximoum --- // 01008 // --- distance --- // 01009 01010 // indices of first class clustering 01011 ucolvec ind_a; 01012 01013 // number of samples in each cluster 01014 ucolvec clust_num_a; 01015 01016 // split first class into two subclasses 01017 mat centers_a = 01018 subClasses(ind_a, clust_num_a, class_vector[0].Data()); 01019 01020 // indices of second class clustering 01021 ucolvec ind_b; 01022 01023 // number of 
samples in each cluster 01024 ucolvec clust_num_b; 01025 01026 // split second class into two subclasses 01027 mat centers_b = 01028 subClasses(ind_b, clust_num_b, class_vector[1].Data()); 01029 01030 // ------------------- End of splitting --------------------- // 01031 01032 // if the clusters of the first class have the largest distance 01033 if( 01034 norm(centers_a.row(0) - centers_a.row(1), 2) >= 01035 norm(centers_b.row(0) - centers_b.row(1), 2) 01036 ) 01037 { 01038 // if the created clusters sizes are less than specified 01039 // threshold 01040 if( 01041 clust_num_a[0] < thres.Size() || 01042 clust_num_a[1] < thres.Size() 01043 ) 01044 { 01045 // Create and save a classifier for initial problem 01046 save_classifier 01047 ( 01048 first_level_classifier, 01049 class_vector, 01050 classifiers_vector 01051 ); 01052 01053 } 01054 else // else split the first class 01055 { 01056 // split the first class into two clusters 01057 vector<ClassData> split_classes_vector = 01058 split_class 01059 ( 01060 class_vector[0], 01061 ind_a, 01062 clust_num_a, 01063 thres.Size() 01064 ); 01065 01066 // construct and evaluate first problem 01067 vector<ClassData> tmp_class_vector_a; 01068 tmp_class_vector_a.push_back(split_classes_vector[0]); 01069 tmp_class_vector_a.push_back(class_vector[1]); 01070 01071 01072 // evaluate training error of first cluster against rest of 01073 // the classes 01074 double error_a; 01075 Classifier* tmp_classifier_a = 01076 create_classifier 01077 ( 01078 split_classes_vector[0], 01079 class_vector[1], 01080 classifiers_option, 01081 error_a 01082 ); 01083 01084 // construct and evaluate second problem 01085 vector<ClassData> tmp_class_vector_b; 01086 tmp_class_vector_b.push_back(split_classes_vector[1]); 01087 tmp_class_vector_b.push_back(class_vector[1]); 01088 01089 01090 // evaluate training error of second cluster against rest of 01091 // the classes 01092 double error_b; 01093 Classifier* tmp_classifier_b = 01094 create_classifier 01095 
( 01096 split_classes_vector[1], 01097 class_vector[1], 01098 classifiers_option, 01099 error_b 01100 ); 01101 01102 // clean up temporary created classifiers 01103 delete tmp_classifier_a; 01104 delete tmp_classifier_b; 01105 01106 // --- Check if the we have improvement according to --- // 01107 // --- specified threshold for at least one of the --- // 01108 // --- created problems --- // 01109 01110 // If improvement of both the new problems was satisfying 01111 // according to specified threshold (comparison is done 01112 // with respect the error percentage) 01113 if( 01114 ((100.0 * (error - error_a)) / error) >= 01115 thres.Improvement() && 01116 ((100.0 * (error - error_b)) / error) >= 01117 thres.Improvement() 01118 ) 01119 { 01120 // encoding matrix update 01121 coding_matrix_update 01122 ( 01123 coding_matrix, 01124 class_vector, 01125 split_classes_vector, 01126 0 01127 ); 01128 01129 // update class tracker 01130 update_class_tracker 01131 ( 01132 class_tracker, 01133 class_vector, 01134 0, 01135 split_classes_vector, 01136 classes_created_vector 01137 ); 01138 01139 // delete first level classifier since is not needed 01140 delete first_level_classifier; 01141 01142 // recursive calls to direct_subclass_encoding 01143 direct_subclass_encoding 01144 ( 01145 tmp_class_vector_a, 01146 thres, criterion_option, 01147 classifiers_option, 01148 coding_matrix, 01149 classifiers_vector, 01150 classes_created_vector, 01151 class_tracker, 01152 problem_tracker 01153 ); 01154 01155 direct_subclass_encoding 01156 ( 01157 tmp_class_vector_b, 01158 thres, 01159 criterion_option, 01160 classifiers_option, 01161 coding_matrix, 01162 classifiers_vector, 01163 classes_created_vector, 01164 class_tracker, 01165 problem_tracker 01166 ); 01167 01168 } 01169 else // Impovement wasn't good 01170 { 01171 // Reduce the static global index of the classes created 01172 // because 01173 // the two new subclasses created were rejected 01174 ClassData::globalIndex -= 2; 01175 01176 
// Create and save a classifier for initial problem 01177 save_classifier 01178 ( 01179 first_level_classifier, 01180 class_vector, 01181 classifiers_vector 01182 ); 01183 01184 } // end of if-else for improvement 01185 01186 } // end of if-else for clusters sizes 01187 01188 } 01189 else // if second class is to be splitted 01190 { 01191 // if cluster size is less than threshold 01192 if( 01193 clust_num_b[0] < thres.Size() || 01194 clust_num_b[1] < thres.Size() 01195 ) 01196 { 01197 // Create and save a classifier for initial problem 01198 save_classifier 01199 ( 01200 first_level_classifier, 01201 class_vector, 01202 classifiers_vector 01203 ); 01204 01205 } 01206 else // split the second class 01207 { 01208 // split class_vector[1] class and store the two new created 01209 // subclasses - clusters in a vector split_classes_vector 01210 vector<ClassData> split_classes_vector = 01211 split_class 01212 ( 01213 class_vector[1], 01214 ind_b, 01215 clust_num_b, 01216 thres.Size() 01217 ); 01218 01219 // construct and evaluate first problem 01220 vector<ClassData> tmp_class_vector_a; 01221 tmp_class_vector_a.push_back(split_classes_vector[0]); 01222 tmp_class_vector_a.push_back(class_vector[0]); 01223 01224 01225 // evaluate training error of first cluster against rest of 01226 // the classes 01227 double error_a; 01228 Classifier* tmp_classifier_a = 01229 create_classifier 01230 ( 01231 split_classes_vector[0], 01232 class_vector[0], 01233 classifiers_option, 01234 error_a 01235 ); 01236 01237 // construct and evaluate second problem 01238 vector<ClassData> tmp_class_vector_b; 01239 tmp_class_vector_b.push_back(split_classes_vector[1]); 01240 tmp_class_vector_b.push_back(class_vector[0]); 01241 01242 01243 // evaluate training error of second cluster against rest of 01244 // the classes 01245 double error_b; 01246 Classifier* tmp_classifier_b = 01247 create_classifier 01248 ( 01249 split_classes_vector[1], 01250 class_vector[0], 01251 classifiers_option, 01252 
error_b 01253 ); 01254 01255 // clean up temporary created classifiers 01256 delete tmp_classifier_a; 01257 delete tmp_classifier_b; 01258 01259 // If improvement of both the new problems was satisfying 01260 // according to specified threshold -- (comparison is done 01261 // with respect the error percentage) 01262 if( 01263 ((100.0 * (error - error_a)) / error) >= 01264 thres.Improvement() && 01265 ((100.0 * (error - error_b)) / error) >= 01266 thres.Improvement() 01267 ) 01268 { 01269 // --- encoding matrix update --- // 01270 coding_matrix_update 01271 ( 01272 coding_matrix, 01273 class_vector, 01274 split_classes_vector, 01275 1 01276 ); 01277 01278 // update class tracker 01279 update_class_tracker 01280 ( 01281 class_tracker, 01282 class_vector, 01283 1, 01284 split_classes_vector, 01285 classes_created_vector 01286 ); 01287 01288 // delete first level classifier since is not needed 01289 delete first_level_classifier; 01290 01291 // recursive calls to direct_subclass_encoding 01292 direct_subclass_encoding 01293 ( 01294 tmp_class_vector_a, 01295 thres, 01296 criterion_option, 01297 classifiers_option, 01298 coding_matrix, 01299 classifiers_vector, 01300 classes_created_vector, 01301 class_tracker, 01302 problem_tracker 01303 ); 01304 01305 direct_subclass_encoding 01306 ( 01307 tmp_class_vector_b, 01308 thres, 01309 criterion_option, 01310 classifiers_option, 01311 coding_matrix, 01312 classifiers_vector, 01313 classes_created_vector, 01314 class_tracker, 01315 problem_tracker 01316 ); 01317 01318 } 01319 else // improvement was not good 01320 { 01321 // Reduce the static global index of the classes created 01322 // because two new subclasses created were rejected 01323 ClassData::globalIndex -= 2; 01324 01325 // Create and save a classifier for initial problem 01326 save_classifier 01327 ( 01328 first_level_classifier, 01329 class_vector, 01330 classifiers_vector 01331 ); 01332 01333 } 01334 01335 } 01336 01337 } 01338 01339 } 01340 else // if performance 
is good 01341 { 01342 // Create and save a classifier for initial problem 01343 save_classifier 01344 ( 01345 first_level_classifier, 01346 class_vector, 01347 classifiers_vector 01348 ); 01349 01350 } 01351 01352 } 01353 else // classes more than two 01354 { 01355 // run SFFS to configure binary decomposition of classes 01356 ucolvec best_set_indices = sffs(class_vector, criterion_option); 01357 01358 // number of classes in the best subset of classes yield by SFFS 01359 const u32 bestSetSize = best_set_indices.n_rows; 01360 01361 // The SFFS algorithm returned a problem configuration that one 01362 // partition contained one class and the other partition contained 01363 // many classes 01364 01365 // If problem configuration is one against many 01366 if(bestSetSize == 1 || (class_vector_size - bestSetSize) == 1) 01367 { 01368 // If the partition of the best set contains one class 01369 if(bestSetSize == 1) 01370 { 01371 ucolvec complement_set_indices = complement 01372 ( 01373 best_set_indices, 01374 class_vector_size 01375 ); 01376 01377 // merge classes of complement set 01378 ClassData merged_class = merge_selected_classes 01379 (class_vector, 01380 complement_set_indices 01381 ); 01382 01383 vector<ClassData> tmp_class_vector; 01384 tmp_class_vector.push_back(class_vector[best_set_indices[0]]); 01385 tmp_class_vector.push_back(merged_class); 01386 01387 01388 // evaluate training error of specific problem 01389 double error; 01390 Classifier* first_level_classifier = 01391 create_classifier 01392 ( 01393 class_vector[best_set_indices[0]], 01394 merged_class, 01395 classifiers_option, 01396 error 01397 ); 01398 01399 // if error of classification is larger than threshold 01400 if(error > thres.Performance()) 01401 { 01402 ucolvec ind_a; // indices of first clustering 01403 ucolvec clust_num_a; // number of samples in each cluster 01404 01405 // split class1 into two subclasses 01406 mat centers_a = subClasses 01407 ( 01408 ind_a, 01409 clust_num_a, 01410 
class_vector[best_set_indices[0]].Data() 01411 ); 01412 01413 // if cluster size is less than threshold 01414 if( 01415 clust_num_a[0] < thres.Size() || 01416 clust_num_a[1] < thres.Size() 01417 ) 01418 { 01419 // Create and save a classifier 01420 save_classifier 01421 ( 01422 first_level_classifier, 01423 class_vector, 01424 best_set_indices, 01425 complement_set_indices, 01426 classifiers_vector 01427 ); 01428 01429 // create the new ClassData vectors according to the best 01430 // set 01431 vector<ClassData> class_vector_a; 01432 vector<ClassData> class_vector_b; 01433 for(u32 i = 0; i < class_vector_size; i++) 01434 { 01435 if(contains(best_set_indices, bestSetSize, i)) 01436 { 01437 class_vector_a.push_back(class_vector[i]); 01438 } 01439 else 01440 { 01441 class_vector_b.push_back(class_vector[i]); 01442 } 01443 } 01444 01445 // recursive calls to direct_subclass_encoding 01446 direct_subclass_encoding 01447 ( 01448 class_vector_a, 01449 thres, 01450 criterion_option, 01451 classifiers_option, 01452 coding_matrix, 01453 classifiers_vector, 01454 classes_created_vector, 01455 class_tracker, 01456 problem_tracker 01457 ); 01458 01459 direct_subclass_encoding 01460 ( 01461 class_vector_b, 01462 thres, 01463 criterion_option, 01464 classifiers_option, 01465 coding_matrix, 01466 classifiers_vector, 01467 classes_created_vector, 01468 class_tracker, 01469 problem_tracker 01470 ); 01471 01472 } 01473 else // else split first class 01474 { 01475 // Split the unary class 01476 vector<ClassData> split_classes_vector = 01477 split_class 01478 ( 01479 class_vector[best_set_indices[0]], 01480 ind_a, clust_num_a, thres.Size() 01481 ); 01482 01483 // construct and evaluate first problem 01484 vector<ClassData> tmp_class_vector_a; 01485 tmp_class_vector_a.push_back(split_classes_vector[0]); 01486 tmp_class_vector_a.push_back(merged_class); 01487 01488 // evaluate training error of first cluster against rest 01489 // of the classes 01490 double error_a; 01491 Classifier* 
tmp_classifier_a = 01492 create_classifier 01493 ( 01494 split_classes_vector[0], 01495 merged_class, 01496 classifiers_option, 01497 error_a 01498 ); 01499 01500 // construct and evaluate second problem 01501 vector<ClassData> tmp_class_vector_b; 01502 tmp_class_vector_b.push_back(split_classes_vector[1]); 01503 tmp_class_vector_b.push_back(merged_class); 01504 01505 // evaluate training error of second cluster against rest 01506 // of the classes 01507 double error_b; 01508 Classifier* tmp_classifier_b = 01509 create_classifier 01510 ( 01511 split_classes_vector[1], 01512 merged_class, 01513 classifiers_option, 01514 error_b 01515 ); 01516 01517 // clean up temporary created classifiers 01518 delete tmp_classifier_a; 01519 delete tmp_classifier_b; 01520 01521 // If improvement of at least one of the problems was 01522 // satisfying (comparison is done with respect the error 01523 // percentage) 01524 if( 01525 ((100.0 * (error - error_a)) / error) >= 01526 thres.Improvement() && 01527 ((100.0 * ( error - error_b )) / error) >= 01528 thres.Improvement() 01529 ) 01530 { 01531 // --- encoding matrix update --- // 01532 coding_matrix_update 01533 ( 01534 coding_matrix, 01535 class_vector, 01536 split_classes_vector, 01537 best_set_indices[0] 01538 ); 01539 01540 // create the new ClassData vectors according to the 01541 // best set 01542 vector<ClassData> class_vector_a; 01543 class_vector_a.push_back(split_classes_vector[0]); 01544 01545 vector<ClassData> class_vector_b; 01546 class_vector_b.push_back(split_classes_vector[1]); 01547 01548 for(u32 i = 0; i < class_vector_size; i++) 01549 { 01550 if(!contains(best_set_indices, bestSetSize, i)) 01551 { 01552 class_vector_a.push_back(class_vector[i]); 01553 class_vector_b.push_back(class_vector[i]); 01554 } 01555 } 01556 01557 // update class tracker 01558 update_class_tracker 01559 ( 01560 class_tracker, 01561 class_vector, 01562 best_set_indices[0], 01563 split_classes_vector, 01564 classes_created_vector 01565 ); 
01566 01567 // delete first level classifier since is not needed 01568 delete first_level_classifier; 01569 01570 // recursive calls to direct_subclass_encoding 01571 direct_subclass_encoding 01572 ( 01573 class_vector_a, 01574 thres, 01575 criterion_option, 01576 classifiers_option, 01577 coding_matrix, 01578 classifiers_vector, 01579 classes_created_vector, 01580 class_tracker, 01581 problem_tracker 01582 ); 01583 01584 direct_subclass_encoding 01585 ( 01586 class_vector_b, 01587 thres, 01588 criterion_option, 01589 classifiers_option, 01590 coding_matrix, 01591 classifiers_vector, 01592 classes_created_vector, 01593 class_tracker, 01594 problem_tracker 01595 ); 01596 01597 } 01598 else // Impovement wasn't good 01599 { 01600 // Reduce static class counter because the 2 new created 01601 // subclasses were rejected 01602 ClassData::globalIndex -= 2; 01603 01604 // Create and save a classifier 01605 save_classifier 01606 ( 01607 first_level_classifier, 01608 class_vector, 01609 best_set_indices, 01610 complement_set_indices, 01611 classifiers_vector 01612 ); 01613 01614 // create the new ClassData vectors according to the 01615 // best set 01616 vector<ClassData> class_vector_a; 01617 vector<ClassData> class_vector_b; 01618 for(u32 i = 0; i < class_vector_size; i++) 01619 { 01620 if(contains(best_set_indices, bestSetSize, i)) 01621 { 01622 class_vector_a.push_back(class_vector[i]); 01623 } 01624 else 01625 { 01626 class_vector_b.push_back(class_vector[i]); 01627 } 01628 } 01629 01630 // recursive calls to direct_subclass_encoding 01631 direct_subclass_encoding 01632 ( 01633 class_vector_a, 01634 thres, 01635 criterion_option, 01636 classifiers_option, 01637 coding_matrix, 01638 classifiers_vector, 01639 classes_created_vector, 01640 class_tracker, 01641 problem_tracker 01642 ); 01643 01644 direct_subclass_encoding 01645 ( 01646 class_vector_b, 01647 thres, criterion_option, 01648 classifiers_option, 01649 coding_matrix, 01650 classifiers_vector, 01651 
classes_created_vector, 01652 class_tracker, 01653 problem_tracker 01654 ); 01655 01656 } // end of if - else improvement 01657 01658 } // end of if - else clusters size 01659 01660 } 01661 else // Performance was good 01662 { 01663 // Create and save a classifier for specified problem 01664 // configuration 01665 save_classifier 01666 ( 01667 first_level_classifier, 01668 class_vector, 01669 best_set_indices, 01670 complement_set_indices, 01671 classifiers_vector 01672 ); 01673 01674 // create the new ClassData vectors according to the best 01675 // set 01676 vector<ClassData> class_vector_a; 01677 vector<ClassData> class_vector_b; 01678 for(u32 i = 0; i < class_vector_size; i++) 01679 { 01680 if(contains(best_set_indices, bestSetSize, i)) 01681 { 01682 class_vector_a.push_back(class_vector[i]); 01683 } 01684 else 01685 { 01686 class_vector_b.push_back(class_vector[i]); 01687 } 01688 } 01689 01690 // recursive calls to direct_subclass_encoding 01691 direct_subclass_encoding 01692 ( 01693 class_vector_a, 01694 thres, 01695 criterion_option, 01696 classifiers_option, 01697 coding_matrix, 01698 classifiers_vector, 01699 classes_created_vector, 01700 class_tracker, 01701 problem_tracker 01702 ); 01703 01704 direct_subclass_encoding 01705 ( 01706 class_vector_b, 01707 thres, 01708 criterion_option, 01709 classifiers_option, 01710 coding_matrix, 01711 classifiers_vector, 01712 classes_created_vector, 01713 class_tracker, 01714 problem_tracker 01715 ); 01716 01717 } // end of if - else performance 01718 01719 } 01720 else // The unary partition is the complement set 01721 { 01722 ClassData merged_class = merge_selected_classes 01723 ( 01724 class_vector, 01725 best_set_indices 01726 ); 01727 01728 ucolvec complement_set_indices = complement 01729 ( 01730 best_set_indices, 01731 class_vector_size 01732 ); 01733 01734 vector<ClassData> tmp_class_vector; 01735 tmp_class_vector.push_back(merged_class); 01736 tmp_class_vector.push_back 01737 ( 01738 
class_vector[complement_set_indices[0]] 01739 ); 01740 01741 // evaluate training error for specific problem 01742 double error; 01743 Classifier* first_level_classifier = 01744 create_classifier 01745 ( 01746 merged_class, 01747 class_vector[complement_set_indices[0]], 01748 classifiers_option, 01749 error 01750 ); 01751 01752 // if error of classification is larger than threshold 01753 if(error > thres.Performance()) 01754 { 01755 ucolvec ind_a; // indices of first clustering 01756 ucolvec clust_num_a; // number of samples in each cluster 01757 01758 // split first class into two subclasses 01759 mat centers_a = 01760 subClasses 01761 ( 01762 ind_a, 01763 clust_num_a, 01764 class_vector[complement_set_indices[0]].Data() 01765 ); 01766 01767 // if clusters sizes are less than threshold 01768 if( 01769 clust_num_a[0] < thres.Size() || 01770 clust_num_a[1] < thres.Size() 01771 ) 01772 { 01773 // Create and save a classifier specified problem 01774 // configuration 01775 save_classifier 01776 ( 01777 first_level_classifier, 01778 class_vector, 01779 best_set_indices, 01780 complement_set_indices, 01781 classifiers_vector 01782 ); 01783 01784 // create the new ClassData vectors according to the 01785 // best set 01786 vector<ClassData> class_vector_a; 01787 vector<ClassData> class_vector_b; 01788 for(u32 i = 0; i < class_vector_size; i++) 01789 { 01790 if(contains(best_set_indices, bestSetSize, i)) 01791 { 01792 class_vector_b.push_back(class_vector[i]); 01793 } 01794 else 01795 { 01796 class_vector_a.push_back(class_vector[i]); 01797 } 01798 } 01799 01800 // recursive calls to direct_subclass_encoding 01801 direct_subclass_encoding 01802 ( 01803 class_vector_a, 01804 thres, 01805 criterion_option, 01806 classifiers_option, 01807 coding_matrix, 01808 classifiers_vector, 01809 classes_created_vector, 01810 class_tracker, 01811 problem_tracker 01812 ); 01813 01814 direct_subclass_encoding 01815 ( 01816 class_vector_b, 01817 thres, 01818 criterion_option, 01819 
classifiers_option, 01820 coding_matrix, 01821 classifiers_vector, 01822 classes_created_vector, 01823 class_tracker, 01824 problem_tracker 01825 ); 01826 01827 } 01828 else // else split first class 01829 { 01830 // Split the unary class and store its clusters in 01831 // vector split_classes_vector 01832 vector<ClassData> split_classes_vector = 01833 split_class 01834 ( 01835 class_vector[complement_set_indices[0]], 01836 ind_a, 01837 clust_num_a, 01838 thres.Size() 01839 ); 01840 01841 // construct and evaluate first problem 01842 vector<ClassData> tmp_class_vector_a; 01843 tmp_class_vector_a.push_back(split_classes_vector[0]); 01844 tmp_class_vector_a.push_back(merged_class); 01845 01846 // evaluate training error of first cluster against rest 01847 // of the classes 01848 double error_a; 01849 Classifier* tmp_classifier_a = 01850 create_classifier 01851 ( 01852 split_classes_vector[0], 01853 merged_class, 01854 classifiers_option, 01855 error_a 01856 ); 01857 01858 // construct and evaluate second problem 01859 vector<ClassData> tmp_class_vector_b; 01860 tmp_class_vector_b.push_back(split_classes_vector[1]); 01861 tmp_class_vector_b.push_back(merged_class); 01862 01863 // evaluate training error of second cluster against 01864 // rest of the classes 01865 double error_b; 01866 Classifier* tmp_classifier_b = 01867 create_classifier 01868 ( 01869 split_classes_vector[1], 01870 merged_class, 01871 classifiers_option, 01872 error_b 01873 ); 01874 01875 // clean up temporary created classifiers 01876 delete tmp_classifier_a; 01877 delete tmp_classifier_b; 01878 01879 // If at least both of the new created problems give 01880 // better improvement than the improvement threshold 01881 // specified (comparison is done with respect the error 01882 // percentage) 01883 if( 01884 ((100.0 * (error - error_a)) / error) >= 01885 thres.Improvement() && 01886 ((100.0 * (error - error_b)) / error) >= 01887 thres.Improvement() 01888 ) 01889 { 01890 // encoding matrix update 
01891 coding_matrix_update 01892 ( 01893 coding_matrix, 01894 class_vector, 01895 split_classes_vector, 01896 complement_set_indices[0] 01897 ); 01898 01899 // create the new ClassData vectors according to the 01900 // best set 01901 vector<ClassData> class_vector_a; 01902 class_vector_a.push_back(split_classes_vector[0]); 01903 01904 vector<ClassData> class_vector_b; 01905 class_vector_b.push_back(split_classes_vector[1]); 01906 01907 // load rest of the classes to each of the vectors 01908 for(u32 i = 0; i < class_vector_size; i++) 01909 { 01910 if(contains(best_set_indices, bestSetSize, i)) 01911 { 01912 class_vector_a.push_back(class_vector[i]); 01913 class_vector_b.push_back(class_vector[i]); 01914 } 01915 } 01916 01917 // update class tracker 01918 update_class_tracker 01919 ( 01920 class_tracker, 01921 class_vector, 01922 complement_set_indices[0], 01923 split_classes_vector, 01924 classes_created_vector 01925 ); 01926 01927 // delete first level classifier since is not needed 01928 delete first_level_classifier; 01929 01930 // recursive calls to direct_subclass_encoding 01931 direct_subclass_encoding 01932 ( 01933 class_vector_a, 01934 thres, 01935 criterion_option, 01936 classifiers_option, 01937 coding_matrix, 01938 classifiers_vector, 01939 classes_created_vector, 01940 class_tracker, 01941 problem_tracker 01942 ); 01943 01944 direct_subclass_encoding 01945 ( 01946 class_vector_b, 01947 thres, 01948 criterion_option, 01949 classifiers_option, 01950 coding_matrix, 01951 classifiers_vector, 01952 classes_created_vector, 01953 class_tracker, 01954 problem_tracker 01955 ); 01956 01957 } 01958 else // Impovement wasn't good 01959 { 01960 // Reduce static class counter by 2 because two new 01961 // creater clusters were rejected 01962 ClassData::globalIndex -= 2; 01963 01964 // Create and save a classifier specified problem 01965 // configuration 01966 save_classifier 01967 ( 01968 first_level_classifier, 01969 class_vector, 01970 best_set_indices, 01971 
complement_set_indices, 01972 classifiers_vector 01973 ); 01974 01975 // create the new ClassData vectors according to the 01976 // best set 01977 vector<ClassData> class_vector_a; 01978 vector<ClassData> class_vector_b; 01979 for(u32 i = 0; i < class_vector_size; i++) 01980 { 01981 if(contains(best_set_indices, bestSetSize, i)) 01982 { 01983 class_vector_b.push_back(class_vector[i]); 01984 } 01985 else 01986 { 01987 class_vector_a.push_back(class_vector[i]); 01988 } 01989 } 01990 01991 // recursive calls to direct_subclass_encoding 01992 direct_subclass_encoding 01993 ( 01994 class_vector_a, 01995 thres, 01996 criterion_option, 01997 classifiers_option, 01998 coding_matrix, 01999 classifiers_vector, 02000 classes_created_vector, 02001 class_tracker, 02002 problem_tracker 02003 ); 02004 02005 direct_subclass_encoding 02006 ( 02007 class_vector_b, 02008 thres, 02009 criterion_option, 02010 classifiers_option, 02011 coding_matrix, 02012 classifiers_vector, 02013 classes_created_vector, 02014 class_tracker, 02015 problem_tracker 02016 ); 02017 02018 } // end of if - else improvement 02019 02020 } // end of if - else clusters size 02021 02022 } 02023 else // Performance is good 02024 { 02025 // Create and save a classifier specified problem 02026 // configuration 02027 save_classifier 02028 ( 02029 first_level_classifier, 02030 class_vector, 02031 best_set_indices, 02032 complement_set_indices, 02033 classifiers_vector 02034 ); 02035 02036 // create the new ClassData vectors according to the best 02037 // set 02038 vector<ClassData> class_vector_a; 02039 vector<ClassData> class_vector_b; 02040 for(u32 i = 0; i < class_vector_size; i++) 02041 { 02042 if(contains(best_set_indices, bestSetSize, i)) 02043 { 02044 class_vector_b.push_back(class_vector[i]); 02045 } 02046 else 02047 { 02048 class_vector_a.push_back(class_vector[i]); 02049 } 02050 } 02051 02052 // recursive calls to direct_subclass_encoding 02053 direct_subclass_encoding 02054 ( 02055 class_vector_a, 02056 
thres, 02057 criterion_option, 02058 classifiers_option, 02059 coding_matrix, 02060 classifiers_vector, 02061 classes_created_vector, 02062 class_tracker, 02063 problem_tracker 02064 ); 02065 02066 direct_subclass_encoding 02067 ( 02068 class_vector_b, 02069 thres, 02070 criterion_option, 02071 classifiers_option, 02072 coding_matrix, 02073 classifiers_vector, 02074 classes_created_vector, 02075 class_tracker, 02076 problem_tracker 02077 ); 02078 02079 } // end of if - else performance 02080 02081 } // end of if - else bestset = 1 class else complementary 02082 // = 1 class 02083 02084 } 02085 else // Both partitions of that were returned by SFFS contained 02086 // more than one class 02087 { 02088 // compute complements set's labels 02089 ucolvec complement_set_indices= complement 02090 ( 02091 best_set_indices, 02092 class_vector_size 02093 ); 02094 02095 // merge classes of best set 02096 ClassData best_set_merged = merge_selected_classes 02097 ( 02098 class_vector, 02099 best_set_indices 02100 ); 02101 02102 // merge classes rest classes 02103 ClassData complement_set_merged = merge_selected_classes 02104 ( 02105 class_vector, 02106 complement_set_indices 02107 ); 02108 02109 // construct and evaluate first problem 02110 vector<ClassData> tmp_class_vector; 02111 tmp_class_vector.push_back(best_set_merged); 02112 tmp_class_vector.push_back(complement_set_merged); 02113 02114 // create classifier to attack specific problem for specific 02115 // problem 02116 double error; 02117 Classifier* first_level_classifier = create_classifier 02118 ( 02119 best_set_merged, 02120 complement_set_merged, 02121 classifiers_option, 02122 error 02123 ); 02124 02125 // Create and save a classifier specified problem 02126 // configuration 02127 save_classifier 02128 ( 02129 first_level_classifier, 02130 class_vector, 02131 best_set_indices, 02132 complement_set_indices, 02133 classifiers_vector 02134 ); 02135 02136 // create the new ClassData vectors according to the best set 02137 
vector<ClassData> class_vector_a;
vector<ClassData> class_vector_b;
for(u32 i = 0; i < class_vector_size; i++)
{
  // classes selected by SFFS go to partition A, the rest to B
  if(contains(best_set_indices, bestSetSize, i))
  {
    class_vector_a.push_back(class_vector[i]);
  }
  else
  {
    class_vector_b.push_back(class_vector[i]);
  }

}

// recursive calls to direct_subclass_encoding, one per partition
direct_subclass_encoding
(
  class_vector_a,
  thres,
  criterion_option,
  classifiers_option,
  coding_matrix,
  classifiers_vector,
  classes_created_vector,
  class_tracker,
  problem_tracker
);

direct_subclass_encoding
(
  class_vector_b,
  thres,
  criterion_option,
  classifiers_option,
  coding_matrix,
  classifiers_vector,
  classes_created_vector,
  class_tracker,
  problem_tracker
);

} // end of else: both partitions returned by SFFS contain more than one class

}

}

} // end of direct_subclass_encoding



// Builds a subclass-based ECOC decomposition for the given training set.
//
// The samples are first split into per-class ClassData objects; the
// recursive direct_subclass_encoding procedure then grows a binary tree
// of problems (possibly splitting classes into subclasses and training
// one binary classifier per accepted problem).  Finally the bookkeeping
// gathered during recursion (class_tracker, the 2-column intermediate
// coding matrix) is expanded into the output coding matrix: one row
// (codeword) per surviving (sub)class, one column per trained
// classifier, entries in {-1, 0, +1} (0 = class not used by that
// classifier).
//
// Parameters:
//   samples                - training feature vectors (layout as expected
//                            by create_class_vector; one row per sample,
//                            judging by the row-wise predict loops used
//                            elsewhere in this file — confirm)
//   labels                 - class label of each sample; assumed to be
//                            1-based and consecutive, since labels are
//                            mapped to rows via "i + 1" below — confirm
//   thres                  - performance / improvement / cluster-size
//                            thresholds steering the subclass splitting
//   criterion_option       - criterion used by the SFFS class partitioning
//   classifiers_option     - which base classifier type to train
//   coding_matrix          - [out] final ECOC coding matrix
//   classifiers_vector     - [out] trained classifiers; raw owning
//                            pointers, released by the caller
//   classes_created_vector - [out] the (sub)classes present in the final
//                            decomposition, one per codeword row
void
subclass_encoding
(
  const mat& samples,
  const icolvec& labels,
  const Threshold& thres,
  const int criterion_option,
  const int classifiers_option,
  imat& coding_matrix,
  vector<Classifier*>& classifiers_vector,
  vector<ClassData>& classes_created_vector
)
{
  // split the main problem into the corresponding per-class objects
  vector<ClassData> initial_classes_vector = create_class_vector
  (
    samples,
    labels
  );

  // number of initial classes
  const u32 n_classes = initial_classes_vector.size();

  // intermediate class vector; the recursion appends every subclass it
  // creates (including ones later rejected) to this copy
  vector<ClassData> intermediate_classes_vector =
    initial_classes_vector;

  // one column vector per initial class, listing the indices of the
  // (sub)classes the initial class has been split into
  vector<colvec> class_tracker;

  // initialize class_tracker: before any split, each class maps to its
  // own index only
  for(u32 i = 0; i < n_classes; i++)
  {
    colvec temp = zeros<colvec>(1);
    temp[0] = intermediate_classes_vector[i].ClassIndex();
    class_tracker.push_back(temp);
  }

  // initialize coding matrix: column 0 = initial class label,
  // column 1 = resulting class index (both start as i + 1, i.e. labels
  // are treated as 1-based)
  coding_matrix = zeros<imat>(n_classes, 2);
  for(u32 i = 0; i < n_classes; i++)
  {
    coding_matrix(i, 0) = i + 1;
    coding_matrix(i, 1) = i + 1;
  }

  // problem tracker vector -- stores problems examined so far in order
  // to prune unnecessary recursive steps of direct_subclass_encoding
  vector<ucolvec> problem_tracker;



  // engage recursive subclass creation procedure
  direct_subclass_encoding
  (
    initial_classes_vector,
    thres,
    criterion_option,
    classifiers_option,
    coding_matrix,
    classifiers_vector,
    intermediate_classes_vector,
    class_tracker,
    problem_tracker
  );

  // number of intermediate classes created in the recursive procedure
  const u32 n_intermediate_classes = intermediate_classes_vector.size();

  // create the final subclasses vector from the intermediate subclasses
  // vector, keeping only the classes whose index appears in column 1 of
  // the coding matrix (i.e. discarding rejected subclasses)
  for(u32 k = 0; k < coding_matrix.n_rows; k++)
  {
    for(u32 l = 0; l < n_intermediate_classes; l++)
    {
      if(
        coding_matrix(k,1) ==
        intermediate_classes_vector[l].ClassIndex()
      )
      {
        classes_created_vector.push_back
        (
          intermediate_classes_vector[l]
        );

        break;
      }

    }

  }


  // At this point the coding matrix consists of 2 columns: column 0
  // holds the initial class labels and column 1 the resulting class
  // indices (if label and index coincide, the initial class was not
  // split into subclasses; otherwise it was).

  // The next step uses the information gathered by the recursive
  // procedure to create the codewords for each created class (note: a
  // class that has been split into subclasses is represented by as many
  // codewords in the coding matrix as the number of its subclasses).

  // temporary matrix holding the codewords as well as the class labels
  // and indices (i.e. its first two columns are the coding matrix so far)
  imat temp = zeros<imat>
  (
    coding_matrix.n_rows,
    coding_matrix.n_cols + classifiers_vector.size()
  );

  temp.cols(0, 1) = coding_matrix;

  // number of classifiers created
  const u32 n_classifiers = classifiers_vector.size();

  // number of encoding matrix rows
  const u32 n_rows = coding_matrix.n_rows;

  // for each classifier
  for(u32 i = 0; i < n_classifiers; i++)
  {
    // find the subclasses the classifier recognizes and set the
    // classifier's column (offset by the 2 bookkeeping columns) to +1
    // or -1 according to which side of the binary problem the subclass
    // belongs to.
    // NOTE(review): class_tracker is indexed by ClassIndex() - 1, i.e.
    // class indices are assumed 1-based — confirm.  The class_tracker
    // row lookup is invariant w.r.t. k and l and could be hoisted.

    // number of classes marked with +1 in the current classifier
    const u32 n_plus_classes = classifiers_vector[i]->pos.size();

    // for plus subclasses
    for(u32 j = 0; j < n_plus_classes; j++)
    {

      // for each row of the coding matrix
      for(u32 k = 0; k < n_rows; k++)
      {

        for
        (
          u32 l = 0;
          l < class_tracker[classifiers_vector[i]->pos[j]->ClassIndex() - 1].n_rows;
          l++
        )
        {
          // check whether the subclass recorded in class_tracker is the
          // class of this codeword row
          if(
            class_tracker[classifiers_vector[i]->pos[j]->ClassIndex() - 1](l) ==
            coding_matrix(k, 1)
          )
          {
            temp(k, i + 2) = 1;
          }

        }

      }

    }

    // number of classes marked with -1 in the current classifier
    const u32 n_minus_classes = classifiers_vector[i]->neg.size();

    // for minus subclasses
    for(u32 j = 0; j < n_minus_classes; j++)
    {

      // for each row of the coding matrix
      for(u32 k = 0; k < n_rows; k++)
      {

        for
        (
          u32 l = 0;
          l < class_tracker[classifiers_vector[i]->neg[j]->ClassIndex() - 1].n_rows;
          l++
        )
        {

          // check whether the subclass recorded in class_tracker is the
          // class of this codeword row
          if(
            class_tracker[classifiers_vector[i]->neg[j]->ClassIndex() - 1](l) ==
            coding_matrix(k, 1)
          )
          {
            temp(k, i + 2) = -1;
          }

        }

      }

    }

  }

  // final coding matrix: drop the 2 bookkeeping columns;
  // each column represents a classifier, each row a codeword
  coding_matrix = temp.cols(2, temp.n_cols - 1);
}



// Recursively builds the DECOC binary tree over the given classes.
//
// Trivial cases: one class -> nothing to do; two classes -> train and
// save a single binary classifier.  Otherwise SFFS selects the best
// binary partition of the classes, the two groups are merged into two
// meta-classes, one classifier is trained and saved for that problem,
// and the procedure recurses on each group.  Unlike the subclass
// variant above, no training-error check is performed and classes are
// never split.
//
// Parameters:
//   class_vector       - classes (or subclasses) of the current node
//   criterion_option   - criterion used by the SFFS partitioning
//   classifiers_option - which base classifier type to train
//   classifiers_vector - [out] trained classifiers, appended in tree
//                        traversal order (raw owning pointers)
void
direct_decoc_encoding
(
  const vector<ClassData>& class_vector,
  const int criterion_option,
  const int classifiers_option,
  vector<Classifier*>& classifiers_vector
)
{
  // number of classes at this node
  const u32 n_classes = class_vector.size();

  // trivial recursion step: a single class needs no classifier
  if(n_classes == 1)
  {
    return;
  }
  else
  {
    // trivial recursion step: exactly two classes -- train one
    // classifier that separates them and stop recursing
    if(n_classes == 2)
    {
      // create classifier
      Classifier* first_level_classifier =
        create_classifier
        (
          class_vector[0],
          class_vector[1],
          classifiers_option
        );

      // save classifier
      save_classifier
      (
        first_level_classifier,
        class_vector,
        classifiers_vector
      );


    }
    else // more than two classes
    {
      // run SFFS to configure the binary decomposition of the classes
      ucolvec best_set_indices = sffs(class_vector, criterion_option);

      // number of classes in the best subset yielded by SFFS
      const u32 bestSetSize = best_set_indices.n_rows;


      // compute the complement set's indices
      ucolvec complement_set_indices= complement
      (
        best_set_indices,
        n_classes
      );

      // merge the classes of the best set into one meta-class
      ClassData best_set_merged = merge_selected_classes
      (
        class_vector,
        best_set_indices
      );

      // merge the remaining classes into the other meta-class
      ClassData complement_set_merged = merge_selected_classes
      (
        class_vector,
        complement_set_indices
      );

      // construct the two-class problem for this tree node
      // NOTE(review): tmp_class_vector is built but never used below
      vector<ClassData> tmp_class_vector;
      tmp_class_vector.push_back(best_set_merged);
      tmp_class_vector.push_back(complement_set_merged);

      // create a classifier for this specific problem
      Classifier* first_level_classifier = create_classifier
      (
        best_set_merged,
        complement_set_merged,
        classifiers_option
      );

      // save the classifier for the specified problem configuration
      save_classifier
      (
        first_level_classifier,
        class_vector,
        best_set_indices,
        complement_set_indices,
        classifiers_vector
      );

      // create the new ClassData vectors according to the best set
      vector<ClassData> class_vector_a;
      vector<ClassData> class_vector_b;
      for(u32 i = 0; i < n_classes; i++)
      {
        if(contains(best_set_indices, bestSetSize, i))
        {
          class_vector_a.push_back(class_vector[i]);
        }
        else
        {
          class_vector_b.push_back(class_vector[i]);
        }

      }

      // recurse on each partition
      direct_decoc_encoding
      (
        class_vector_a,
        criterion_option,
        classifiers_option,
        classifiers_vector
      );

      direct_decoc_encoding
      (
        class_vector_b,
        criterion_option,
        classifiers_option,
        classifiers_vector
      );

    }

  }

}




// Computes the DECOC coding matrix for the given classes.
//
// Runs the recursive direct_decoc_encoding procedure to build the tree
// of binary classifiers, then fills the coding matrix: one row per
// class, one column per classifier, with +1/-1 for the classes each
// classifier separates and 0 for classes it does not involve.
//
// Parameters:
//   class_vector       - the classes of the problem
//   criterion_option   - criterion used by the SFFS partitioning
//   classifiers_option - which base classifier type to train
//   coding_matrix      - [out] DECOC coding matrix (n_classes x
//                        n_classifiers)
//   classifiers_vector - [out] trained classifiers (raw owning pointers)
void
decoc_coding
(
  const vector<ClassData>& class_vector,
  const int criterion_option,
  const int classifiers_option,
  imat& coding_matrix,
  vector<Classifier*>& classifiers_vector
)
{
  // number of initial classes
  const u32 n_classes = class_vector.size();

  // engage recursive DECOC tree creation procedure
  direct_decoc_encoding
  (
    class_vector,
    criterion_option,
    classifiers_option,
    classifiers_vector
  );

  // initialize coding matrix (zero = class unused by classifier)
  coding_matrix = zeros<imat>(n_classes, classifiers_vector.size());

  // fill in the values of the coding matrix; rows are addressed by
  // ClassLabel() - 1, i.e. labels are assumed to be 1..n_classes --
  // confirm against create_class_vector
  for(u32 i = 0; i < classifiers_vector.size(); i++)
  {

    for(u32 j = 0; j < classifiers_vector[i]->pos.size(); j++)
    {
      coding_matrix(classifiers_vector[i]->pos[j]->ClassLabel() - 1, i) = 1;
    }

    for(u32 j = 0; j < classifiers_vector[i]->neg.size(); j++)
    {
      coding_matrix(classifiers_vector[i]->neg[j]->ClassLabel() - 1, i) = -1;
    }

  }

}