ECOCPAK v0.9
|
00001 // Copyright (C) 2011 the authors listed below 00002 // http://ecocpak.sourceforge.net 00003 // 00004 // Authors: 00005 // - Dimitrios Bouzas (bouzas at ieee dot org) 00006 // - Nikolaos Arvanitopoulos (niarvani at ieee dot org) 00007 // - Anastasios Tefas (tefas at aiia dot csd dot auth dot gr) 00008 // 00009 // This file is part of the ECOC PAK C++ library. It is 00010 // provided without any warranty of fitness for any purpose. 00011 // 00012 // You can redistribute this file and/or modify it under 00013 // the terms of the GNU Lesser General Public License (LGPL) 00014 // as published by the Free Software Foundation, either 00015 // version 3 of the License or (at your option) any later 00016 // version. 00017 // (see http://www.opensource.org/licenses for more info) 00018 00019 00022 00023 00024 00060 double 00061 cross_validation 00062 ( 00063 const mat& samples, 00064 const icolvec& labels, 00065 const imat& codmat, 00066 const Threshold& thres, 00067 const u32 n_folds, 00068 const u32 n_classes, 00069 const u32 criterion_option, 00070 const u32 classifier_option, 00071 const int coding_strategy, 00072 const int decoding_strategy, 00073 const u32 n_matrices, 00074 const u32 n_desired_classifers, 00075 const double validation, 00076 const int init_coding_strategy, 00077 const int ecocone_mode, 00078 const u32 max_iter, 00079 const double epsilon, 00080 const double wvalidation, 00081 const bool verbose, 00082 double& mean_rows, 00083 double& mean_cols, 00084 double& elapsed_time, 00085 ofstream& output 00086 ) 00087 { 00088 // initialize 00089 mean_rows = 0.0; 00090 mean_cols = 0.0; 00091 elapsed_time = 0.0; 00092 double tmp_time = 0.0; 00093 00094 double *target = Malloc(double, n_folds); 00095 00096 int i; 00097 int* fold_start = Malloc(int, n_folds + 1); 00098 int l = samples.n_rows; 00099 int* perm = Malloc(int, l); 00100 int nr_class; 00101 00102 // stratified cv may not give leave-one-out rate 00103 // Each class to l folds -> some folds may have zero elements 00104 if(n_folds < l) 00105 { 00106 int* start = NULL; 00107 int* label = NULL; 00108 int* count = NULL; 00109 group_classes(labels, &nr_class, &label, &start, &count, perm); 00110 00111 // random shuffle and then data grouped by fold using the array perm 00112 int* fold_count = Malloc(int, n_folds); 00113 int c; 00114 int* index = Malloc(int, l); 00115 00116 for(i = 0; i < l; i++) 00117 { 00118 index[i] = perm[i]; 00119 } 00120 00121 for(c = 0; c < nr_class; c++) 00122 { 00123 00124 for(i = 0;i < count[c]; i++) 00125 { 00126 int j = i + rand() % (count[c] - i); 00127 swap(index[start[c] + j], index[start[c] + i]); 00128 } 00129 00130 } 00131 00132 for(i = 0; i < n_folds; i++) 00133 { 00134 fold_count[i] = 0; 00135 00136 for(c = 0; c < nr_class; c++) 00137 { 00138 fold_count[i] += 00139 (i + 1) * count[c] / n_folds - i * count[c] / n_folds; 00140 } 00141 00142 } 00143 00144 fold_start[0] = 0; 00145 for(i = 1; i <= n_folds; i++) 00146 { 00147 fold_start[i] = fold_start[i - 1] + fold_count[i - 1]; 00148 } 00149 00150 for(c = 0; c < nr_class; c++) 00151 { 00152 for(i = 0; i < n_folds; i++) 00153 { 00154 int begin = start[c] + i * count[c] / n_folds; 00155 int end = start[c] + (i + 1) * count[c] / n_folds; 00156 00157 for(int j = begin; j < end; j++) 00158 { 00159 perm[fold_start[i]] = index[j]; 00160 fold_start[i]++; 00161 } 00162 00163 } 00164 00165 } 00166 00167 fold_start[0] = 0; 00168 00169 for(i = 1; i <= n_folds; i++) 00170 { 00171 fold_start[i] = fold_start[i - 1] + fold_count[i - 1]; 00172 } 00173 00174 free(start); 00175 free(label); 00176 free(count); 00177 free(index); 00178 free(fold_count); 00179 } 00180 else 00181 { 00182 for(i = 0; i < l; i++) 00183 { 00184 perm[i] = i; 00185 } 00186 00187 for(i = 0; i < l; i++) 00188 { 00189 int j = i + rand() % (l - i); 00190 swap(perm[i], perm[j]); 00191 } 00192 00193 for(i = 0; i <= n_folds; i++) 00194 { 00195 fold_start[i] = i * l / n_folds; 00196 } 00197 00198 } 00199 00200 // for each fold 00201 for(i = 0; i < n_folds; i++) 00202 { 00203 #ifdef NCURSES_OUTPUT 00204 if(menu_on == true) 00205 { 00206 clear(); 00207 mvprintw(0, 0, "Running FOLD NR. %d\n", i + 1); 00208 refresh(); 00209 } 00210 #endif 00211 00212 if(verbose == true) 00213 { 00214 output << endl << ">> Fold Number " << i + 1 << endl << endl; 00215 } 00216 00217 int begin = fold_start[i]; 00218 int end = fold_start[i + 1]; 00219 int j; 00220 int k; 00221 00222 // --- costruct the sub problem that corresponds to the --- // 00223 // --- specific fold --- // 00224 00225 // number of training samples 00226 const u32 training_size = l - (end - begin); 00227 00228 // number of test samples 00229 const u32 test_size = end - begin; 00230 00231 // allocate memory for subdata matrix and sublabels 00232 mat subDataMat = zeros<mat>(training_size, samples.n_cols); 00233 icolvec subLabels = zeros<icolvec>(training_size); 00234 00235 // create the subproblem by assigning the correct samples and labels 00236 // according to the created permutations from the cross validation 00237 // procedure 00238 k = 0; 00239 for(j = 0; j < begin; j++) 00240 { 00241 subDataMat.row(k) = samples.row(perm[j]); 00242 subLabels(k) = labels(perm[j]); 00243 00244 ++k; 00245 } 00246 00247 for(j = end; j < l; j++) 00248 { 00249 subDataMat.row(k) = samples.row(perm[j]); 00250 subLabels(k) = labels(perm[j]); 00251 00252 ++k; 00253 } 00254 00255 // --- end of subproblem construction --- // 00256 00257 // --- costruct the test subproblem that corresponds to the --- // 00258 // --- specific fold --- // 00259 00260 // allocate memory for testLabels 00261 icolvec test_labels = zeros<icolvec>(test_size); 00262 00263 // allocate test set 00264 mat test_set = zeros<mat>(test_size, samples.n_cols); 00265 00266 // create the test subproblem and labels 00267 k = 0; 00268 for(j = begin; j < end; j++) 00269 { 00270 test_labels(k) = labels(perm[j]); 00271 test_set.row(k) = samples.row(perm[j]); 00272 ++k; 00273 } 00274 00275 // current fold coding matrix 00276 imat coding_matrix; 00277 00278 // current fold vector of classifiers 00279 vector<Classifier*> classifiers_vector; 00280 00281 // --- end of test subproblem construction --- // 00282 switch(coding_strategy) 00283 { 00284 // Discriminant Error Correcting Output Codes Coding 00285 case DECOC: 00286 { 00287 target[i] = decoc 00288 ( 00289 subDataMat, 00290 subLabels, 00291 test_set, 00292 test_labels, 00293 decoding_strategy, 00294 classifier_option, 00295 criterion_option, 00296 verbose, 00297 output, 00298 tmp_time 00299 ); 00300 00301 elapsed_time += tmp_time; 00302 00303 break; 00304 } 00305 00306 // Discriminant Error Correcting Output Codes with subclasses 00307 case SUBDECOC: 00308 { 00309 target[i] = subdecoc 00310 ( 00311 subDataMat, 00312 subLabels, 00313 test_set, 00314 test_labels, 00315 thres, 00316 decoding_strategy, 00317 classifier_option, 00318 criterion_option, 00319 verbose, 00320 coding_matrix, 00321 output, 00322 tmp_time 00323 ); 00324 00325 mean_rows += double(coding_matrix.n_rows) / double(n_folds); 00326 mean_cols += double(coding_matrix.n_cols) / double(n_folds); 00327 elapsed_time += tmp_time; 00328 00329 break; 00330 } 00331 00332 // One versus One, or all pairs coding 00333 case ONE_VS_ONE: 00334 { 00335 target[i] = one_vs_one 00336 ( 00337 subDataMat, 00338 subLabels, 00339 test_set, 00340 test_labels, 00341 decoding_strategy, 00342 classifier_option, 00343 verbose, 00344 output, 00345 tmp_time 00346 ); 00347 00348 elapsed_time += tmp_time; 00349 00350 break; 00351 } 00352 00353 // One versus All, or one against all coding 00354 case ONE_VS_ALL: 00355 { 00356 target[i] = one_vs_all 00357 ( 00358 subDataMat, 00359 subLabels, 00360 test_set, 00361 test_labels, 00362 decoding_strategy, 00363 classifier_option, 00364 verbose, 00365 output, 00366 tmp_time 00367 ); 00368 00369 elapsed_time += tmp_time; 00370 00371 break; 00372 } 00373 00374 // Dense Random coding 00375 case DENSE_RANDOM: 00376 { 00377 target[i] = dense_random_ecoc 00378 ( 00379 subDataMat, 00380 subLabels, 00381 test_set, 00382 test_labels, 00383 decoding_strategy, 00384 classifier_option, 00385 n_matrices, 00386 n_desired_classifers, 00387 verbose, 00388 output, 00389 tmp_time 00390 ); 00391 00392 elapsed_time += tmp_time; 00393 00394 break; 00395 } 00396 00397 // Sparse Random coding 00398 case SPARSE_RANDOM: 00399 { 00400 target[i] = sparse_random_ecoc 00401 ( 00402 subDataMat, 00403 subLabels, 00404 test_set, 00405 test_labels, 00406 decoding_strategy, 00407 classifier_option, 00408 n_matrices, 00409 n_desired_classifers, 00410 verbose, 00411 output, 00412 tmp_time 00413 ); 00414 00415 elapsed_time += tmp_time; 00416 00417 break; 00418 } 00419 00420 // ECOC One 00421 case ECOC_ONE: 00422 { 00423 target[i] = ecoc_one 00424 ( 00425 subDataMat, 00426 subLabels, 00427 test_set, 00428 test_labels, 00429 thres, 00430 decoding_strategy, 00431 classifier_option, 00432 criterion_option, 00433 n_matrices, 00434 n_desired_classifers, 00435 validation, 00436 init_coding_strategy, 00437 ecocone_mode, 00438 max_iter, 00439 epsilon, 00440 wvalidation, 00441 verbose, 00442 output, 00443 tmp_time 00444 ); 00445 00446 elapsed_time += tmp_time; 00447 00448 break; 00449 } 00450 00451 // Forest ECOC 00452 case FOREST_ECOC: 00453 { 00454 target[i] = forest_ecoc 00455 ( 00456 subDataMat, 00457 subLabels, 00458 test_set, 00459 test_labels, 00460 decoding_strategy, 00461 classifier_option, 00462 criterion_option, 00463 n_matrices, 00464 verbose, 00465 output, 00466 tmp_time 00467 ); 00468 00469 elapsed_time += tmp_time; 00470 00471 break; 00472 } 00473 00474 // User Custom coding 00475 case CUSTOM_CODING: 00476 { 00477 target[i] = custom_coding 00478 ( 00479 subDataMat, 00480 subLabels, 00481 test_set, 00482 test_labels, 00483 codmat, 00484 decoding_strategy, 00485 classifier_option, 00486 verbose, 00487 output, 00488 tmp_time 00489 ); 00490 00491 elapsed_time += tmp_time; 00492 00493 break; 00494 } 00495 00496 } 00497 00498 target[i] /= double(test_set.n_rows); 00499 00500 ClassData::globalIndex = 0; 00501 } 00502 00503 free(fold_start); 00504 free(perm); 00505 00506 double tempSum = 0.0; 00507 for(u32 i = 0; i < n_folds; i++) 00508 { 00509 tempSum += target[i]; 00510 } 00511 00512 tempSum /= double(n_folds); 00513 00514 free(target); 00515 00516 return tempSum; 00517 } 00518 00519 00520 00541 double 00542 cvonesvsone 00543 ( 00544 const mat& samples, 00545 const icolvec& labels, 00546 const u32 n_folds, 00547 const u32 n_classes, 00548 const u32 classifier_option, 00549 const int decoding_strategy, 00550 const bool verbose, 00551 double& elapsed_time, 00552 ofstream& output 00553 ) 00554 { 00555 // dummies, overrides 00556 imat codmat; 00557 Threshold thres(100,100,100); 00558 double mr; 00559 double mc; 00560 00561 return cross_validation 00562 ( 00563 samples, 00564 labels, 00565 codmat, 00566 thres, 00567 n_folds, 00568 n_classes, 00569 0, 00570 classifier_option, 00571 ONE_VS_ONE, 00572 decoding_strategy, 00573 0, 00574 0, 00575 0, 00576 0, 00577 0, 00578 0, 00579 0, 00580 0, 00581 verbose, 00582 mr, 00583 mc, 00584 elapsed_time, 00585 output 00586 ); 00587 00588 } 00589 00590 00591 00612 double 00613 cvonesvsall 00614 ( 00615 const mat& samples, 00616 const icolvec& labels, 00617 const u32 n_folds, 00618 const u32 n_classes, 00619 const u32 classifier_option, 00620 const int decoding_strategy, 00621 const bool verbose, 00622 double& elapsed_time, 00623 ofstream& output 00624 ) 00625 { 00626 // dummies, overrides 00627 imat codmat; 00628 Threshold thres(100,100,100); 00629 double mr; 00630 double mc; 00631 00632 return cross_validation 00633 ( 00634 samples, 00635 labels, 00636 codmat, 00637 thres, 00638 n_folds, 00639 n_classes, 00640 0, 00641 classifier_option, 00642 ONE_VS_ALL, 00643 decoding_strategy, 00644 0, 00645 0, 00646 0, 00647 0, 00648 0, 00649 0, 00650 0, 00651 0, 00652 verbose, 00653 mr, 00654 mc, 00655 elapsed_time, 00656 output 00657 ); 00658 00659 } 00660 00661 00662 00684 double 00685 cvdecoc 00686 ( 00687 const mat& samples, 00688 const icolvec& labels, 00689 const u32 n_folds, 00690 const u32 n_classes, 00691 const u32 classifier_option, 00692 const int decoding_strategy, 00693 const u32 criterion_option, 00694 const bool verbose, 00695 double& elapsed_time, 00696 ofstream& output 00697 ) 00698 { 00699 // dummies, overrides 00700 imat codmat; 00701 Threshold thres(100,100,100); 00702 double mr; 00703 double mc; 00704 00705 return cross_validation 00706 ( 00707 samples, 00708 labels, 00709 codmat, 00710 thres, 00711 n_folds, 00712 n_classes, 00713 criterion_option, 00714 classifier_option, 00715 DECOC, 00716 decoding_strategy, 00717 0, 00718 0, 00719 0, 00720 0, 00721 0, 00722 0, 00723 0, 00724 0, 00725 verbose, 00726 mr, 00727 mc, 00728 elapsed_time, 00729 output 00730 ); 00731 00732 } 00733 00734 00735 00760 double 00761 cvsubdecoc 00762 ( 00763 const mat& samples, 00764 const icolvec& labels, 00765 const u32 n_folds, 00766 const u32 n_classes, 00767 const u32 classifier_option, 00768 const int decoding_strategy, 00769 const u32 criterion_option, 00770 const Threshold& thres, 00771 const bool verbose, 00772 double& mr, 00773 double& mc, 00774 double& elapsed_time, 00775 ofstream& output 00776 ) 00777 { 00778 // dummies, overrides 00779 imat codmat; 00780 00781 return cross_validation 00782 ( 00783 samples, 00784 labels, 00785 codmat, 00786 thres, 00787 n_folds, 00788 n_classes, 00789 criterion_option, 00790 classifier_option, 00791 SUBDECOC, 00792 decoding_strategy, 00793 0, 00794 0, 00795 0, 00796 0, 00797 0, 00798 0, 00799 0, 00800 0, 00801 verbose, 00802 mr, 00803 mc, 00804 elapsed_time, 00805 output 00806 ); 00807 00808 } 00809 00810 00811 00837 double 00838 cvsparserand 00839 ( 00840 const mat& samples, 00841 const icolvec& labels, 00842 const u32 n_folds, 00843 const u32 n_classes, 00844 const u32 classifier_option, 00845 const int decoding_strategy, 00846 const u32 n_matrices, 00847 const u32 n_desired_classifers, 00848 const bool verbose, 00849 double& elapsed_time, 00850 ofstream& output 00851 ) 00852 { 00853 // dummies, overrides 00854 imat codmat; 00855 Threshold thres(100,100,100); 00856 double mr; 00857 double mc; 00858 00859 return cross_validation 00860 ( 00861 samples, 00862 labels, 00863 codmat, 00864 thres, 00865 n_folds, 00866 n_classes, 00867 0, 00868 classifier_option, 00869 SPARSE_RANDOM, 00870 decoding_strategy, 00871 n_matrices, 00872 n_desired_classifers, 00873 0, 00874 0, 00875 0, 00876 0, 00877 0, 00878 0, 00879 verbose, 00880 mr, 00881 mc, 00882 elapsed_time, 00883 output 00884 ); 00885 00886 } 00887 00888 00889 00915 double 00916 cvdenserand 00917 ( 00918 const mat& samples, 00919 const icolvec& labels, 00920 const u32 n_folds, 00921 const u32 n_classes, 00922 const u32 classifier_option, 00923 const int decoding_strategy, 00924 const u32 n_matrices, 00925 const u32 n_desired_classifers, 00926 const bool verbose, 00927 double& elapsed_time, 00928 ofstream& output 00929 ) 00930 { 00931 // dummies, overrides 00932 imat codmat; 00933 Threshold thres(100,100,100); 00934 double mr; 00935 double mc; 00936 00937 return cross_validation 00938 ( 00939 samples, 00940 labels, 00941 codmat, 00942 thres, 00943 n_folds, 00944 n_classes, 00945 0, 00946 classifier_option, 00947 DENSE_RANDOM, 00948 decoding_strategy, 00949 n_matrices, 00950 n_desired_classifers, 00951 0, 00952 0, 00953 0, 00954 0, 00955 0, 00956 0, 00957 verbose, 00958 mr, 00959 mc, 00960 elapsed_time, 00961 output 00962 ); 00963 00964 } 00965 00966 00967 00995 double 00996 cvecocone 00997 ( 00998 const mat& samples, 00999 const icolvec& labels, 01000 const u32 n_folds, 01001 const u32 n_classes, 01002 const u32 classifier_option, 01003 const int decoding_strategy, 01004 const double validation, 01005 const int init_coding_strategy, 01006 const int ecocone_mode, 01007 const u32 max_iter, 01008 const double epsilon, 01009 const double wvalidation, 01010 const bool verbose, 01011 double& elapsed_time, 01012 ofstream& output 01013 ) 01014 { 01015 // dummies, overrides 01016 imat codmat; 01017 Threshold thres(100,100,100); 01018 double mr; 01019 double mc; 01020 01021 return cross_validation 01022 ( 01023 samples, 01024 labels, 01025 codmat, 01026 thres, 01027 n_folds, 01028 n_classes, 01029 0, 01030 classifier_option, 01031 ECOC_ONE, 01032 decoding_strategy, 01033 0, 01034 0, 01035 validation, 01036 init_coding_strategy, 01037 ecocone_mode, 01038 max_iter, 01039 epsilon, 01040 wvalidation, 01041 verbose, 01042 mr, 01043 mc, 01044 elapsed_time, 01045 output 01046 ); 01047 01048 } 01049 01050 01051 01073 double 01074 cvforest 01075 ( 01076 const mat& samples, 01077 const icolvec& labels, 01078 const u32 n_folds, 01079 const u32 n_classes, 01080 const u32 classifier_option, 01081 const int decoding_strategy, 01082 const u32 n_trees, 01083 const bool verbose, 01084 double& elapsed_time, 01085 ofstream& output 01086 ) 01087 { 01088 // dummies, overrides 01089 imat codmat; 01090 Threshold thres(100,100,100); 01091 double mr; 01092 double mc; 01093 01094 return cross_validation 01095 ( 01096 samples, 01097 labels, 01098 codmat, 01099 thres, 01100 n_folds, 01101 n_classes, 01102 0, 01103 classifier_option, 01104 FOREST_ECOC, 01105 decoding_strategy, 01106 n_trees, 01107 0, 01108 0, 01109 0, 01110 0, 01111 0, 01112 0, 01113 0, 01114 verbose, 01115 mr, 01116 mc, 01117 elapsed_time, 01118 output 01119 ); 01120 01121 } 01122 01123 01124 01146 double 01147 cvcustom 01148 ( 01149 const mat& samples, 01150 const icolvec& labels, 01151 const u32 n_folds, 01152 const u32 n_classes, 01153 const u32 classifier_option, 01154 const int decoding_strategy, 01155 const imat& codmat, 01156 const bool verbose, 01157 double& elapsed_time, 01158 ofstream& output 01159 ) 01160 { 01161 // dummies, overrides 01162 Threshold thres(100,100,100); 01163 double mr; 01164 double mc; 01165 01166 return cross_validation 01167 ( 01168 samples, 01169 labels, 01170 codmat, 01171 thres, 01172 n_folds, 01173 n_classes, 01174 0, 01175 classifier_option, 01176 CUSTOM_CODING, 01177 decoding_strategy, 01178 0, 01179 0, 01180 0, 01181 0, 01182 0, 01183 0, 01184 0, 01185 0, 01186 verbose, 01187 mr, 01188 mc, 01189 elapsed_time, 01190 output 01191 ); 01192 01193 } 01194 01195 01196