// ECOCPAK v0.9
// fn_crossvalidation.hpp
// Go to the documentation of this file.
00001 // Copyright (C) 2011 the authors listed below
00002 // http://ecocpak.sourceforge.net
00003 //
00004 // Authors:
00005 // - Dimitrios Bouzas (bouzas at ieee dot org)
00006 // - Nikolaos Arvanitopoulos (niarvani at ieee dot org)
00007 // - Anastasios Tefas (tefas at aiia dot csd dot auth dot gr)
00008 //
00009 // This file is part of the ECOC PAK C++ library. It is
00010 // provided without any warranty of fitness for any purpose.
00011 //
00012 // You can redistribute this file and/or modify it under
00013 // the terms of the GNU Lesser General Public License (LGPL)
00014 // as published by the Free Software Foundation, either
00015 // version 3 of the License or (at your option) any later
00016 // version.
00017 // (see http://www.opensource.org/licenses for more info)
00018 
00019 
00022 
00023 
00024 
00060 double
00061 cross_validation
00062   (
00063   const mat& samples,
00064   const icolvec& labels,
00065   const imat& codmat,
00066   const Threshold& thres,
00067   const u32 n_folds,
00068   const u32 n_classes,
00069   const u32 criterion_option,
00070   const u32 classifier_option,
00071   const int coding_strategy,
00072   const int decoding_strategy,
00073   const u32 n_matrices,
00074   const u32 n_desired_classifers,
00075   const double validation,
00076   const int init_coding_strategy,
00077   const int ecocone_mode,
00078   const u32 max_iter,
00079   const double epsilon,
00080   const double wvalidation,
00081   const bool verbose,
00082   double& mean_rows,
00083   double& mean_cols,
00084   double& elapsed_time,
00085   ofstream& output
00086   )
00087   {
00088   // initialize
00089   mean_rows = 0.0;
00090   mean_cols = 0.0;
00091   elapsed_time = 0.0;
00092   double tmp_time = 0.0;
00093 
00094   double *target = Malloc(double, n_folds);
00095 
00096   int i;
00097   int* fold_start = Malloc(int, n_folds + 1);
00098   int l = samples.n_rows;
00099   int* perm = Malloc(int, l);
00100   int nr_class;
00101 
00102   // stratified cv may not give leave-one-out rate
00103   // Each class to l folds -> some folds may have zero elements
00104   if(n_folds < l)
00105     {
00106     int* start = NULL;
00107     int* label = NULL;
00108     int* count = NULL;
00109     group_classes(labels, &nr_class, &label, &start, &count, perm);
00110 
00111     // random shuffle and then data grouped by fold using the array perm
00112     int* fold_count = Malloc(int, n_folds);
00113     int c;
00114     int* index = Malloc(int, l);
00115 
00116     for(i = 0; i < l; i++)
00117       {
00118       index[i] = perm[i];
00119       }
00120 
00121     for(c = 0; c < nr_class; c++)
00122       {
00123 
00124       for(i = 0;i < count[c]; i++)
00125         {
00126         int j = i + rand() % (count[c] - i);
00127         swap(index[start[c] + j], index[start[c] + i]);
00128         }
00129 
00130       }
00131 
00132     for(i = 0; i < n_folds; i++)
00133       {
00134       fold_count[i] = 0;
00135 
00136       for(c = 0; c < nr_class; c++)
00137         {
00138         fold_count[i] +=
00139           (i + 1) * count[c] / n_folds - i * count[c] / n_folds;
00140         }
00141 
00142       }
00143 
00144     fold_start[0] = 0;
00145     for(i = 1; i <= n_folds; i++)
00146       {
00147       fold_start[i] = fold_start[i - 1] + fold_count[i - 1];
00148       }
00149 
00150     for(c = 0; c < nr_class; c++)
00151       {
00152       for(i = 0; i < n_folds; i++)
00153         {
00154         int begin = start[c] + i * count[c] / n_folds;
00155         int end = start[c] + (i + 1) * count[c] / n_folds;
00156 
00157         for(int j = begin; j < end; j++)
00158           {
00159           perm[fold_start[i]] = index[j];
00160           fold_start[i]++;
00161           }
00162 
00163         }
00164 
00165       }
00166 
00167     fold_start[0] = 0;
00168 
00169     for(i = 1; i <= n_folds; i++)
00170       {
00171       fold_start[i] = fold_start[i - 1] + fold_count[i - 1];
00172       }
00173 
00174     free(start);
00175     free(label);
00176     free(count);
00177     free(index);
00178     free(fold_count);
00179     }
00180   else
00181     {
00182     for(i = 0; i < l; i++)
00183       {
00184       perm[i] = i;
00185       }
00186 
00187     for(i = 0; i < l; i++)
00188       {
00189       int j = i + rand() % (l - i);
00190       swap(perm[i], perm[j]);
00191       }
00192 
00193     for(i = 0; i <= n_folds; i++)
00194       {
00195       fold_start[i] = i * l / n_folds;
00196       }
00197 
00198     }
00199 
00200   // for each fold
00201   for(i = 0; i < n_folds; i++)
00202     {
00203     #ifdef NCURSES_OUTPUT
00204     if(menu_on == true)
00205       {
00206       clear();
00207       mvprintw(0, 0, "Running FOLD NR. %d\n", i + 1);
00208       refresh();
00209       }
00210     #endif
00211 
00212     if(verbose == true)
00213       {
00214       output << endl << ">> Fold Number " <<  i + 1 << endl << endl;
00215       }
00216 
00217     int begin = fold_start[i];
00218     int end = fold_start[i + 1];
00219     int j;
00220     int k;
00221 
00222     // --- costruct the sub problem that corresponds to the       --- //
00223     // --- specific fold                                          --- //
00224 
00225     // number of training samples
00226     const u32 training_size = l - (end - begin);
00227 
00228     // number of test samples
00229     const u32 test_size = end - begin;
00230 
00231     // allocate memory for subdata matrix and sublabels
00232     mat subDataMat = zeros<mat>(training_size, samples.n_cols);
00233     icolvec subLabels = zeros<icolvec>(training_size);
00234 
00235     // create the subproblem by assigning the correct samples and labels
00236     // according to the created permutations from the cross validation
00237     // procedure
00238     k = 0;
00239     for(j = 0; j < begin; j++)
00240       {
00241       subDataMat.row(k) = samples.row(perm[j]);
00242       subLabels(k) = labels(perm[j]);
00243 
00244       ++k;
00245       }
00246 
00247     for(j = end; j < l; j++)
00248       {
00249       subDataMat.row(k) = samples.row(perm[j]);
00250       subLabels(k) = labels(perm[j]);
00251 
00252       ++k;
00253       }
00254 
00255     // --- end of subproblem construction --- //
00256 
00257     // --- costruct the test subproblem that corresponds to the   --- //
00258     // --- specific fold                                          --- //
00259 
00260     // allocate memory for testLabels
00261     icolvec test_labels = zeros<icolvec>(test_size);
00262 
00263     // allocate test set
00264     mat test_set = zeros<mat>(test_size, samples.n_cols);
00265 
00266     // create the test subproblem and labels
00267     k = 0;
00268     for(j = begin; j < end; j++)
00269       {
00270       test_labels(k) = labels(perm[j]);
00271       test_set.row(k) = samples.row(perm[j]);
00272       ++k;
00273       }
00274 
00275     // current fold coding matrix
00276     imat coding_matrix;
00277 
00278     // current fold vector of classifiers
00279     vector<Classifier*> classifiers_vector;
00280 
00281     // --- end of test subproblem construction --- //
00282     switch(coding_strategy)
00283       {
00284       // Discriminant Error Correcting Output Codes Coding
00285       case DECOC:
00286         {
00287         target[i] = decoc
00288                       (
00289                       subDataMat,
00290                       subLabels,
00291                       test_set,
00292                       test_labels,
00293                       decoding_strategy,
00294                       classifier_option,
00295                       criterion_option,
00296                       verbose,
00297                       output,
00298                       tmp_time
00299                       );
00300 
00301         elapsed_time += tmp_time;
00302 
00303         break;
00304         }
00305 
00306       // Discriminant Error Correcting Output Codes with subclasses
00307       case SUBDECOC:
00308         {
00309         target[i] = subdecoc
00310                       (
00311                       subDataMat,
00312                       subLabels,
00313                       test_set,
00314                       test_labels,
00315                       thres,
00316                       decoding_strategy,
00317                       classifier_option,
00318                       criterion_option,
00319                       verbose,
00320                       coding_matrix,
00321                       output,
00322                       tmp_time
00323                       );
00324 
00325         mean_rows += double(coding_matrix.n_rows) / double(n_folds);
00326         mean_cols += double(coding_matrix.n_cols) / double(n_folds);
00327         elapsed_time += tmp_time;
00328 
00329         break;
00330         }
00331 
00332       // One versus One, or all pairs coding
00333       case ONE_VS_ONE:
00334         {
00335         target[i] = one_vs_one
00336                       (
00337                       subDataMat,
00338                       subLabels,
00339                       test_set,
00340                       test_labels,
00341                       decoding_strategy,
00342                       classifier_option,
00343                       verbose,
00344                       output,
00345                       tmp_time
00346                       );
00347 
00348         elapsed_time += tmp_time;
00349 
00350         break;
00351         }
00352 
00353       // One versus All, or one against all coding
00354       case ONE_VS_ALL:
00355         {
00356         target[i] = one_vs_all
00357                       (
00358                       subDataMat,
00359                       subLabels,
00360                       test_set,
00361                       test_labels,
00362                       decoding_strategy,
00363                       classifier_option,
00364                       verbose,
00365                       output,
00366                       tmp_time
00367                       );
00368 
00369         elapsed_time += tmp_time;
00370 
00371         break;
00372         }
00373 
00374       // Dense Random coding
00375       case DENSE_RANDOM:
00376         {
00377         target[i] = dense_random_ecoc
00378                       (
00379                       subDataMat,
00380                       subLabels,
00381                       test_set,
00382                       test_labels,
00383                       decoding_strategy,
00384                       classifier_option,
00385                       n_matrices,
00386                       n_desired_classifers,
00387                       verbose,
00388                       output,
00389                       tmp_time
00390                       );
00391 
00392         elapsed_time += tmp_time;
00393 
00394         break;
00395         }
00396 
00397       // Sparse Random coding
00398       case SPARSE_RANDOM:
00399         {
00400         target[i] = sparse_random_ecoc
00401                       (
00402                       subDataMat,
00403                       subLabels,
00404                       test_set,
00405                       test_labels,
00406                       decoding_strategy,
00407                       classifier_option,
00408                       n_matrices,
00409                       n_desired_classifers,
00410                       verbose,
00411                       output,
00412                       tmp_time
00413                       );
00414 
00415         elapsed_time += tmp_time;
00416 
00417         break;
00418         }
00419 
00420       // ECOC One
00421       case ECOC_ONE:
00422         {
00423         target[i] = ecoc_one
00424                       (
00425                       subDataMat,
00426                       subLabels,
00427                       test_set,
00428                       test_labels,
00429                       thres,
00430                       decoding_strategy,
00431                       classifier_option,
00432                       criterion_option,
00433                       n_matrices,
00434                       n_desired_classifers,
00435                       validation,
00436                       init_coding_strategy,
00437                       ecocone_mode,
00438                       max_iter,
00439                       epsilon,
00440                       wvalidation,
00441                       verbose,
00442                       output,
00443                       tmp_time
00444                       );
00445 
00446         elapsed_time += tmp_time;
00447 
00448         break;
00449         }
00450 
00451       // Forest ECOC
00452       case FOREST_ECOC:
00453         {
00454         target[i] = forest_ecoc
00455                       (
00456                       subDataMat,
00457                       subLabels,
00458                       test_set,
00459                       test_labels,
00460                       decoding_strategy,
00461                       classifier_option,
00462                       criterion_option,
00463                       n_matrices,
00464                       verbose,
00465                       output,
00466                       tmp_time
00467                       );
00468 
00469         elapsed_time += tmp_time;
00470 
00471         break;
00472         }
00473 
00474       // User Custom coding
00475       case CUSTOM_CODING:
00476         {
00477         target[i] = custom_coding
00478                       (
00479                       subDataMat,
00480                       subLabels,
00481                       test_set,
00482                       test_labels,
00483                       codmat,
00484                       decoding_strategy,
00485                       classifier_option,
00486                       verbose,
00487                       output,
00488                       tmp_time
00489                       );
00490 
00491         elapsed_time += tmp_time;
00492 
00493         break;
00494         }
00495 
00496       }
00497 
00498     target[i] /= double(test_set.n_rows);
00499 
00500     ClassData::globalIndex = 0;
00501     }
00502 
00503   free(fold_start);
00504   free(perm);
00505 
00506   double tempSum = 0.0;
00507   for(u32 i = 0; i < n_folds; i++)
00508     {
00509     tempSum += target[i];
00510     }
00511 
00512   tempSum /= double(n_folds);
00513 
00514   free(target);
00515 
00516   return tempSum;
00517   }
00518 
00519 
00520 
00541 double
00542 cvonesvsone
00543   (
00544   const mat& samples,
00545   const icolvec& labels,
00546   const u32 n_folds,
00547   const u32 n_classes,
00548   const u32 classifier_option,
00549   const int decoding_strategy,
00550   const bool verbose,
00551   double& elapsed_time,
00552   ofstream& output
00553   )
00554   {
00555   // dummies, overrides
00556   imat codmat;
00557   Threshold thres(100,100,100);
00558   double mr;
00559   double mc;
00560 
00561   return cross_validation
00562            (
00563            samples,
00564            labels,
00565            codmat,
00566            thres,
00567            n_folds,
00568            n_classes,
00569            0,
00570            classifier_option,
00571            ONE_VS_ONE,
00572            decoding_strategy,
00573            0,
00574            0,
00575            0,
00576            0,
00577            0,
00578            0,
00579            0,
00580            0,
00581            verbose,
00582            mr,
00583            mc,
00584            elapsed_time,
00585            output
00586            );
00587 
00588   }
00589 
00590 
00591 
00612 double
00613 cvonesvsall
00614   (
00615   const mat& samples,
00616   const icolvec& labels,
00617   const u32 n_folds,
00618   const u32 n_classes,
00619   const u32 classifier_option,
00620   const int decoding_strategy,
00621   const bool verbose,
00622   double& elapsed_time,
00623   ofstream& output
00624   )
00625   {
00626   // dummies, overrides
00627   imat codmat;
00628   Threshold thres(100,100,100);
00629   double mr;
00630   double mc;
00631 
00632   return cross_validation
00633            (
00634            samples,
00635            labels,
00636            codmat,
00637            thres,
00638            n_folds,
00639            n_classes,
00640            0,
00641            classifier_option,
00642            ONE_VS_ALL,
00643            decoding_strategy,
00644            0,
00645            0,
00646            0,
00647            0,
00648            0,
00649            0,
00650            0,
00651            0,
00652            verbose,
00653            mr,
00654            mc,
00655            elapsed_time,
00656            output
00657            );
00658 
00659   }
00660 
00661 
00662 
00684 double
00685 cvdecoc
00686   (
00687   const mat& samples,
00688   const icolvec& labels,
00689   const u32 n_folds,
00690   const u32 n_classes,
00691   const u32 classifier_option,
00692   const int decoding_strategy,
00693   const u32 criterion_option,
00694   const bool verbose,
00695   double& elapsed_time,
00696   ofstream& output
00697   )
00698   {
00699   // dummies, overrides
00700   imat codmat;
00701   Threshold thres(100,100,100);
00702   double mr;
00703   double mc;
00704 
00705   return cross_validation
00706            (
00707            samples,
00708            labels,
00709            codmat,
00710            thres,
00711            n_folds,
00712            n_classes,
00713            criterion_option,
00714            classifier_option,
00715            DECOC,
00716            decoding_strategy,
00717            0,
00718            0,
00719            0,
00720            0,
00721            0,
00722            0,
00723            0,
00724            0,
00725            verbose,
00726            mr,
00727            mc,
00728            elapsed_time,
00729            output
00730            );
00731 
00732   }
00733 
00734 
00735 
00760 double
00761 cvsubdecoc
00762   (
00763   const mat& samples,
00764   const icolvec& labels,
00765   const u32 n_folds,
00766   const u32 n_classes,
00767   const u32 classifier_option,
00768   const int decoding_strategy,
00769   const u32 criterion_option,
00770   const Threshold& thres,
00771   const bool verbose,
00772   double& mr,
00773   double& mc,
00774   double& elapsed_time,
00775   ofstream& output
00776   )
00777   {
00778   // dummies, overrides
00779   imat codmat;
00780 
00781   return cross_validation
00782            (
00783            samples,
00784            labels,
00785            codmat,
00786            thres,
00787            n_folds,
00788            n_classes,
00789            criterion_option,
00790            classifier_option,
00791            SUBDECOC,
00792            decoding_strategy,
00793            0,
00794            0,
00795            0,
00796            0,
00797            0,
00798            0,
00799            0,
00800            0,
00801            verbose,
00802            mr,
00803            mc,
00804            elapsed_time,
00805            output
00806            );
00807 
00808   }
00809 
00810 
00811 
00837 double
00838 cvsparserand
00839   (
00840   const mat& samples,
00841   const icolvec& labels,
00842   const u32 n_folds,
00843   const u32 n_classes,
00844   const u32 classifier_option,
00845   const int decoding_strategy,
00846   const u32 n_matrices,
00847   const u32 n_desired_classifers,
00848   const bool verbose,
00849   double& elapsed_time,
00850   ofstream& output
00851   )
00852   {
00853   // dummies, overrides
00854   imat codmat;
00855   Threshold thres(100,100,100);
00856   double mr;
00857   double mc;
00858 
00859   return cross_validation
00860            (
00861            samples,
00862            labels,
00863            codmat,
00864            thres,
00865            n_folds,
00866            n_classes,
00867            0,
00868            classifier_option,
00869            SPARSE_RANDOM,
00870            decoding_strategy,
00871            n_matrices,
00872            n_desired_classifers,
00873            0,
00874            0,
00875            0,
00876            0,
00877            0,
00878            0,
00879            verbose,
00880            mr,
00881            mc,
00882            elapsed_time,
00883            output
00884            );
00885 
00886   }
00887 
00888 
00889 
00915 double
00916 cvdenserand
00917   (
00918   const mat& samples,
00919   const icolvec& labels,
00920   const u32 n_folds,
00921   const u32 n_classes,
00922   const u32 classifier_option,
00923   const int decoding_strategy,
00924   const u32 n_matrices,
00925   const u32 n_desired_classifers,
00926   const bool verbose,
00927   double& elapsed_time,
00928   ofstream& output
00929   )
00930   {
00931   // dummies, overrides
00932   imat codmat;
00933   Threshold thres(100,100,100);
00934   double mr;
00935   double mc;
00936 
00937   return cross_validation
00938            (
00939            samples,
00940            labels,
00941            codmat,
00942            thres,
00943            n_folds,
00944            n_classes,
00945            0,
00946            classifier_option,
00947            DENSE_RANDOM,
00948            decoding_strategy,
00949            n_matrices,
00950            n_desired_classifers,
00951            0,
00952            0,
00953            0,
00954            0,
00955            0,
00956            0,
00957            verbose,
00958            mr,
00959            mc,
00960            elapsed_time,
00961            output
00962            );
00963 
00964   }
00965 
00966 
00967 
00995 double
00996 cvecocone
00997   (
00998   const mat& samples,
00999   const icolvec& labels,
01000   const u32 n_folds,
01001   const u32 n_classes,
01002   const u32 classifier_option,
01003   const int decoding_strategy,
01004   const double validation,
01005   const int init_coding_strategy,
01006   const int ecocone_mode,
01007   const u32 max_iter,
01008   const double epsilon,
01009   const double wvalidation,
01010   const bool verbose,
01011   double& elapsed_time,
01012   ofstream& output
01013   )
01014   {
01015   // dummies, overrides
01016   imat codmat;
01017   Threshold thres(100,100,100);
01018   double mr;
01019   double mc;
01020 
01021   return cross_validation
01022            (
01023            samples,
01024            labels,
01025            codmat,
01026            thres,
01027            n_folds,
01028            n_classes,
01029            0,
01030            classifier_option,
01031            ECOC_ONE,
01032            decoding_strategy,
01033            0,
01034            0,
01035            validation,
01036            init_coding_strategy,
01037            ecocone_mode,
01038            max_iter,
01039            epsilon,
01040            wvalidation,
01041            verbose,
01042            mr,
01043            mc,
01044            elapsed_time,
01045            output
01046            );
01047 
01048   }
01049 
01050 
01051 
01073 double
01074 cvforest
01075   (
01076   const mat& samples,
01077   const icolvec& labels,
01078   const u32 n_folds,
01079   const u32 n_classes,
01080   const u32 classifier_option,
01081   const int decoding_strategy,
01082   const u32 n_trees,
01083   const bool verbose,
01084   double& elapsed_time,
01085   ofstream& output
01086   )
01087   {
01088   // dummies, overrides
01089   imat codmat;
01090   Threshold thres(100,100,100);
01091   double mr;
01092   double mc;
01093 
01094   return cross_validation
01095            (
01096            samples,
01097            labels,
01098            codmat,
01099            thres,
01100            n_folds,
01101            n_classes,
01102            0,
01103            classifier_option,
01104            FOREST_ECOC,
01105            decoding_strategy,
01106            n_trees,
01107            0,
01108            0,
01109            0,
01110            0,
01111            0,
01112            0,
01113            0,
01114            verbose,
01115            mr,
01116            mc,
01117            elapsed_time,
01118            output
01119            );
01120 
01121   }
01122 
01123 
01124 
01146 double
01147 cvcustom
01148   (
01149   const mat& samples,
01150   const icolvec& labels,
01151   const u32 n_folds,
01152   const u32 n_classes,
01153   const u32 classifier_option,
01154   const int decoding_strategy,
01155   const imat& codmat,
01156   const bool verbose,
01157   double& elapsed_time,
01158   ofstream& output
01159   )
01160   {
01161   // dummies, overrides
01162   Threshold thres(100,100,100);
01163   double mr;
01164   double mc;
01165 
01166   return cross_validation
01167            (
01168            samples,
01169            labels,
01170            codmat,
01171            thres,
01172            n_folds,
01173            n_classes,
01174            0,
01175            classifier_option,
01176            CUSTOM_CODING,
01177            decoding_strategy,
01178            0,
01179            0,
01180            0,
01181            0,
01182            0,
01183            0,
01184            0,
01185            0,
01186            verbose,
01187            mr,
01188            mc,
01189            elapsed_time,
01190            output
01191            );
01192 
01193   }
01194 
01195 
01196 
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerator Defines