ECOCPAK: fn_loss_weighted_decoding.hpp Source File

ECOCPAK v0.9
00001 // Copyright (C) 2011 the authors listed below
00002 // http://ecocpak.sourceforge.net
00003 //
00004 // Authors:
00005 // - Dimitrios Bouzas (bouzas at ieee dot org)
00006 // - Nikolaos Arvanitopoulos (niarvani at ieee dot org)
00007 // - Anastasios Tefas (tefas at aiia dot csd dot auth dot gr)
00008 //
00009 // This file is part of the ECOC PAK C++ library. It is
00010 // provided without any warranty of fitness for any purpose.
00011 //
00012 // You can redistribute this file and/or modify it under
00013 // the terms of the GNU Lesser General Public License (LGPL)
00014 // as published by the Free Software Foundation, either
00015 // version 3 of the License or (at your option) any later
00016 // version.
00017 // (see http://www.opensource.org/licenses for more info)
00018 
00019 
00022 
00023 
00024 
00043 u32
00044 linear_loss_weighted_decoding
00045   (
00046   const vector<Classifier*>& classifiers_vector,
00047   const vector<ClassData>& classes_vector,
00048   const imat& ecoc_matrix,
00049   const mat& test_set_samples,
00050   const uvec& test_set_labels,
00051   uvec& results,
00052   umat& confussion
00053   )
00054   {
00055   #ifdef NCURSES_OUTPUT
00056   // print status header
00057   mvaddstr(3, 0, "Computing decoding:");
00058   refresh();
00059   #endif
00060 
00061   // number of test set samples
00062   const u32 test_set_size = test_set_samples.n_rows;
00063 
00064   // number of misclasified test samples
00065   u32 error = 0;
00066 
00067   // allocate memory for hypothesis matrix
00068   mat hypothesis_matrix = zeros<mat>
00069                             (
00070                             ecoc_matrix.n_rows,
00071                             ecoc_matrix.n_cols
00072                             );
00073 
00074   // allocate memory for matrix that stores the results of the
00075   // classification procedure
00076   results = zeros<uvec>(test_set_size);
00077 
00078   // for each classifier
00079   for(u32 j = 0; j < classifiers_vector.size(); j++)
00080     {
00081     // for each subclass
00082     for(u32 i = 0; i < classes_vector.size(); i++)
00083       {
00084       // temporary sum
00085       double tmp_sum = 0.0;
00086 
00087       int sign;
00088 
00089       // for each sample in subclass i evaluate the classifier j
00090       for(u32 k = 0; k < classes_vector[i].Samples(); k++)
00091         {
00092         (
00093         classifiers_vector[j]->predict
00094                                 (
00095                                 classes_vector[i].Data().row(k)
00096                                 ) < 0
00097         )
00098         ?
00099         sign = -1
00100         :
00101         sign = +1;
00102 
00103         if(sign == ecoc_matrix(i, j))
00104           {
00105           tmp_sum += 1;
00106           }
00107 
00108         }
00109 
00110       // assign to H(i,j) the sum divided by the number of samples in
00111       // subclass i
00112       hypothesis_matrix(i, j) =
00113         tmp_sum / double(classes_vector[i].Samples());
00114       }
00115 
00116     }
00117 
00118   // allocate memory for the weighted matrix
00119   // which is the normalization of the hpothesis matrix
00120   mat weighted_matrix = zeros<mat>
00121                           (
00122                           hypothesis_matrix.n_rows,
00123                           hypothesis_matrix.n_cols
00124                           );
00125 
00126   // column vector that holds the sum of each row of the hypothesis
00127   // matrix
00128   colvec tmp_sum = sum(hypothesis_matrix, 1);
00129 
00130   // normalize the rows of the hypothesis matrix and assign the results
00131   // in the corresponding index of the weighted matrix
00132   for(u32 i = 0; i < weighted_matrix.n_rows; i++)
00133     {
00134 
00135     if(tmp_sum[i] != 0.0)
00136       {
00137       for(u32 j = 0; j < weighted_matrix.n_cols; j++)
00138         {
00139         // divide each item of the hypothesis matrix
00140         // by the sum of the corresponding row
00141         weighted_matrix(i, j) = hypothesis_matrix(i, j) / tmp_sum[i];
00142         }
00143 
00144       }
00145     else
00146       {
00147       weighted_matrix.row(i).fill(0.0);
00148       }
00149 
00150     #ifdef NCURSES_OUTPUT
00151     // print spin chars
00152     mvaddch(3, 20, spin_chars[i & 3]);
00153     refresh();
00154     #endif
00155     }
00156 
00157   // allocate memory for the distances between the test_set samples and
00158   // each subclass
00159   mat distance = zeros<mat>(classes_vector.size(), test_set_size);
00160 
00161   // calculate the distance of each test_set sample from each subclass
00162   for(u32 s = 0; s < test_set_size; s++)
00163     {
00164     // initialize to maximum
00165     double m = numeric_limits<double>::max();
00166 
00167     // initialize to zero
00168     u32 index = 0;
00169 
00170     for(u32 i = 0; i < classes_vector.size(); i++)
00171       {
00172       double tmp_sum = 0.0;
00173 
00174       for(u32 j = 0; j < classifiers_vector.size(); j++)
00175         {
00176         double sign = 0.0;
00177 
00178         (
00179         classifiers_vector[j]->predict
00180                                 (
00181                                 test_set_samples.row(s)
00182                                 ) < 0
00183         )
00184         ?
00185         sign = -1
00186         :
00187         sign = +1;
00188 
00189         // loss weighted decoding assignment
00190         tmp_sum +=
00191           (-1.0) * ecoc_matrix(i, j) * sign * weighted_matrix(i, j);
00192         }
00193 
00194       // distance of test_set sample s from subclass i
00195       distance(i, s) = tmp_sum;
00196 
00197       if(m > distance(i, s))
00198         {
00199         m = distance(i, s);
00200         index = i;
00201         }
00202 
00203       }
00204 
00205     // assign to the test_set sample i the class that it belongs after
00206     // the end of the procedure
00207     results[s] = classes_vector[index].ClassLabel();
00208 
00209     if(results[s] != test_set_labels[s])
00210       {
00211       error++;
00212       }
00213 
00214     confussion(results[s] - 1, test_set_labels[s] - 1)++;
00215 
00216     #ifdef NCURSES_OUTPUT
00217     // print spin chars
00218     mvaddch(3, 20, spin_chars[s & 3]);
00219     refresh();
00220     #endif
00221     }
00222 
00223   return error;
00224   }
00225 
00226 
00227 
00228 
00247 u32
00248 exponential_loss_weighted_decoding
00249   (
00250   const vector<Classifier*>& classifiers_vector,
00251   const vector<ClassData>& classes_vector,
00252   const imat& ecoc_matrix,
00253   const mat& test_set_samples,
00254   const uvec& test_set_labels,
00255   uvec& results,
00256   umat& confussion
00257   )
00258   {
00259   #ifdef NCURSES_OUTPUT
00260   // print status header
00261   mvaddstr(3, 0, "Computing decoding:");
00262   refresh();
00263   #endif
00264 
00265   // number of test set samples
00266   const u32 test_set_size = test_set_samples.n_rows;
00267 
00268   // number of misclasified test samples
00269   u32 error = 0;
00270 
00271   // allocate memory for hypothesis matrix
00272   mat hypothesis_matrix = zeros<mat>
00273                             (
00274                             ecoc_matrix.n_rows,
00275                             ecoc_matrix.n_cols
00276                             );
00277 
00278   // allocate memory for matrix that stores the results of the
00279   // classification procedure
00280   results = zeros<uvec>(test_set_size);
00281 
00282   // for each classifier
00283   for(u32 j = 0; j < classifiers_vector.size(); j++)
00284     {
00285     // for each subclass
00286     for(u32 i = 0; i < classes_vector.size(); i++)
00287       {
00288       // temporary sum
00289       double tmp_sum = 0.0;
00290 
00291       int sign;
00292 
00293       // for each sample in subclass i evaluate the classifier j
00294       for(u32 k = 0; k < classes_vector[i].Samples(); k++)
00295         {
00296         (
00297         classifiers_vector[j]->predict
00298                                 (
00299                                 classes_vector[i].Data().row(k)
00300                                 ) < 0
00301         )
00302         ?
00303         sign = -1
00304         :
00305         sign = +1;
00306 
00307         if(sign == ecoc_matrix(i, j))
00308           {
00309           tmp_sum += 1;
00310           }
00311 
00312         }
00313 
00314       // assign to H(i,j) the sum divided by the number of samples in
00315       // subclass i
00316       hypothesis_matrix(i, j) =
00317         tmp_sum / double(classes_vector[i].Samples());
00318       }
00319 
00320     }
00321 
00322   // allocate memory for the weighted matrix
00323   // which is the normalization of the hpothesis matrix
00324   mat weighted_matrix = zeros<mat>
00325                           (
00326                           hypothesis_matrix.n_rows,
00327                           hypothesis_matrix.n_cols
00328                           );
00329 
00330   // column vector that holds the sum of each row of the hypothesis
00331   // matrix
00332   colvec tmp_sum = sum(hypothesis_matrix, 1);
00333 
00334   // normalize the rows of the hypothesis matrix and assign the results
00335   // in the corresponding index of the weighted matrix
00336   for(u32 i = 0; i < weighted_matrix.n_rows; i++)
00337     {
00338 
00339     if(tmp_sum[i] != 0.0)
00340       {
00341       for(u32 j = 0; j < weighted_matrix.n_cols; j++)
00342         {
00343         // divide each item of the hypothesis matrix
00344         // by the sum of the corresponding row
00345         weighted_matrix(i, j) = hypothesis_matrix(i, j) / tmp_sum[i];
00346         }
00347 
00348       }
00349     else
00350       {
00351       weighted_matrix.row(i).fill(0.0);
00352       }
00353 
00354     #ifdef NCURSES_OUTPUT
00355     // print spin chars
00356     mvaddch(3, 20, spin_chars[i & 3]);
00357     refresh();
00358     #endif
00359     }
00360 
00361   // allocate memory for the distances between the test_set samples and
00362   // each subclass
00363   mat distance = zeros<mat>(classes_vector.size(), test_set_size);
00364 
00365   // calculate the distance of each test_set sample from each subclass
00366   for(u32 s = 0; s < test_set_size; s++)
00367     {
00368     // initialize to maximum
00369     double m = numeric_limits<double>::max();
00370 
00371     // initialize to zero
00372     u32 index = 0;
00373 
00374     for(u32 i = 0; i < classes_vector.size(); i++)
00375       {
00376       double tmp_sum = 0.0;
00377 
00378       for(u32 j = 0; j < classifiers_vector.size(); j++)
00379         {
00380         double sign = 0.0;
00381 
00382         (
00383         classifiers_vector[j]->predict
00384                                 (
00385                                 test_set_samples.row(s)
00386                                 ) < 0
00387         )
00388         ?
00389         sign = -1
00390         :
00391         sign = +1;
00392 
00393         // loss weighted decoding assignment
00394         tmp_sum +=
00395           exp((-1.0) * ecoc_matrix(i, j) * sign * weighted_matrix(i, j));
00396         }
00397 
00398       // distance of test_set sample s from subclass i
00399       distance(i, s) = tmp_sum;
00400 
00401       if(m > distance(i, s))
00402         {
00403         m = distance(i, s);
00404         index = i;
00405         }
00406 
00407       }
00408 
00409     // assign to the test_set sample i the class that it belongs after
00410     // the end of the procedure
00411     results[s] = classes_vector[index].ClassLabel();
00412 
00413     if(results[s] != test_set_labels[s])
00414       {
00415       error++;
00416       }
00417 
00418     confussion(results[s] - 1, test_set_labels[s] - 1)++;
00419 
00420     #ifdef NCURSES_OUTPUT
00421     // print spin chars
00422     mvaddch(3, 20, spin_chars[s & 3]);
00423     refresh();
00424     #endif
00425     }
00426 
00427   return error;
00428   }
00429 
00430 
00431