ECOCPAK v0.9
|
00001 // Copyright (C) 2011 the authors listed below 00002 // http://ecocpak.sourceforge.net 00003 // 00004 // Authors: 00005 // - Dimitrios Bouzas (bouzas at ieee dot org) 00006 // - Nikolaos Arvanitopoulos (niarvani at ieee dot org) 00007 // - Anastasios Tefas (tefas at aiia dot csd dot auth dot gr) 00008 // 00009 // This file is part of the ECOC PAK C++ library. It is 00010 // provided without any warranty of fitness for any purpose. 00011 // 00012 // You can redistribute this file and/or modify it under 00013 // the terms of the GNU Lesser General Public License (LGPL) 00014 // as published by the Free Software Foundation, either 00015 // version 3 of the License or (at your option) any later 00016 // version. 00017 // (see http://www.opensource.org/licenses for more info) 00018 00019 00022 00023 00024 00043 u32 00044 linear_loss_weighted_decoding 00045 ( 00046 const vector<Classifier*>& classifiers_vector, 00047 const vector<ClassData>& classes_vector, 00048 const imat& ecoc_matrix, 00049 const mat& test_set_samples, 00050 const uvec& test_set_labels, 00051 uvec& results, 00052 umat& confussion 00053 ) 00054 { 00055 #ifdef NCURSES_OUTPUT 00056 // print status header 00057 mvaddstr(3, 0, "Computing decoding:"); 00058 refresh(); 00059 #endif 00060 00061 // number of test set samples 00062 const u32 test_set_size = test_set_samples.n_rows; 00063 00064 // number of misclasified test samples 00065 u32 error = 0; 00066 00067 // allocate memory for hypothesis matrix 00068 mat hypothesis_matrix = zeros<mat> 00069 ( 00070 ecoc_matrix.n_rows, 00071 ecoc_matrix.n_cols 00072 ); 00073 00074 // allocate memory for matrix that stores the results of the 00075 // classification procedure 00076 results = zeros<uvec>(test_set_size); 00077 00078 // for each classifier 00079 for(u32 j = 0; j < classifiers_vector.size(); j++) 00080 { 00081 // for each subclass 00082 for(u32 i = 0; i < classes_vector.size(); i++) 00083 { 00084 // temporary sum 00085 double tmp_sum = 0.0; 00086 00087 int sign; 00088 00089 // for each sample in subclass i evaluate the classifier j 00090 for(u32 k = 0; k < classes_vector[i].Samples(); k++) 00091 { 00092 ( 00093 classifiers_vector[j]->predict 00094 ( 00095 classes_vector[i].Data().row(k) 00096 ) < 0 00097 ) 00098 ? 00099 sign = -1 00100 : 00101 sign = +1; 00102 00103 if(sign == ecoc_matrix(i, j)) 00104 { 00105 tmp_sum += 1; 00106 } 00107 00108 } 00109 00110 // assign to H(i,j) the sum divided by the number of samples in 00111 // subclass i 00112 hypothesis_matrix(i, j) = 00113 tmp_sum / double(classes_vector[i].Samples()); 00114 } 00115 00116 } 00117 00118 // allocate memory for the weighted matrix 00119 // which is the normalization of the hpothesis matrix 00120 mat weighted_matrix = zeros<mat> 00121 ( 00122 hypothesis_matrix.n_rows, 00123 hypothesis_matrix.n_cols 00124 ); 00125 00126 // column vector that holds the sum of each row of the hypothesis 00127 // matrix 00128 colvec tmp_sum = sum(hypothesis_matrix, 1); 00129 00130 // normalize the rows of the hypothesis matrix and assign the results 00131 // in the corresponding index of the weighted matrix 00132 for(u32 i = 0; i < weighted_matrix.n_rows; i++) 00133 { 00134 00135 if(tmp_sum[i] != 0.0) 00136 { 00137 for(u32 j = 0; j < weighted_matrix.n_cols; j++) 00138 { 00139 // divide each item of the hypothesis matrix 00140 // by the sum of the corresponding row 00141 weighted_matrix(i, j) = hypothesis_matrix(i, j) / tmp_sum[i]; 00142 } 00143 00144 } 00145 else 00146 { 00147 weighted_matrix.row(i).fill(0.0); 00148 } 00149 00150 #ifdef NCURSES_OUTPUT 00151 // print spin chars 00152 mvaddch(3, 20, spin_chars[i & 3]); 00153 refresh(); 00154 #endif 00155 } 00156 00157 // allocate memory for the distances between the test_set samples and 00158 // each subclass 00159 mat distance = zeros<mat>(classes_vector.size(), test_set_size); 00160 00161 // calculate the distance of each test_set sample from each subclass 00162 for(u32 s = 0; s < test_set_size; s++) 00163 { 00164 // initialize to maximum 00165 double m = numeric_limits<double>::max(); 00166 00167 // initialize to zero 00168 u32 index = 0; 00169 00170 for(u32 i = 0; i < classes_vector.size(); i++) 00171 { 00172 double tmp_sum = 0.0; 00173 00174 for(u32 j = 0; j < classifiers_vector.size(); j++) 00175 { 00176 double sign = 0.0; 00177 00178 ( 00179 classifiers_vector[j]->predict 00180 ( 00181 test_set_samples.row(s) 00182 ) < 0 00183 ) 00184 ? 00185 sign = -1 00186 : 00187 sign = +1; 00188 00189 // loss weighted decoding assignment 00190 tmp_sum += 00191 (-1.0) * ecoc_matrix(i, j) * sign * weighted_matrix(i, j); 00192 } 00193 00194 // distance of test_set sample s from subclass i 00195 distance(i, s) = tmp_sum; 00196 00197 if(m > distance(i, s)) 00198 { 00199 m = distance(i, s); 00200 index = i; 00201 } 00202 00203 } 00204 00205 // assign to the test_set sample i the class that it belongs after 00206 // the end of the procedure 00207 results[s] = classes_vector[index].ClassLabel(); 00208 00209 if(results[s] != test_set_labels[s]) 00210 { 00211 error++; 00212 } 00213 00214 confussion(results[s] - 1, test_set_labels[s] - 1)++; 00215 00216 #ifdef NCURSES_OUTPUT 00217 // print spin chars 00218 mvaddch(3, 20, spin_chars[s & 3]); 00219 refresh(); 00220 #endif 00221 } 00222 00223 return error; 00224 } 00225 00226 00227 00228 00247 u32 00248 exponential_loss_weighted_decoding 00249 ( 00250 const vector<Classifier*>& classifiers_vector, 00251 const vector<ClassData>& classes_vector, 00252 const imat& ecoc_matrix, 00253 const mat& test_set_samples, 00254 const uvec& test_set_labels, 00255 uvec& results, 00256 umat& confussion 00257 ) 00258 { 00259 #ifdef NCURSES_OUTPUT 00260 // print status header 00261 mvaddstr(3, 0, "Computing decoding:"); 00262 refresh(); 00263 #endif 00264 00265 // number of test set samples 00266 const u32 test_set_size = test_set_samples.n_rows; 00267 00268 // number of misclasified test samples 00269 u32 error = 0; 00270 00271 // allocate memory for hypothesis matrix 00272 mat hypothesis_matrix = zeros<mat> 00273 ( 00274 ecoc_matrix.n_rows, 00275 ecoc_matrix.n_cols 00276 ); 00277 00278 // allocate memory for matrix that stores the results of the 00279 // classification procedure 00280 results = zeros<uvec>(test_set_size); 00281 00282 // for each classifier 00283 for(u32 j = 0; j < classifiers_vector.size(); j++) 00284 { 00285 // for each subclass 00286 for(u32 i = 0; i < classes_vector.size(); i++) 00287 { 00288 // temporary sum 00289 double tmp_sum = 0.0; 00290 00291 int sign; 00292 00293 // for each sample in subclass i evaluate the classifier j 00294 for(u32 k = 0; k < classes_vector[i].Samples(); k++) 00295 { 00296 ( 00297 classifiers_vector[j]->predict 00298 ( 00299 classes_vector[i].Data().row(k) 00300 ) < 0 00301 ) 00302 ? 00303 sign = -1 00304 : 00305 sign = +1; 00306 00307 if(sign == ecoc_matrix(i, j)) 00308 { 00309 tmp_sum += 1; 00310 } 00311 00312 } 00313 00314 // assign to H(i,j) the sum divided by the number of samples in 00315 // subclass i 00316 hypothesis_matrix(i, j) = 00317 tmp_sum / double(classes_vector[i].Samples()); 00318 } 00319 00320 } 00321 00322 // allocate memory for the weighted matrix 00323 // which is the normalization of the hpothesis matrix 00324 mat weighted_matrix = zeros<mat> 00325 ( 00326 hypothesis_matrix.n_rows, 00327 hypothesis_matrix.n_cols 00328 ); 00329 00330 // column vector that holds the sum of each row of the hypothesis 00331 // matrix 00332 colvec tmp_sum = sum(hypothesis_matrix, 1); 00333 00334 // normalize the rows of the hypothesis matrix and assign the results 00335 // in the corresponding index of the weighted matrix 00336 for(u32 i = 0; i < weighted_matrix.n_rows; i++) 00337 { 00338 00339 if(tmp_sum[i] != 0.0) 00340 { 00341 for(u32 j = 0; j < weighted_matrix.n_cols; j++) 00342 { 00343 // divide each item of the hypothesis matrix 00344 // by the sum of the corresponding row 00345 weighted_matrix(i, j) = hypothesis_matrix(i, j) / tmp_sum[i]; 00346 } 00347 00348 } 00349 else 00350 { 00351 weighted_matrix.row(i).fill(0.0); 00352 } 00353 00354 #ifdef NCURSES_OUTPUT 00355 // print spin chars 00356 mvaddch(3, 20, spin_chars[i & 3]); 00357 refresh(); 00358 #endif 00359 } 00360 00361 // allocate memory for the distances between the test_set samples and 00362 // each subclass 00363 mat distance = zeros<mat>(classes_vector.size(), test_set_size); 00364 00365 // calculate the distance of each test_set sample from each subclass 00366 for(u32 s = 0; s < test_set_size; s++) 00367 { 00368 // initialize to maximum 00369 double m = numeric_limits<double>::max(); 00370 00371 // initialize to zero 00372 u32 index = 0; 00373 00374 for(u32 i = 0; i < classes_vector.size(); i++) 00375 { 00376 double tmp_sum = 0.0; 00377 00378 for(u32 j = 0; j < classifiers_vector.size(); j++) 00379 { 00380 double sign = 0.0; 00381 00382 ( 00383 classifiers_vector[j]->predict 00384 ( 00385 test_set_samples.row(s) 00386 ) < 0 00387 ) 00388 ? 00389 sign = -1 00390 : 00391 sign = +1; 00392 00393 // loss weighted decoding assignment 00394 tmp_sum += 00395 exp((-1.0) * ecoc_matrix(i, j) * sign * weighted_matrix(i, j)); 00396 } 00397 00398 // distance of test_set sample s from subclass i 00399 distance(i, s) = tmp_sum; 00400 00401 if(m > distance(i, s)) 00402 { 00403 m = distance(i, s); 00404 index = i; 00405 } 00406 00407 } 00408 00409 // assign to the test_set sample i the class that it belongs after 00410 // the end of the procedure 00411 results[s] = classes_vector[index].ClassLabel(); 00412 00413 if(results[s] != test_set_labels[s]) 00414 { 00415 error++; 00416 } 00417 00418 confussion(results[s] - 1, test_set_labels[s] - 1)++; 00419 00420 #ifdef NCURSES_OUTPUT 00421 // print spin chars 00422 mvaddch(3, 20, spin_chars[s & 3]); 00423 refresh(); 00424 #endif 00425 } 00426 00427 return error; 00428 } 00429 00430 00431