ECOCPAK v0.9
// Copyright (C) 2011 the authors listed below
// http://ecocpak.sourceforge.net
//
// Authors:
//  - Dimitrios Bouzas (bouzas at ieee dot org)
//  - Nikolaos Arvanitopoulos (niarvani at ieee dot org)
//  - Anastasios Tefas (tefas at aiia dot csd dot auth dot gr)
//
// This file is part of the ECOC PAK C++ library. It is
// provided without any warranty of fitness for any purpose.
//
// You can redistribute this file and/or modify it under
// the terms of the GNU Lesser General Public License (LGPL)
// as published by the Free Software Foundation, either
// version 3 of the License or (at your option) any later
// version.
// (see http://www.opensource.org/licenses for more info)



// Fit the parameters A and B of the sigmoid
// p(f) = 1 / (1 + exp(A*f + B)), which maps raw classifier outputs f
// to posterior probability estimates (Platt scaling). The first n_pos
// entries of f are the responses on positive examples and the
// remaining n_neg entries the responses on negative examples.
void
sigmoid_training
  (
  double& A,
  double& B,
  const vec f,
  const u32 n_pos,
  const u32 n_neg
  )
  {
  // initialize the sigmoid's parameters
  A = 0;
  B = log(double(n_neg + 1) / double(n_pos + 1));

  // initialize auxiliary parameters
  double hiTarget = double(n_pos + 1) / double(n_pos + 2);
  double loTarget = 1.0 / double(n_neg + 2);
  double lambda = 1e-3;
  double olderr = 1e300;

  // temporary vector to store the current estimate of the probability
  // of each example
  vec pp(f.n_elem);
  pp.fill(double(n_pos + 1) / double(n_neg + n_pos + 2));

  // counter of consecutive iterations with negligible error change
  int count = 0;

  // iterate at most 100 times
  for(u32 iter = 0; iter < 100; iter++)
    {
    // initialize auxiliary loop variables
    double a = 0.0;
    double b = 0.0;
    double c = 0.0;
    double d = 0.0;
    double e = 0.0;
    double t = 0.0;

    // first, compute the Hessian and the gradient of the error
    // function with respect to A and B
    for(u32 i = 0; i < f.n_elem; i++)
      {
      // if the ith example is positive set t to hiTarget,
      // otherwise set t to loTarget
      t = (i < n_pos) ? hiTarget : loTarget;

      double d1 = pp[i] - t;
      double d2 = pp[i] * (1 - pp[i]);

      a += f[i] * f[i] * d2;
      b += d2;
      c += f[i] * d2;
      d += f[i] * d1;
      e += d1;
      }

    // if the gradient is really tiny, then stop
    if(abs(d) < 1e-9 && abs(e) < 1e-9)
      {
      break;
      }

    // remember the current parameters and reset the error
    double oldA = A;
    double oldB = B;
    double err = 0.0;

    // loop until the goodness of fit increases
    while(true)
      {
      // determinant of the lambda-stabilized Hessian
      double D = ((a + lambda) * (b + lambda)) - (c * c);

      // if the determinant is zero, increase the stabilizer and retry
      if(D == 0)
        {
        lambda *= 10.0;
        continue;
        }

      // update A and B (stabilized Newton step)
      A = oldA + ((((b + lambda) * d) - (c * e)) / D);
      B = oldB + ((((a + lambda) * e) - (c * d)) / D);

      // compute the goodness of fit
      err = 0.0;

      for(u32 i = 0; i < f.n_elem; i++)
        {
        // recompute the ith example's target (the original code reused
        // the stale t left over from the gradient loop)
        t = (i < n_pos) ? hiTarget : loTarget;

        // compute the sigmoid
        double p = 1.0 / (1.0 + exp((f[i] * A) + B));

        pp[i] = p;

        // make sure log(0) returns -200, as in Platt's pseudocode
        double log_p  = (p > 0.0) ? log(p)       : -200.0;
        double log_1p = (p < 1.0) ? log(1.0 - p) : -200.0;

        err -= (t * log_p) + ((1 - t) * log_1p);
        }

      if(err < olderr * (1 + 1e-7))
        {
        // decrease the stabilizer
        lambda *= 0.1;
        break;
        }

      // the error did not decrease: increase the stabilizer by a
      // factor of 10 and try again
      lambda *= 10.0;

      // something is broken, give up
      if(lambda >= 1e6)
        {
        break;
        }
      }

    double diff = err - olderr;
    double scale = 0.5 * (err + olderr + 1.0);

    if(diff > (-1e-3 * scale) && diff < (1e-7 * scale))
      {
      count++;
      }
    else
      {
      count = 0;
      }

    olderr = err;

    // stop after three consecutive iterations with negligible change
    if(count == 3)
      {
      break;
      }
    }
  }
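

// ------------------------------------------------------------------ //
// Illustrative usage sketch (not part of the original library): once
// sigmoid_training() has fitted (A, B), a raw classifier response f
// is mapped to a posterior estimate via p = 1 / (1 + exp(A*f + B)).
// The helper name platt_posterior is hypothetical.
//
//   inline double
//   platt_posterior(const double A, const double B, const double f)
//     {
//     return 1.0 / (1.0 + exp((A * f) + B));
//     }
//
//   // e.g., after collecting training responses in f_train:
//   double A, B;
//   sigmoid_training(A, B, f_train, n_pos, n_neg);
//   const double p = platt_posterior(A, B, f_new);
// ------------------------------------------------------------------ //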


// Decode a test set with the probabilistic decoding scheme: fit a
// sigmoid to each binary classifier's outputs, combine the resulting
// posteriors per coding matrix row, and assign each test sample to
// the most probable class. Returns the number of misclassified
// test samples.
u32
probabilistic_decoding
  (
  const vector<Classifier*>& classifiers_vector,
  const vector<ClassData>& classes_vector,
  const imat& coding_matrix,
  const mat& test_samples,
  const ucolvec& test_set_labels,
  uvec& predictions,
  umat& confussion
  )
  {
  #ifdef NCURSES_OUTPUT
  // print status header
  mvaddstr(3, 0, "Computing Probabilistic Decoding:");
  refresh();
  #endif

  // ================================================================== //
  // ||                DECLARATIONS - INITIALIZATION                 || //
  // ================================================================== //

  // number of test set samples
  const u32 n_test_samples = test_samples.n_rows;

  // number of coding matrix rows (i.e., number of classes)
  const u32 n_rows = coding_matrix.n_rows;

  // number of coding matrix columns (i.e., number of classifiers)
  const u32 n_cols = coding_matrix.n_cols;

  // number of misclassified test samples -- initialization
  u32 n_missed = 0;

  // ================================================================== //
  // ||               BEGIN MAIN PART OF THE ALGORITHM               || //
  // ================================================================== //

  // ================================================================ //
  // || Step No.1: Compute alpha. alpha denotes the probability     || //
  // || mass dispersed over invalid codewords.                      || //
  // ================================================================ //

  // number of all possible codewords
  double p = pow(2.0, double(n_cols));

  // number of valid codewords for the current coding matrix
  double d = 0.0;

  // iterate through the rows of the coding matrix to find the zero
  // positions and count the valid codewords
  for(u32 i = 0; i < n_rows; i++)
    {
    d += pow(2.0, double(accu(coding_matrix.row(i) == 0)));
    }

  // compute alpha (i.e., probability of an invalid codeword)
  double alpha = double(p - d) / double(p * n_rows);
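
  // ---------------------------------------------------------------- //
  // Worked example (illustrative): for a 3-class one-versus-one
  // design the coding matrix is 3 x 3 and every row contains exactly
  // one zero. Then p = 2^3 = 8 possible codewords, each row
  // contributes 2^1 = 2 valid codewords (so d = 6), and
  //
  //   alpha = (p - d) / (p * n_rows) = (8 - 6) / (8 * 3) = 1/12
  // ---------------------------------------------------------------- //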

  // ================================================================ //
  // || Step No.2: Compute the A's and B's of the sigmoid for each  || //
  // || binary classifier.                                          || //
  // ================================================================ //

  // allocate A's and B's matrix -- 1st column A's, 2nd column B's
  mat AB = zeros<mat>(n_cols, 2);

  // iterate through the binary classifiers
  for(u32 i = 0; i < n_cols; i++)
    {
    // number of the classifier's training samples
    const u32 n_train =
      classifiers_vector[i]->n_pos + classifiers_vector[i]->n_neg;

    // allocate vector of the classifier's responses
    vec f = zeros<vec>(n_train);

    // classifier's responses index
    u32 ind = 0;

    // iterate through the positive classes of the current classifier
    for(u32 j = 0; j < classifiers_vector[i]->pos.size(); j++)
      {
      // pointer to the current class
      ClassData* tmp = classifiers_vector[i]->pos[j];

      // iterate through the current class' samples
      for(u32 k = 0; k < tmp->Samples(); k++)
        {
        f[ind] = classifiers_vector[i]->predict((tmp->Data()).row(k));
        ind++;
        }
      }

    // iterate through the negative classes of the current classifier
    for(u32 j = 0; j < classifiers_vector[i]->neg.size(); j++)
      {
      // pointer to the current class
      ClassData* tmp = classifiers_vector[i]->neg[j];

      // iterate through the current class' samples
      for(u32 k = 0; k < tmp->Samples(); k++)
        {
        f[ind] = classifiers_vector[i]->predict((tmp->Data()).row(k));
        ind++;
        }
      }

    // compute A and B of the current classifier
    sigmoid_training
      (
      AB(i,0),
      AB(i,1),
      f,
      classifiers_vector[i]->n_pos,
      classifiers_vector[i]->n_neg
      );
    }

  // ================================================================ //
  // || Step No.3: Compute conditional probabilities and assign     || //
  // || each test sample to the winning class.                      || //
  // ================================================================ //

  // iterate through test samples
  for(u32 i = 0; i < n_test_samples; i++)
    {
    // winning coding matrix row -- initialize to the first class
    u32 winner = 1;

    // auxiliary threshold
    double t = numeric_limits<double>::max();

    // iterate through the classes
    for(u32 j = 0; j < n_rows; j++)
      {
      // conditional probability of class j given the test sample
      double cp = 1.0;

      // iterate through the classifiers, skipping the zero entries
      // of the coding matrix
      for(u32 k = 0; k < n_cols; k++)
        {
        if(coding_matrix(j,k) != 0)
          {
          const double fk = classifiers_vector[k]->predict(test_samples.row(i));

          cp *= 1.0 / (1.0 + exp(coding_matrix(j,k) * ((AB(k,0) * fk) + AB(k,1))));
          }
        }
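
      // ------------------------------------------------------------ //
      // At this point cp holds the product of the per-classifier
      // sigmoid posteriors for row j,
      //
      //   cp = prod_k 1 / (1 + exp(m(j,k) * (A_k * f_k(x) + B_k))),
      //
      // taken over the non-zero entries m(j,k) of the coding matrix.
      // The code below adds the invalid-codeword mass alpha and
      // compares rows by negative log-probability, so the winning
      // class is the one minimizing -log(cp + alpha).
      // ------------------------------------------------------------ //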

      // add the alpha parameter
      cp += alpha;

      // compute the negative log of the probability
      cp = -log(cp);

      // update the winning row
      if(cp < t)
        {
        t = cp;
        winner = j + 1;
        }
      else
      if(cp == t)
        {
        // break ties randomly for an unbiased estimate
        if((std::rand() % 2) == 0)
          {
          t = cp;
          winner = j + 1;
          }
        }
      }

    predictions[i] = winner;

    // if the current test sample was misclassified, increase the
    // misclassification counter
    if(classes_vector[winner - 1].ClassLabel() != test_set_labels[i])
      {
      n_missed++;
      }

    // update the confusion matrix (labels are 1-based)
    confussion(classes_vector[winner - 1].ClassLabel() - 1, test_set_labels[i] - 1)++;
    }

  // ================================================================== //
  // ||                END MAIN PART OF THE ALGORITHM                || //
  // ================================================================== //

  return n_missed;
  }
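

// ------------------------------------------------------------------ //
// Illustrative usage sketch (assumptions: a trained
// classifiers_vector, the corresponding classes_vector and
// coding_matrix, and a test set with 1-based labels already exist;
// the name n_classes is hypothetical):
//
//   uvec predictions = zeros<uvec>(test_samples.n_rows);
//   umat confussion  = zeros<umat>(n_classes, n_classes);
//
//   const u32 n_missed = probabilistic_decoding(classifiers_vector,
//     classes_vector, coding_matrix, test_samples, test_set_labels,
//     predictions, confussion);
//
//   const double error_rate =
//     double(n_missed) / double(test_samples.n_rows);
// ------------------------------------------------------------------ //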