ECOCPAK v0.9
Classifier_adaBoost.hpp
Go to the documentation of this file.
00001 // Copyright (C) 2011 the authors listed below
00002 // http://ecocpak.sourceforge.net
00003 //
00004 // Authors:
00005 // - Dimitrios Bouzas (bouzas at ieee dot org)
00006 // - Nikolaos Arvanitopoulos (niarvani at ieee dot org)
00007 // - Anastasios Tefas (tefas at aiia dot csd dot auth dot gr)
00008 //
00009 // This file is part of the ECOC PAK C++ library. It is
00010 // provided without any warranty of fitness for any purpose.
00011 //
00012 // You can redistribute this file and/or modify it under
00013 // the terms of the GNU Lesser General Public License (LGPL)
00014 // as published by the Free Software Foundation, either
00015 // version 3 of the License or (at your option) any later
00016 // version.
00017 // (see http://www.opensource.org/licenses for more info)
00018 
00019 
00022 
00023 
00024 #ifndef _CLASSIFIER_ADABOOST_H_
00025 #define _CLASSIFIER_ADABOOST_H_
00026 
00027 
00028 
00029 #include "Classifier.hpp"
00030 
00031 
00032 
//! Decision-stump weak learner used by Classifier_adaBoost.
//! A stump tests a single feature against a threshold; a sample is
//! treated as positive when  p * x[fea] < p * theta  (the polarity p
//! flips the direction of the inequality). Its vote is weighted by alpha.
class Classifier_weak
  {
  public:

  // ---------------------------------------------------------------- //
  // ------------------------ Constructors -------------------------- //
  // ---------------------------------------------------------------- //

  // Copy ctor -- Overloaded
  Classifier_weak
    (
    const Classifier_weak& c
    );

  // User defined ctor for weak classifier -- Overloaded
  //   fea   : index of the feature this stump tests
  //   theta : decision threshold for that feature
  //   p     : polarity of the comparison (the training code stores +1/-1)
  //   alpha : weight of this stump in the final weighted vote
  Classifier_weak
    (
    u32 fea,
    double theta,
    int p,
    double alpha
    );

  // ---------------------------------------------------------------- //
  // ---------------------- Member Functions ------------------------ //
  // ---------------------------------------------------------------- //

  // ---------------------------------------------------------------- //
  // --------------------- Overloaded Operators --------------------- //
  // ---------------------------------------------------------------- //

  // ---------------------------------------------------------------- //
  // -------------------------- Attributes -------------------------- //
  // ---------------------------------------------------------------- //

  // index of the feature tested by this stump
  u32 fea;

  // theta threshold the feature is compared against
  double theta;

  // polarity of the comparison (+1 or -1)
  int p;

  // alpha -- voting weight of this stump
  double alpha;
  };
00082 
00083 
00091 Classifier_weak::Classifier_weak
00092   (
00093   const Classifier_weak& c
00094   )
00095   {
00096   // feature
00097   fea = c.fea;
00098 
00099   // theta threshold
00100   theta = c.theta;
00101 
00102   // label
00103   p = c.p;
00104 
00105   // alpha
00106   alpha = c.alpha;
00107   }
00108 
00109 
00110 
00120 Classifier_weak::Classifier_weak
00121   (
00122   u32 _fea,
00123   double _theta,
00124   int _p,
00125   double _alpha
00126   )
00127   {
00128   // feature's number
00129   fea   = _fea;
00130 
00131   // threshold
00132   theta = _theta;
00133 
00134   // label
00135   p     = _p;
00136 
00137   // alpha
00138   alpha = _alpha;
00139   }
00140 
00141 
00142 
//! AdaBoost binary classifier built from decision stumps.
//! Trains an ensemble of Classifier_weak stumps on a positive/negative
//! dichotomy and predicts by alpha-weighted majority vote.
class Classifier_adaBoost : public Classifier
  {
  public:

  // ---------------------------------------------------------------- //
  // ------------------------ Constructors -------------------------- //
  // ---------------------------------------------------------------- //

  // Copy ctor -- Overloaded
  Classifier_adaBoost
    (
    const Classifier_adaBoost& c
    );

  // User defined ctor adaBoost -- Overloaded
  //   A : matrix whose rows are positive-class feature vectors
  //   B : matrix whose rows are negative-class feature vectors
  Classifier_adaBoost
    (
    const mat& A,
    const mat& B
    );

  // ---------------------------------------------------------------- //
  // ---------------------- Member Functions ------------------------ //
  // ---------------------------------------------------------------- //

  // return prediction value of classifier for input feature vector
  // (1.0 for the positive class, -1.0 for the negative class)
  double predict(const rowvec& t) const;

  // ---------------------------------------------------------------- //
  // --------------------- Overloaded Operators --------------------- //
  // ---------------------------------------------------------------- //

  // ---------------------------------------------------------------- //
  // -------------------------- Attributes -------------------------- //
  // ---------------------------------------------------------------- //

  // vector with the trained weak classifiers (the boosted ensemble)
  vector<Classifier_weak> L;
  };
00186 
00187 
00188 
00196 Classifier_adaBoost::Classifier_adaBoost
00197   (
00198   const Classifier_adaBoost& c
00199   )
00200   {
00201   // update the plus and minus classes
00202   pos = c.pos;
00203   neg = c.neg;
00204 
00205   // update number of possitive and negative samples
00206   n_pos = c.n_pos;
00207   n_neg = c.n_neg;
00208 
00209   // update training error
00210   training_error = c.training_error;
00211 
00212   // copy vector of weak classifiers
00213   for(u32 i = 0; i < c.L.size(); i++)
00214     {
00215     L.push_back(c.L[i]);
00216     }
00217 
00218   }
00219 
00220 
00221 
00231 Classifier_adaBoost::Classifier_adaBoost
00232   (
00233   const mat& A,
00234   const mat& B
00235   )
00236   {
00237   // number of positive class feature vectors
00238   n_pos = A.n_rows;
00239 
00240   // number of negative class feature vectors
00241   n_neg = B.n_rows;
00242 
00243   // number of features
00244   const u32 n_fea = A.n_cols;
00245 
00246   // number of training feature vectors -- training samples
00247   const u32 n_samples = n_pos + n_neg;
00248 
00249   // maximum number of iterations
00250   const u32 mxiter = param.degree;
00251 
00252   // initialize positive class weights
00253   rowvec w1 = ones<rowvec>(n_pos);
00254 
00255   // initialize negative class weights
00256   rowvec w2 = ones<rowvec>(n_neg);
00257 
00258   // initialize errors vector
00259   vec err = zeros<vec>(mxiter);
00260 
00261   // iterate through number of iterations
00262   for(u32 i = 0; i < mxiter; i++)
00263     {
00264     // normalization constant of weights
00265     double tw = accu(w1) + accu(w2);
00266 
00267     // classes are seperable break loop
00268     if(tw == 0)
00269       {
00270       break;
00271       }
00272 
00273     // normalize weights (convex sum)
00274     w1 /= tw;
00275     w2 /= tw;
00276 
00277     // --- train weak classifier --- //
00278     mat V = join_cols(A,B);
00279     vec dummy = ones<vec>(n_samples);
00280 
00281     mat W = trans(join_rows(w1, w2)) * ones<rowvec>(n_fea);
00282 
00283     rowvec valsumW = sum(W);
00284 
00285     mat Y = join_cols(ones<mat>(n_pos, n_fea), zeros<mat>(n_neg, n_fea));
00286 
00287     mat V_sort = sort(V);
00288     umat inds = zeros<umat>(n_samples, n_fea);
00289     mat W_sort = zeros<mat>(n_samples, n_fea);
00290     mat Y_sort = zeros<mat>(n_samples, n_fea);
00291     for(u32 j = 0; j < n_fea; j++)
00292       {
00293       inds.col(j) = sort_index(V.col(j));
00294 
00295       for(u32 l = 0; l < n_samples; l++)
00296         {
00297         W_sort(l,j) = W(inds(l,j), j);
00298         Y_sort(l,j) = Y(inds(l,j), j);
00299         }
00300 
00301       }
00302 
00303     mat P_cum = cumsum(Y_sort % W_sort);
00304 
00305     mat N_cum = cumsum((ones<mat>(n_samples, n_fea) - Y_sort) % W_sort);
00306 
00307     mat PN_cum1 = ((dummy * max(P_cum)) - P_cum) + N_cum;
00308     mat PN_cum2 = ((dummy * max(N_cum)) - N_cum) + P_cum;
00309 
00310     rowvec min1 = zeros<rowvec>(n_fea);
00311     urowvec thresh_ind1 = zeros<urowvec>(n_fea);
00312     rowvec min2 = zeros<rowvec>(n_fea);
00313     urowvec thresh_ind2 = zeros<urowvec>(n_fea);
00314     for(u32 j = 0; j < n_fea; j++)
00315       {
00316       vec tmpv = PN_cum1.col(j);
00317       min1[j] = tmpv.min(thresh_ind1[j]);
00318       tmpv = PN_cum2.col(j);
00319       min2[j] =  tmpv.min(thresh_ind2[j]);
00320       }
00321 
00322     uvec inds1 = find(min1 > min2);
00323     uvec inds2 = find(min1 <= min2);
00324 
00325     urowvec thresh_ind = zeros<urowvec>(n_fea);
00326     for(u32 j = 0; j < inds1.n_elem; j++)
00327       {
00328       thresh_ind[inds1[j]] = thresh_ind2[inds1[j]];
00329       }
00330 
00331     mat PN_cum = zeros<mat>(n_samples, n_fea);
00332     if(inds1.n_elem > 0)
00333       {
00334       for(u32 j = 0; j < inds1.n_elem; j++)
00335         {
00336         PN_cum.col(inds1[j]) = PN_cum2.col(inds1[j]);
00337         }
00338 
00339       }
00340 
00341     for(u32 j = 0; j < inds2.n_elem; j++)
00342       {
00343       thresh_ind[inds2[j]] = thresh_ind1[inds2[j]];
00344       }
00345 
00346     for(u32 j = 0; j < inds2.n_elem; j++)
00347       {
00348       PN_cum.col(inds2[j]) = PN_cum1.col(inds2[j]);
00349       }
00350 
00351     uvec inds3 = find(thresh_ind == n_samples);
00352     for(u32 j = 0; j < inds3.n_elem; j++)
00353       {
00354       thresh_ind[inds3[j]]--;
00355       }
00356 
00357     vec displacement = linspace(0, n_fea - 1, n_fea);
00358 
00359     urowvec thr_ind = conv_to<urowvec>::from(thresh_ind + trans(n_samples * displacement));
00360 
00361     rowvec thr = zeros<rowvec>(thr_ind.n_elem);
00362     for(u32 j = 0; j < thr_ind.n_elem; j++)
00363       {
00364       thr[j] = (V_sort[thr_ind[j]] + V_sort[thr_ind[j] + 1]) / 2.0;
00365       }
00366 
00367     rowvec p = zeros<rowvec>(thr_ind.n_elem);
00368     for(u32 j = 0; j < thr_ind.n_elem; j++)
00369       {
00370       p[j] = ((P_cum[thr_ind[j]] > N_cum[thr_ind[j]])? 1 : -1);
00371       }
00372 
00373     // ----------------------------- //
00374 
00375     mat classes = conv_to<mat>::from(((dummy * p) % join_cols(A,B)) < (dummy * (p % thr)));
00376 
00377     rowvec error = sum((trans(join_rows(w1, w2)) * ones<rowvec>(n_fea))
00378                     % (classes != join_cols(ones<mat>(n_pos, n_fea), zeros<mat>(n_neg, n_fea))));
00379 
00380     u32 feature0;
00381     err[i] = error.min(feature0);
00382 
00383     double beta0 = err[i] / (1.0 - err[i]);
00384 
00385     // create and save weak classifier
00386     L.push_back
00387       (
00388       Classifier_weak
00389         (
00390         feature0,
00391         thr[feature0],
00392         p[feature0],
00393         -log10(beta0 + math::eps())
00394         )
00395       );
00396 
00397 
00398     // update weights for positive class
00399     vec fe =
00400       conv_to<vec>::from((p[feature0] * A.col(feature0)) < (p[feature0] * thr[feature0]));
00401 
00402     for(u32 j = 0; j < fe.n_elem; j++)
00403       {
00404       fe[j] = pow(beta0, fe[j]);
00405       }
00406 
00407     w1 = w1 % trans(fe);
00408 
00409     // update weights for negative class
00410     fe = conv_to<vec>::from((p[feature0] * B.col(feature0)) >= (p[feature0] * thr[feature0]));
00411     for(u32 j = 0; j < fe.n_elem; j++)
00412       {
00413       fe[j] = pow(beta0, fe[j]);
00414       }
00415 
00416     w2 = w2 % trans(fe);
00417 
00418     if(err[i] >= 0.5)
00419       {
00420       break;
00421       }
00422 
00423     // compute training error
00424     // u32 missed = 0;
00425     // for(u32 j = 0; j < n_pos; j++)
00426     //   {
00427     //   if(predict(A.row(j)) != 1)
00428     //     {
00429     //     missed++;
00430     //     }
00431     //   }
00432 
00433     // for(u32 j = 0; j < n_pos; j++)
00434     //   {
00435     //   if(predict(B.row(j)) != -1)
00436     //     {
00437     //     missed++;
00438     //     }
00439     //   }
00440 
00441     // if(double(missed) / double(n_samples) < math::eps())
00442     //   {
00443     //   break;
00444     //   }
00445     }
00446 
00447   }
00448 
00449 
00450 
00463 inline
00464 double
00465 Classifier_adaBoost::predict(const rowvec& t) const
00466   {
00467   // cummulative sum
00468   double cm = 0.0;
00469 
00470   // cummulative threshold
00471   double thr = 0.0;
00472 
00473   // for each weak classifier
00474   for(u32 i = 0; i < L.size(); i++)
00475     {
00476     cm += (L[i].alpha * ((L[i].p * t[L[i].fea] < L[i].p * L[i].theta)? 1.0 : 0.0));
00477     thr += L[i].alpha;
00478     }
00479 
00480   return ((cm >= thr / 2.0)? 1.0 : -1.0);
00481   }
00482 
00483 
00484 
00485 #endif
00486 
00487 
00488 
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerator Defines