ECOCPAK v0.9
fn_sffs.hpp
Go to the documentation of this file.
00001 // Copyright (C) 2011 the authors listed below
00002 // http://ecocpak.sourceforge.net
00003 //
00004 // Authors:
00005 // - Dimitrios Bouzas (bouzas at ieee dot org)
00006 // - Nikolaos Arvanitopoulos (niarvani at ieee dot org)
00007 // - Anastasios Tefas (tefas at aiia dot csd dot auth dot gr)
00008 //
00009 // This file is part of the ECOC PAK C++ library. It is
00010 // provided without any warranty of fitness for any purpose.
00011 //
00012 // You can redistribute this file and/or modify it under
00013 // the terms of the GNU Lesser General Public License (LGPL)
00014 // as published by the Free Software Foundation, either
00015 // version 3 of the License or (at your option) any later
00016 // version.
00017 // (see http://www.opensource.org/licenses for more info)
00018 
00019 
00022 
00023 
00044 double
00045 criterion_fqmi
00046   (
00047   const vector<ClassData>& classes_vector,
00048   const ucolvec& indexPtr,
00049   const u32 curSubsetSize,
00050   const double sigma
00051   )
00052   {
00053   // print header
00054   #ifdef NCURSES_OUTPUT
00055   mvaddstr(3, 0, "Computing FQMI:      ");
00056   refresh();
00057   #endif
00058 
00059   if((classes_vector.size() - curSubsetSize) == 0)
00060     {
00061     return 0.0;
00062     }
00063 
00064   // data samples
00065   mat samples;
00066 
00067   // sample labels
00068   ucolvec labels;
00069 
00070   // compute complement subset of current best subset
00071   ucolvec cSet = complement_sffs(indexPtr, curSubsetSize);
00072 
00073   // create first bipartition
00074   for(u32 i = 0; i < curSubsetSize; i++)
00075     {
00076     samples = join_cols(samples, classes_vector[indexPtr[i]].Data());
00077     labels =
00078       join_cols
00079         (
00080         labels,
00081         ones<ucolvec>(classes_vector[indexPtr[i]].Data().n_rows)
00082         );
00083     }
00084 
00085   // create second bipartition
00086   for(u32 i = 0; i < cSet.n_rows; i++)
00087     {
00088     samples = join_cols(samples, classes_vector[cSet[i]].Data());
00089     labels =
00090       join_cols
00091         (
00092         labels,
00093         2 * ones<ucolvec>(classes_vector[cSet[i]].Data().n_rows)
00094         );
00095     }
00096 
00097   return fqmi(samples, labels, sigma);
00098   }
00099 
00100 
00101 
00116 double
00117 criterion_fldr
00118   (
00119   const vector<ClassData>& classes_vector,
00120   const ucolvec& indexPtr,
00121   const u32 curSubsetSize
00122   )
00123   {
00124   #ifdef NCURSES_OUTPUT
00125   mvaddstr(3, 0, "Computing FLDR:     ");
00126   refresh();
00127   #endif
00128 
00129   if((classes_vector.size() - curSubsetSize) == 0)
00130     {
00131     return 0.0;
00132     }
00133 
00134   // size of current subset is 1
00135   if(curSubsetSize == 1)
00136     {
00137     // if the number of samples of the current subset is less than 2
00138     if(classes_vector[indexPtr[0]].Samples() < 2)
00139       {
00140       return 0.0;
00141       }
00142     }
00143 
00144   // find the complement set of indexPtr
00145   ucolvec cSet = complement_sffs(indexPtr, curSubsetSize);
00146 
00147   // number of classes and current subset size differ only by 1
00148   if((classes_vector.size() - curSubsetSize ) == 1)
00149     {
00150     // if the number of samples of the complement subset is less than 2
00151     if(classes_vector[cSet[0]].Samples() < 2)
00152       {
00153       return 0.0;
00154       }
00155     }
00156 
00157   // ClassData vector for partition 1
00158   vector<ClassData> newClassVec1;
00159 
00160   // push back the classes that belong to one set
00161   for(u32 i = 0; i < curSubsetSize; i++)
00162     {
00163     newClassVec1.push_back(classes_vector[indexPtr[i]]);
00164     }
00165 
00166   // ClassData vector for partition 2
00167   vector<ClassData> newClassVec2;
00168 
00169   // push back classes that belong to the complement set
00170   for(u32 i = 0; i < classes_vector.size() - curSubsetSize; i++)
00171     {
00172     newClassVec2.push_back(classes_vector[cSet[i]]);
00173     }
00174 
00175   // create data matrices that correspond to the two bi-partitions
00176   mat first_bipartition = create_samples_matrix(newClassVec1);
00177   mat second_bipartition = create_samples_matrix(newClassVec2);
00178 
00179   return fldr(first_bipartition, second_bipartition);
00180   }
00181 
00182 
00183 
00199 double
00200 criterion
00201   (
00202   const vector<ClassData>& class_vector,
00203   const ucolvec& indexPtr,
00204   const u32 curSubSetSize,
00205   const u32 criterion_option,
00206   const double sigma
00207   )
00208   {
00209   // engage according to user enter criterion option
00210   switch(criterion_option)
00211     {
00212     case FQMI:
00213       {
00214       // call to fqmi procedure
00215       return criterion_fqmi
00216                (
00217                class_vector,
00218                indexPtr,
00219                curSubSetSize,
00220                sigma
00221                );
00222 
00223       }
00224 
00225     case FLDR:
00226       {
00227       // call to fldr procedure
00228       return criterion_fldr(class_vector, indexPtr, curSubSetSize);
00229       }
00230 
00231     case CUSTOM_CRITERION:
00232       {
00233       // call to custom criterion procedure
00234       return criterion_custom(class_vector, indexPtr, curSubSetSize);
00235       }
00236 
00237     default:
00238       {
00239       #ifdef NCURSES_OUTPUT
00240       endwin();
00241       #endif
00242       cerr << "Unknown criterion option" << endl;
00243       exit(1);
00244       }
00245 
00246     }
00247 
00248   }
00249 
00250 
00251 
00321 ucolvec
00322 sffs
00323   (
00324   const vector<ClassData>& classes_vector,
00325   const u32 crit
00326   )
00327   {
00328   // sigma for fqmi()
00329   double sigma = 1.0;
00330 
00331   // if criterion is FQMI compute sigma
00332   if(crit == 1)
00333     {
00334     // compute sigma
00335     sigma = compute_sigma(classes_vector);
00336     }
00337 
00338   // --- declarations of constants --- //
00339 
00340   // full feature set size
00341   const u32 n = classes_vector.size();
00342 
00343   // desired feature subset size
00344   const u32 d = n - 1;
00345 
00346   // set floating search stopping constraint
00347   int delta = 1;
00348 
00349   // set generalization level
00350   int genLevel = 1;
00351 
00352   // a multiplier of estimated delta
00353   const double DELTAMUL = 1.0;
00354 
00355   // a constant to be added to estimated delta
00356   const int DELTAADD = 1;
00357 
00358   // double overflow limit
00359   const double SAFEUP = 5e300;
00360 
00361   // ---declarations of variables --- //
00362 
00363   // of size n, stores 0/1 information on currently selected features
00364   ucolvec binPtr;
00365 
00366   // of size d, it is computed from bin, stores indexPtrs of selected
00367   // features
00368   ucolvec indexPtr;
00369 
00370   // temporary best subset array
00371   ucolvec tempBestSetPtr;
00372 
00373   // best value
00374   double tempBestValue;
00375 
00376   // so far best subsets
00377   ucolvec globalBestSetPtr;
00378 
00379   // so far best values
00380   colvec globalBestValuePtr;
00381 
00382   // indicates, if a better subset has been found during last step
00383   bool betterFound;
00384 
00385   // criterion function return value
00386   double value;
00387 
00388   // current subset size
00389   int curSubSetSize;
00390 
00391   // current subset size in integers
00392   int curSubSetSizeInt;
00393 
00394   // how to change curSubSetSize (when changing direction of search):
00395   // -2,-1,+1,+2 steps
00396   int curSubsetChange;
00397 
00398   // initial generalization level
00399   int initGenLevel;
00400 
00401   // auxiliary variable
00402   int aux;
00403 
00404   // beginning of block of identifiers
00405   int begin;
00406 
00407   // "pivot" - last identifier in a block
00408   int pivot;
00409 
00410   // identifies end of internal exhaustive search
00411   bool stopExhaustive;
00412 
00413   // identifies end of it all
00414   bool stopAll;
00415 
00416   // identifies the dimension, for which the algorithm should end
00417   int stopDimension = 0;
00418 
00419   // identifier 0 exhanged in case of backward search
00420   int id0 = 0;
00421 
00422   // identifier 2 exhanged in case of backward search
00423   int id2 = 2;
00424 
00425   // 0=sfs, 1=sbs current search direction
00426   bool sbs = 0;
00427 
00428   // stores current backtracking depth (+forward,-backward)
00429   int backtrackDepth = 0;
00430 
00431   // predicted delta estimate
00432   float predDeltaEst = 0.0;
00433 
00434   // number of direction changes (needed for delta averaging)
00435   int dirCount = 0;
00436 
00437   // number of steps
00438   long int stepsNum;
00439 
00440   // set stopDimension according to delta value
00441   if(delta == 0)
00442     {
00443     stopDimension = n;
00444     }
00445   else
00446     {
00447     if(delta > 0)
00448       {
00449       stopDimension = d + delta;
00450 
00451       if(stopDimension > n)
00452         {
00453         stopDimension = n;
00454         }
00455 
00456       }
00457 
00458     }
00459 
00460   // initialize back tracking depth
00461   backtrackDepth = 0;
00462 
00463   // initialize predicted delta estimate
00464   predDeltaEst = 0.0;
00465 
00466   // initialize direction counter
00467   dirCount = 0;
00468 
00469   // allocate memory for feature code word
00470   binPtr = zeros<ucolvec>(n + 1);
00471 
00472   // allocate memory for indices of selected features
00473   indexPtr = zeros<ucolvec>(n);
00474 
00475   // allocate memory for current best set
00476   tempBestSetPtr = zeros<ucolvec>(n);
00477 
00478   // allocate memory for so far best subsets
00479   globalBestSetPtr = zeros<ucolvec>((n * ( n + 1 ) ) / 2);
00480 
00481   // allocate memory for so far best subset values
00482   globalBestValuePtr.set_size(n);
00483   globalBestValuePtr.fill(-SAFEUP);
00484 
00485   // set initial generalization level to current generalization level
00486   initGenLevel = genLevel;
00487 
00488   // --- Beginning of printing info block according to type of    --- //
00489   // --- floating search                                          --- //
00490 
00491   // set current subset size equal to initial generalization level
00492   curSubSetSize = initGenLevel;
00493 
00494   // initialize stop centinel
00495   stopAll = false;
00496 
00497   // do while floating search ends
00498   do
00499     {
00500     // initialize temporary best set
00501     tempBestSetPtr.zeros();
00502 
00503     // initialize best value with underflow limit
00504     tempBestValue = -SAFEUP;
00505 
00506     // initialize binPtr for exhaustive step
00507     aux = initGenLevel;
00508     for(u32 i = 0; ((aux > 0) && ( i < n )); i++)
00509       {
00510       if(binPtr[i] == id0)
00511         {
00512         binPtr[i] = id2;
00513         aux--;
00514         }
00515 
00516       }
00517 
00518     // size of current subset size in number integer (usually 4 byte per
00519     // int)
00520     curSubSetSizeInt = curSubSetSize * sizeof( int );
00521 
00522     // --- estimate the number of steps --- //
00523     stepsNum = 1;
00524 
00525     // backward step sbs = 1
00526     if(sbs)
00527       {
00528       aux = curSubSetSize + initGenLevel;
00529       }
00530      else // else forward step
00531       {
00532       aux = n - ( curSubSetSize - initGenLevel );
00533       }
00534 
00535     for(u32 i = 0; i < initGenLevel; i++)
00536       {
00537       stepsNum *= aux - i;
00538       }
00539 
00540     for(u32 i = 2; i <= initGenLevel; i++)
00541       {
00542       stepsNum /= i;
00543       }
00544     // --- end of steps estimation --- //
00545 
00546     stopExhaustive = false;
00547 
00548     // do while internal exhaustive search ends
00549     do
00550       {
00551       // --- convert "binPtr" to "indexPtr" --- //
00552       aux = 0;
00553       for(u32 i = 0; i < curSubSetSize; i++)
00554         {
00555         while(binPtr[aux] <= 0)
00556           {
00557           aux++;
00558           }
00559 
00560         indexPtr[i] = aux;
00561         aux++;
00562         }
00563       // --- end of conversion --- //
00564 
00565       // evaluate criterion function
00566       value = criterion
00567                 (
00568                 classes_vector,
00569                 indexPtr,
00570                 curSubSetSize,
00571                 crit,
00572                 sigma
00573                 );
00574 
00575       // if the new value is better than the best value already
00576       if(value > tempBestValue)
00577         {
00578         tempBestSetPtr = indexPtr;
00579         tempBestValue = value;
00580         }
00581 
00582       // iterate through pBegin to find the beginning of the block of
00583       // selected features
00584       for(begin = 0; binPtr[begin] != id2; begin++)
00585         {
00586         // just iterate
00587         }
00588 
00589       // iterate through pBegin to find the pivot
00590       for(pivot = begin; (pivot < n) && (binPtr[pivot] != id0); pivot++)
00591         {
00592         // just iterate
00593         }
00594 
00595       if(pivot == n)
00596         {
00597         stopExhaustive = true;
00598         }
00599       else
00600         {
00601         // remember the position of first 0 on the right
00602         aux = pivot;
00603 
00604         // find a real pivot
00605         do
00606           {
00607           pivot--;
00608           }
00609         while(binPtr[pivot] != id2);
00610 
00611         // shift pivot to the right
00612         binPtr[pivot] = id0;
00613         binPtr[aux] = id2;
00614 
00615         aux = 0;
00616 
00617         // --- run "aux" from left, "piv" from right. the 0,2     --- //
00618         // --- pairs found are changed to 2,0                     --- //
00619         do
00620           {
00621           pivot--;
00622           }
00623         while((pivot > 0) && (binPtr[pivot] != id2));
00624 
00625         while((aux < pivot) && (binPtr[aux] != id0))
00626           {
00627           aux++;
00628           }
00629 
00630         while((pivot - aux) > 0)
00631           {
00632           binPtr[pivot] = id0;
00633           binPtr[aux] = id2;
00634 
00635           do
00636             {
00637             pivot--;
00638             }
00639           while((pivot > 0) && (binPtr[pivot] != id2));
00640 
00641           while((aux < pivot) && (binPtr[aux] != id0))
00642             {
00643             aux++;
00644             }
00645 
00646           }
00647         // --- end of run --- //
00648 
00649         }
00650 
00651       }
00652     while(!stopExhaustive);
00653 
00654     // update if best value is found
00655     if(tempBestValue > globalBestValuePtr[curSubSetSize - 1])
00656       {
00657       memcpy
00658         (&globalBestSetPtr[(curSubSetSize * (curSubSetSize - 1)) / 2],
00659         tempBestSetPtr.memptr(),
00660         curSubSetSizeInt
00661         );
00662 
00663       globalBestValuePtr[curSubSetSize - 1] = tempBestValue;
00664       betterFound = true;
00665       }
00666     else
00667       {
00668       betterFound = false;
00669       }
00670 
00671     // if last step was sbs
00672     if(sbs)
00673       {
00674       // a better subset was found
00675       if(betterFound)
00676         {
00677         // --- convert according to format --- //
00678 
00679         // convert to sbs format
00680         if(curSubSetSize > genLevel)
00681           {
00682           curSubsetChange = -1;
00683 
00684           for(u32 i = 0; i < n; i++)
00685             {
00686             binPtr[i] = -1;
00687             }
00688 
00689           // freeze changes
00690           for(u32 i = 0; i < curSubSetSize; i++)
00691             {
00692             binPtr[tempBestSetPtr[i]] = 2;
00693             }
00694 
00695           }
00696         else // nothing to remove, convert to sfs format
00697           {
00698           for(u32 i = 0; i < n; i++)
00699             {
00700             binPtr[i] = 0;
00701             }
00702 
00703           for(u32 i = 0; i < curSubSetSize; i++)
00704             {
00705             binPtr[tempBestSetPtr[i]] = 1;
00706             }
00707 
00708           // change to sfs
00709           sbs = false;
00710 
00711           // change "binPtr" identifiers 0 and 2
00712           id0 = 0;
00713           id2 = 2;
00714 
00715           curSubsetChange = 1;
00716           }
00717 
00718         // --- end of conversion --- //
00719 
00720         }
00721       else // no improvement during last step (sbs)
00722         {
00723         // change to sfs
00724         sbs = false;
00725 
00726         // change to "binPtr" identifiers 0 and 2
00727         id0 = 0;
00728         id2 = 2;
00729 
00730         // forget last step and perform one new sfs step
00731         curSubsetChange = 2;
00732         }
00733 
00734       }
00735     else // last step was sfs
00736       {
00737       // if removing is possible, prepare sbs
00738       if(curSubSetSize > genLevel)
00739         {
00740         for(u32 i = 0; i < n; i++)
00741           {
00742           binPtr[i] = -1;
00743           }
00744 
00745         for(u32 i = 0; i < curSubSetSize; i++)
00746           {
00747           binPtr[tempBestSetPtr[i]] = 2;
00748           }
00749 
00750         // change to sbs
00751         sbs = true;
00752 
00753         // change to "binPtr" identifiers 0 and 2
00754         id0 = 2;
00755         id2 = 0;
00756 
00757         curSubsetChange = -1;
00758 
00759         }
00760       else // otherwise stay by sfs
00761         {
00762         curSubsetChange = 1;
00763 
00764         for(u32 i = 0; i < n; i++)
00765           {
00766           binPtr[i] = 0;
00767           }
00768 
00769         // freeze changes
00770         for(u32 i = 0; i < curSubSetSize; i++)
00771           {
00772           binPtr[tempBestSetPtr[i]] = 1;
00773           }
00774 
00775         }
00776 
00777       }
00778 
00779     // renew curSubSetSize and initGenLevel
00780     if(curSubsetChange == -1)
00781       {
00782       if((curSubSetSize == d) && (d % genLevel != 0))
00783         {
00784         curSubSetSize = d - ( d % genLevel );
00785         initGenLevel = d % genLevel;
00786         }
00787       else
00788         {
00789         curSubSetSize -= genLevel;
00790         initGenLevel = genLevel;
00791         }
00792 
00793       if(backtrackDepth < 0)
00794         {
00795         // continue
00796         backtrackDepth -= initGenLevel;
00797         }
00798       else
00799         {
00800         // begin
00801         backtrackDepth = -initGenLevel;
00802         }
00803 
00804       }
00805     else // else curSubsetChange == 1 or 2
00806       {
00807       if((curSubSetSize < d) && ((curSubSetSize + genLevel) > d))
00808         {
00809         curSubSetSize = d;
00810         initGenLevel = d % genLevel;
00811         }
00812       else
00813         {
00814         curSubSetSize += genLevel;
00815         initGenLevel = genLevel;
00816         }
00817 
00818       // if end of going down
00819       if(backtrackDepth < 0)
00820         {
00821         if(delta == -1)
00822           {
00823           // averaging
00824           predDeltaEst =
00825             ( ( dirCount * predDeltaEst ) - backtrackDepth ) /
00826             ( dirCount + 1 );
00827 
00828           dirCount++;
00829           }
00830         else // else maximize
00831           {
00832           if((-backtrackDepth) > predDeltaEst)
00833             {
00834             predDeltaEst = -backtrackDepth;
00835             }
00836 
00837           }
00838 
00839         }
00840 
00841       backtrackDepth = 0;
00842       }
00843 
00844     //  once more and renew "binPtr"
00845     if(curSubsetChange == 2)
00846       {
00847       // change to sfs format now with actualized "curSubSetSize"
00848       for(u32 i = 0; i < n; i++)
00849         {
00850         binPtr[i] = 0;
00851         }
00852 
00853       aux = (curSubSetSize * (curSubSetSize - 1)) / 2;
00854 
00855       for(u32 i = 0; i < curSubSetSize; i++)
00856         {
00857         binPtr[globalBestSetPtr[aux + i]] = 1;
00858         }
00859 
00860       if((curSubSetSize < d) && ((curSubSetSize + genLevel) > d))
00861         {
00862         curSubSetSize = d;
00863         initGenLevel = d % genLevel;
00864         }
00865       else
00866         {
00867         curSubSetSize += genLevel;
00868         initGenLevel = genLevel;
00869         }
00870 
00871       // no direction change
00872       }
00873 
00874     if(delta < 0)
00875       {
00876       stopDimension = int(d + (DELTAMUL * predDeltaEst) + DELTAADD);
00877 
00878       if(stopDimension > n)
00879         {
00880         stopDimension = n;
00881         }
00882 
00883       }
00884 
00885     // end if delta reached
00886     if(curSubSetSize > stopDimension)
00887       {
00888       stopAll = true;
00889       }
00890 
00891     }
00892   while(!stopAll);
00893 
00894   // ---  find the best value and best subset from all possible   --- //
00895   // --- subsets                                                  --- //
00896   double bestValue = globalBestValuePtr[0];
00897   int bestValueIndex = 0;
00898 
00899   // find the index of the best value
00900   for(u32 i = 1; i < n - 1; i++)
00901     {
00902     if(globalBestValuePtr[i] > bestValue)
00903       {
00904       bestValue = globalBestValuePtr[i];
00905       bestValueIndex = i;
00906       }
00907 
00908     }
00909   // --- end of best value and subset --- //
00910 
00911   // auxiliary variable
00912   aux = ((bestValueIndex + 1) * bestValueIndex) / 2;
00913 
00914   return globalBestSetPtr.rows(aux, aux + bestValueIndex);
00915   }
00916 
00917 
00918 
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerator Defines