FAST-ER: dog.cc Source File

00001 /*
00002 
00003     This file is part of the FAST-ER machine learning system.
00004     Copyright (C) 2008  Edward Rosten and Los Alamos National Laboratory
00005 
00006     This program is free software; you can redistribute it and/or modify
00007     it under the terms of the GNU General Public License as published by
00008     the Free Software Foundation; either version 2 of the License, or
00009     (at your option) any later version.
00010 
00011     This program is distributed in the hope that it will be useful,
00012     but WITHOUT ANY WARRANTY; without even the implied warranty of
00013     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014     GNU General Public License for more details.
00015 
00016     You should have received a copy of the GNU General Public License along
00017     with this program; if not, write to the Free Software Foundation, Inc.,
00018     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
00019 */
00020 #include <cvd/image.h>
00021 #include <cvd/convolution.h>
00022 #include <cvd/vision.h>
00023 #include <cvd/image_convert.h>
00024 
00025 #include <vector>
00026 #include <map>
00027 #include <gvars3/instances.h>
00028 
00029 #include "dog.h"
00030 #include "harrislike.h"
00031 
00032 #include <TooN/TooN.h>
00033 
00034 using namespace std;
00035 using namespace CVD;
00036 using namespace GVars3;
00037 using namespace TooN;
00038 
00039 
00040 ////////////////////////////////////////////////////////////////////////////////
00041 //
00042 // Dog detector
00043 //
00044 typedef Image<float>::iterator fi;
00045 
00046 
00047 //These classes are used in local_maxima to determine how to 
00048 //modify the indexing of the coarser and finer input images.
00049 //It allows the images to either be a factor of 2 larger or smaller.
00050 ///\cond never
00051 struct Equal { static int eval(int x){ return x;} };
00052 struct Larger { static int eval(int x){ return 1+2*x;} };
00053 struct Smaller{ static int eval(int x){ return x/2;} };
00054 ///\endcond
00055 
00056 
00057 template<class LEval, class SEval> void local_maxima(const Image<float>& large, const Image<float>& mid, const Image<float>& small, vector<pair<float, ImageRef> >& corners, int m)
00058 {
00059         for(int y=1; y < mid.size().y-1; y++)
00060             for(int x=1; x <mid.size().x-1; x++)
00061             {
00062                 float c = mid[y][x];
00063 
00064                 if( (c > mid[y-1][x-1]  &&
00065                     c > mid[y-1][x+0]  &&
00066                     c > mid[y-1][x+1]  &&
00067                     c > mid[y-0][x-1]  &&
00068                     c > mid[y-0][x+1]  &&
00069                     c > mid[y+1][x-1]  &&
00070                     c > mid[y+1][x+0]  &&
00071                     c > mid[y+1][x+1]  &&
00072                     c > large[LEval::eval(y)][LEval::eval(x)] &&
00073                     c > small[SEval::eval(y)][SEval::eval(x)]) 
00074                     || 
00075                     (c < mid[y-1][x-1]  &&
00076                     c < mid[y-1][x+0]  &&
00077                     c < mid[y-1][x+1]  &&
00078                     c < mid[y-0][x-1]  &&
00079                     c < mid[y-0][x+1]  &&
00080                     c < mid[y+1][x-1]  &&
00081                     c < mid[y+1][x+0]  &&
00082                     c < mid[y+1][x+1]  &&
00083                     c < large[LEval::eval(y)][LEval::eval(x)] &&
00084                     c < small[SEval::eval(y)][SEval::eval(x)])
00085                     )
00086                 {
00087                     //A local extrema 
00088 
00089                     //Compute the Hessian using finite differences
00090                     Matrix<2> H;
00091 
00092                     H[0][0] = mid[y][x-1] - 2 * mid[y][x] + mid[y][x+1];
00093                     H[1][1] = mid[y-1][x] - 2 * mid[y][x] + mid[y+1][x];
00094                     H[0][1] = 0.25 * (mid[y+1][x+1] - mid[y+1][x-1] - mid[y-1][x+1] + mid[y-1][x-1]);
00095                     H[1][0] = H[0][1];
00096 
00097                     double tr = H[0][0] + H[1][1];
00098                     double det = H[0][0]*H[1][1] - H[0][1]*H[1][0];
00099                     double edginess =  tr*tr/det;
00100                     double r=10;
00101 
00102                    if (edginess < (r+1)*(r+1)/r)
00103                         corners.push_back(make_pair(-abs(c), m * ImageRef(x, y) + ImageRef(m/2, m/2)));
00104 
00105                 }
00106             }
00107 }
00108 
00109 void dog::operator()(const CVD::Image<CVD::byte>& i, std::vector<CVD::ImageRef>& c, unsigned int N) const
00110 {
00111     int s = GV3::get<int>("dog.divisions_per_octave", 3,1); //Divisions per octave
00112     int octaves=GV3::get<int>("dog.octaves", 4, 1);
00113 
00114     double k = pow(2, 1.0/s);
00115 
00116     double sigma = GV3::get<double>("dog.sigma", 0.8, 1);
00117 
00118     Image<float> im = convert_image(i);
00119 
00120     convolveGaussian_fir(im, im, sigma);
00121     
00122 
00123     Image<float> d1, d2, d3;
00124     c.clear();
00125     vector<pair<float, ImageRef> > corners;
00126     corners.reserve(50000);
00127 
00128     int scalemul=1;
00129     int d1m = 1, d2m = 1, d3m = 1;
00130 
00131     for(int o=0; o < octaves; o++)
00132     {
00133 
00134         for(int j=0; j < s; j++)
00135         {
00136             float delta_sigma = sigma * sqrt(k*k-1);
00137             Image<float> blurred(im.size());
00138             convolveGaussian_fir(im, blurred, delta_sigma);
00139             
00140             for(fi i1=im.begin(), i2 = blurred.begin(); i1!= im.end(); ++i1, ++i2)
00141                 *i1 = (*i2 - *i1);
00142 
00143             //im is now dog
00144             //blurred 
00145 
00146             d1 = d2;
00147             d2 = d3;
00148             d3 = im;
00149             im = blurred;
00150 
00151             d1m = d2m;
00152             d2m = d3m;
00153             d3m = scalemul;
00154 
00155             //Find maxima
00156             if(d1.size().x != 0)
00157             {
00158                 if(d1.size() == d2.size())
00159                     if(d2.size() == d3.size())
00160                         local_maxima<Equal, Equal>(d1, d2, d3, corners, d2m);
00161                     else
00162                         local_maxima<Equal, Smaller>(d1, d2, d3, corners, d2m);
00163                 else
00164                     if(d2.size() == d3.size())
00165                         local_maxima<Larger, Equal>(d1, d2, d3, corners, d2m);
00166                     else
00167                         local_maxima<Larger, Smaller>(d1, d2, d3, corners, d2m);
00168             }
00169             
00170 
00171             sigma *= k;
00172         }
00173         
00174         if(o != octaves - 1)
00175         {
00176             scalemul *=2;
00177             sigma /=2;
00178             Image<float> tmp(im.size()/2);
00179             halfSample(im,tmp);
00180             im=tmp;
00181         }
00182     }
00183     
00184 
00185     if(corners.size() > N)
00186     {
00187         nth_element(corners.begin(), corners.begin() + N, corners.end());
00188         corners.resize(N);
00189     }
00190 
00191 
00192     for(unsigned int i=0; i < corners.size(); i++)
00193         c.push_back(corners[i].second);
00194 }
00195 
00196 template<class LEval, class SEval> bool is_scale_maximum(const Image<float>& large, const Image<float>& mid, const Image<float>& small, ImageRef c)
00197 {
00198     if( 
00199           (mid[c] > 0 && 
00200           mid[c] > small[SEval::eval(c.y)][SEval::eval(c.x)] && 
00201           mid[c] > large[LEval::eval(c.y)][LEval::eval(c.x)])
00202         ||
00203           (mid[c] < 0 && 
00204           mid[c] < small[SEval::eval(c.y)][SEval::eval(c.x)] && 
00205           mid[c] < large[LEval::eval(c.y)][LEval::eval(c.x)]))
00206         return true;
00207     else
00208         return false;
00209 }
00210 
00211 bool is_scale_maximum(const Image<float>& d1, const Image<float>& d2, const Image<float>& d3, ImageRef c)
00212 {
00213     if(d1.size() == d2.size())
00214         if(d2.size() == d3.size())
00215             return is_scale_maximum<Equal, Equal>(d1, d2, d3, c);
00216         else
00217             return is_scale_maximum<Equal, Smaller>(d1, d2, d3, c);
00218     else
00219         if(d2.size() == d3.size())
00220             return is_scale_maximum<Larger, Equal>(d1, d2, d3, c);
00221         else
00222             return is_scale_maximum<Larger, Smaller>(d1, d2, d3, c);
00223 }
00224 
00225 
00226 void harrisdog::operator()(const CVD::Image<CVD::byte>& i, std::vector<CVD::ImageRef>& c, unsigned int N) const
00227 {
00228     int s = GV3::get<int>("harrislaplace.dog.divisions_per_octave", 11);    //Divisions per octave
00229     int octaves=GV3::get<int>("harrislaplace.dog.octaves", 4, 1);
00230     double sigma = GV3::get<double>("harrislaplace.dog.sigma", 0.8);
00231 
00232     float hblur = GV3::get<float>("harrislaplace.harris.blur", 2.5);
00233     float hsigmas = GV3::get<float>("harrislaplace.harris.sigmas", 2.0, 1);
00234 
00235     double k = pow(2, 1.0/s);
00236 
00237     Image<float> im = convert_image(i);
00238 
00239     //convolveGaussian(im, sigma);
00240 
00241     Image<float> d1, d2, d3;
00242     Image<float> im1, im2, im3;
00243     c.clear();
00244     vector<pair<float, ImageRef> > corners;
00245     corners.reserve(50000);
00246 
00247     int scalemul=1;
00248     int d1m = 1, d2m = 1, d3m = 1;
00249 
00250     for(int o=0; o < octaves; o++)
00251     {
00252 
00253         for(int j=0; j < s; j++)
00254         {
00255             float delta_sigma = sigma * sqrt(k*k-1);
00256             //hblur *= sqrt(k*k-1);
00257 
00258             //Blur im, and put the result in blurred.
00259             //im is already blurred from the previous layers
00260             Image<float> blurred(im.size(), 0);
00261             convolveGaussian_fir(im, blurred, delta_sigma);
00262             
00263             //For DoG, at this point, we don't need im anymore, since blurred
00264             //will be used as "im" for the next layer. However, we do need it for 
00265             //HarrisDoG, since we need to do a HarrisDetect on it.
00266             Image<float>  diff(im.size(), 0);
00267             for(fi i1=im.begin(), i2 = blurred.begin(), d = diff.begin(); i1!= im.end(); ++i1, ++i2, ++d)
00268                 *d = (*i2 - *i1);
00269             
00270             //Insert the current image, and the current difference
00271             //in to the ring buffer
00272             d1 = d2;
00273             d2 = d3;
00274             d3 = diff;
00275 
00276             im1 = im2;
00277             im2 = im3;
00278             im3 = im;
00279 
00280 
00281             im = blurred;
00282 
00283             d1m = d2m;
00284             d2m = d3m;
00285             d3m = scalemul;
00286 
00287             //Find maxima
00288             if(d1.size().x != 0)
00289             {
00290                 //First, find Harris maxima
00291                 vector<pair<float, ImageRef> > layer_corners;
00292                 HarrisDetector(im2, layer_corners, N, hblur, hsigmas);
00293                 
00294                 //Keep if they are LoG (or really DoG) maxima across scales.
00295                 //The Harris score olny is used.
00296                 for(unsigned int c=0; c < layer_corners.size(); c++)
00297                     if(is_scale_maximum(d1, d2, d3, layer_corners[c].second))
00298                         corners.push_back(layer_corners[c]);
00299             }
00300             
00301             sigma *= k;
00302         }
00303         
00304         if(o != octaves - 1)
00305         {
00306             scalemul *=2;
00307             sigma /=2;
00308             Image<float> tmp(im.size()/2);
00309             halfSample(im,tmp);
00310             im=tmp;
00311         }
00312     }
00313     
00314 
00315     if(corners.size() > N)
00316     {
00317         nth_element(corners.begin(), corners.begin() + N, corners.end());
00318         corners.resize(N);
00319     }
00320     
00321     c.clear();
00322 
00323     for(unsigned int i=0; i < corners.size(); i++)
00324         c.push_back(corners[i].second);
00325 
00326 }