00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <vector>

#include <classifiers/siftpp.h>

#include <utils/time/clock.h>
#include <utils/time/tracker.h>

#include <core/exception.h>
#include <core/exceptions/software.h>
#include <fvutils/color/colorspaces.h>
#include <fvutils/color/conversions.h>
#include <fvutils/readers/png.h>
00039
00040
00041
00042
00043 using namespace fawkes;
00044
00045 namespace firevision {
00046 #if 0
00047 }
00048 #endif
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072 SiftppClassifier::SiftppClassifier( const char * object_file,
00073 int samplingStep, int octaves, int levels,
00074 float magnif, int noorient, int unnormalized)
00075 : Classifier("SiftppClassifier")
00076 {
00077
00078 __samplingStep = samplingStep;
00079 __octaves = octaves;
00080 __levels = levels;
00081
00082 __first = -1 ;
00083 __threshold = 0.04f / __levels / 2.0f ;
00084 __edgeThreshold = 10.0f;
00085 __magnif = magnif;
00086 __noorient = noorient;
00087 __unnormalized = unnormalized;
00088
00089
00090 __vlen = 128;
00091
00092
00093
00094 __tt = new TimeTracker();
00095 __loop_count = 0;
00096 __ttc_objconv = __tt->add_class("ObjectConvert");
00097 __ttc_objfeat = __tt->add_class("ObjectFeatures");
00098 __ttc_imgconv = __tt->add_class("ImageConvert");
00099 __ttc_imgfeat = __tt->add_class("ImageFeatures");
00100 __ttc_matchin = __tt->add_class("Matching");
00101 __ttc_roimerg = __tt->add_class("MergeROIs");
00102
00103
00104
00105 __tt->ping_start(__ttc_objconv);
00106
00107
00108 PNGReader pngr( object_file );
00109 unsigned char* buf = malloc_buffer( pngr.colorspace(), pngr.pixel_width(), pngr.pixel_height() );
00110 pngr.set_buffer( buf );
00111 pngr.read();
00112
00113 unsigned int lwidth = pngr.pixel_width();
00114 unsigned int lheight = pngr.pixel_height();
00115 VL::pixel_t * im_pt = new VL::pixel_t [lwidth * lheight ];
00116 VL::pixel_t * start = im_pt;
00117
00118 for (unsigned int h = 0; h < lheight; ++h) {
00119 for (unsigned int w = 0; w < lwidth ; ++w) {
00120 int i = (buf[h * lwidth + w] );
00121 VL::pixel_t norm = VL::pixel_t( 255 );
00122 *start++ = VL::pixel_t( i ) / norm;
00123 }
00124 }
00125
00126 __obj_img = new VL::PgmBuffer();
00127 __obj_img->width = lwidth;
00128 __obj_img->height = lheight;
00129 __obj_img->data = im_pt;
00130
00131 if ( ! __obj_img ) {
00132 throw Exception("Could not load object file");
00133 }
00134
00135
00136 __tt->ping_end(__ttc_objconv);
00137
00138
00139
00140
00141
00142
00143 __tt->ping_start(__ttc_objfeat);
00144
00145
00146
00147 __obj_features.clear();
00148
00149 __obj_num_features = 0;
00150
00151 __sigman = .5 ;
00152 __sigma0 = 1.6 * powf(2.0f, 1.0f / __levels) ;
00153
00154 std::cout << "SiftppClassifier(ctor): init scalespace" << std::endl;
00155
00156 VL::Sift sift(__obj_img->data, __obj_img->width, __obj_img->height,
00157 __sigman, __sigma0, __octaves, __levels, __first, -1, __levels+1) ;
00158
00159 std::cout << "SiftppClassifier(ctor): detect object keypoints" << std::endl;
00160
00161 sift.detectKeypoints(__threshold, __edgeThreshold) ;
00162
00163 __obj_num_features = sift.keypointsEnd() - sift.keypointsBegin();
00164 std::cout << "SiftppClassifier(ctor): computed '" << __obj_num_features << "' object-keypoints" << std::endl;
00165
00166
00167 sift.setNormalizeDescriptor( ! __unnormalized ) ;
00168 sift.setMagnification( __magnif ) ;
00169
00170 std::cout << "SiftppClassifier(ctor): run detector, compute ori and des ..." << std::endl;
00171
00172 for( VL::Sift::KeypointsConstIter iter = sift.keypointsBegin() ;
00173 iter != sift.keypointsEnd() ; ++iter ) {
00174
00175
00176 Feature feat;
00177
00178
00179 feat.key = (*iter);
00180
00181
00182 VL::float_t angles [4] ;
00183 int nangles ;
00184 if( ! __noorient ) {
00185 nangles = sift.computeKeypointOrientations(angles, *iter) ;
00186 } else {
00187 nangles = 1;
00188 angles[0] = VL::float_t(0) ;
00189 }
00190 feat.number_of_desc = nangles;
00191 feat.descs = new VL::float_t*[nangles];
00192
00193
00194
00195 for(int a = 0 ; a < nangles ; ++a) {
00196
00197
00198
00199 feat.descs[a] = new VL::float_t[__vlen];
00200 sift.computeKeypointDescriptor(feat.descs[a], *iter, angles[a]) ;
00201 }
00202
00203
00204
00205 __obj_features.push_back( feat );
00206
00207 }
00208
00209 __obj_num_features = __obj_features.size();
00210 if ( ! __obj_num_features > 0 ) {
00211 throw Exception("Could not compute object features");
00212 }
00213 std::cout << "SiftppClassifier(ctor): computed '" << __obj_num_features << "' features from object" << std::endl;
00214
00215
00216 __tt->ping_end(__ttc_objfeat);
00217
00218
00219 }
00220
00221
00222
00223 SiftppClassifier::~SiftppClassifier()
00224 {
00225
00226 delete __obj_img;
00227 __obj_features.clear();
00228
00229
00230 __img_features.clear();
00231 }
00232
00233
00234 std::list< ROI > *
00235 SiftppClassifier::classify()
00236 {
00237
00238 __tt->ping_start(0);
00239
00240
00241
00242 std::list< ROI > *rv = new std::list< ROI >();
00243
00244
00245 int x_min = _width;
00246 int y_min = _height;
00247 int x_max = 0;
00248 int y_max = 0;
00249
00250
00251 __tt->ping_start(__ttc_imgconv);
00252
00253 std::cout << "SiftppClassifier(classify): copy imgdat to SIFTPP Image" << std::endl;
00254
00255 VL::pixel_t * im_pt = new VL::pixel_t [_width * _height ];
00256 VL::pixel_t * start = im_pt;
00257 for (unsigned int h = 0; h < _height; ++h) {
00258 for (unsigned int w = 0; w < _width ; ++w) {
00259 int i = (_src[h * _width + w] );
00260 VL::pixel_t norm = VL::pixel_t( 255 );
00261 *start++ = VL::pixel_t( i ) / norm;
00262 }
00263 }
00264
00265 __image = new VL::PgmBuffer();
00266 __image->width = _width;
00267 __image->height = _height;
00268 __image->data = im_pt;
00269
00270
00271 __tt->ping_end(__ttc_imgconv);
00272
00273
00274
00275
00276
00277
00278 __tt->ping_start(__ttc_imgfeat);
00279
00280
00281
00282 __img_features.clear();
00283 __img_num_features = 0;
00284
00285
00286 std::cout << "SiftppClassifier(classify): init scalespace" << std::endl;
00287
00288 VL::Sift sift(__image->data, __image->width, __image->height,
00289 __sigman, __sigma0, __octaves, __levels, __first, -1, __levels+1) ;
00290
00291 std::cout << "SiftppClassifier(classify): detect image keypoints" << std::endl;
00292
00293 sift.detectKeypoints(__threshold, __edgeThreshold) ;
00294
00295
00296 __img_num_features = sift.keypointsEnd() - sift.keypointsBegin();
00297 std::cout << "SiftppClassifier(classify): Extracted '" << __img_num_features << "' image keypoints" << std::endl;
00298
00299
00300 sift.setNormalizeDescriptor( ! __unnormalized ) ;
00301 sift.setMagnification( __magnif ) ;
00302
00303 std::cout << "SiftppClassifier(classify): run detector, compute ori and des ..." << std::endl;
00304
00305 for( VL::Sift::KeypointsConstIter iter = sift.keypointsBegin() ;
00306 iter != sift.keypointsEnd() ; ++iter ) {
00307
00308 Feature feat;
00309
00310
00311 feat.key = (*iter);
00312
00313
00314
00315 VL::float_t angles [4] ;
00316 int nangles ;
00317 if( ! __noorient ) {
00318 nangles = sift.computeKeypointOrientations(angles, *iter) ;
00319 } else {
00320 nangles = 1;
00321 angles[0] = VL::float_t(0) ;
00322 }
00323 feat.number_of_desc = nangles;
00324 feat.descs = new VL::float_t*[nangles];
00325
00326
00327
00328 for(int a = 0 ; a < nangles ; ++a) {
00329
00330 feat.descs[a] = new VL::float_t[__vlen] ;
00331 sift.computeKeypointDescriptor(feat.descs[a], *iter, angles[a]) ;
00332 }
00333
00334
00335
00336 __img_features.push_back( feat );
00337
00338 }
00339
00340
00341 __img_num_features = __img_features.size();
00342
00343
00344 __tt->ping_end(__ttc_imgfeat);
00345
00346
00347 std::cout << "SiftppClassifier(classify): Extracted '" << __img_num_features << "' image features" << std::endl;
00348
00349
00350 __tt->ping_start(__ttc_matchin);
00351
00352 std::cout << "SiftppClassifier(classify): matching ..." << std::endl;
00353
00354 std::vector< int > matches(__obj_features.size());
00355 int m = 0;
00356 for (unsigned i = 0; i < __obj_features.size(); i++) {
00357 int match = findMatch(__obj_features[i], __img_features);
00358 matches[i] = match;
00359 if (match != -1) {
00360 std::cout << "SiftppClassifier(classify): Matched feature " << i << " in object image with feature " << match << " in image." << std::endl;
00361
00362 ROI r( (int)(__img_features[matches[i]].key.x)-5, (int)(__img_features[matches[i]].key.y )-5, 11, 11, _width, _height);
00363 rv->push_back(r);
00364
00365 ++m;
00366 }
00367 }
00368
00369
00370 __tt->ping_end(__ttc_matchin);
00371
00372 std::cout << "SiftppClassifier(classify) matched '" << m << "' of '" << __obj_features.size() << "' features in scene." << std::endl;
00373
00374 std::cout << "SiftppClassifier(classify): computing ROI" << std::endl;
00375
00376 __tt->ping_start(__ttc_roimerg);
00377
00378
00379 for (unsigned i = 0; i < matches.size(); i++) {
00380 if (matches[i] != -1) {
00381 if( (int)__img_features[matches[i]].key.x < x_min )
00382 x_min = (int)__img_features[matches[i]].key.x;
00383 if( (int)__img_features[matches[i]].key.y < y_min )
00384 y_min = (int)__img_features[matches[i]].key.y;
00385 if( (int)__img_features[matches[i]].key.x > x_max )
00386 x_max = (int)__img_features[matches[i]].key.x;
00387 if( (int)__img_features[matches[i]].key.y > y_max )
00388 y_max = (int)__img_features[matches[i]].key.y;
00389 }
00390 }
00391 if( m != 0 ) {
00392 ROI r(x_min, y_min, x_max-x_min, y_max-y_min, _width, _height);
00393 rv->push_back(r);
00394 }
00395
00396
00397 __tt->ping_end(__ttc_roimerg);
00398
00399
00400
00401 __tt->ping_end(0);
00402
00403
00404
00405
00406 __tt->print_to_stdout();
00407
00408
00409 delete __image;
00410
00411 std::cout << "SiftppClassifier(classify): done ... returning '" << rv->size() << "' ROIs." << std::endl;
00412 return rv;
00413 }
00414
00415 int
00416 SiftppClassifier::findMatch(const Feature & ip1, const std::vector< Feature > & ipts) {
00417 double mind = 1e100, second = 1e100;
00418 int match = -1;
00419
00420 for (unsigned i = 0; i < ipts.size(); i++) {
00421
00422 if (ipts[i].number_of_desc != ip1.number_of_desc)
00423 continue;
00424
00425 for ( int j = 0; j < ip1.number_of_desc; ++j ) {
00426 double d = distSquare(ipts[i].descs[j], ip1.descs[j], __vlen);
00427
00428 if (d < mind) {
00429 second = mind;
00430 mind = d;
00431 match = i;
00432 } else if (d < second) {
00433 second = d;
00434 }
00435 }
00436 }
00437
00438 if (mind < 0.5 * second)
00439 return match;
00440
00441 return -1;
00442 }
00443
00444
00445 double
00446 SiftppClassifier::distSquare(VL::float_t *v1, VL::float_t *v2, int n) {
00447 double dsq = 0.;
00448 while (n--) {
00449 dsq += (v1[n-1] - v2[n-1]) * (v1[n-1] - v2[n-1]);
00450 }
00451
00452 return dsq;
00453 }
00454
00455 }