00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #include "detector.h"
00032 #include "fasthessian.h"
00033 #include "non_max_suppression.h"
00034 #include "keypoint_interpolation.h"
00035 #include "GpuSurfFeatures.hpp"
00036 #include "GpuSurfOctave.hpp"
00037
00038 namespace asrl {
00039 void run_surf_detector(float * d_hessianBuffer, GpuSurfOctave & octave, int octaveIdx, GpuSurfFeatures & features,
00040 float threshold, int fh_x_threads, int fh_y_threads,
00041 int nonmax_x_threads, int nonmax_y_threads)
00042 {
00044
00046 dim3 threads;
00047
00048 threads.x = fh_x_threads;
00049 threads.y = fh_y_threads;
00050 threads.z = octave.intervals();
00051
00052 dim3 grid;
00053 grid.x = ( (octave.width() + threads.x - 1) / threads.x);
00054 grid.y = ( (octave.height() + threads.y - 1) / threads.y);
00055 grid.z = 1;
00056
00057 if(octave.valid()) {
00058 run_fasthessian_kernel(grid, threads, d_hessianBuffer, octaveIdx);
00059 ASRL_CHECK_CUDA_ERROR("Finding fasthessian");
00060 }
00061
00062
00063 features.featureCounterMem().pullFromDevice();
00064 features.featureCounterMem().h_get()[1] = 0;
00065 features.featureCounterMem().pushToDevice();
00066
00067
00069
00071
00072 threads.x = nonmax_x_threads;
00073 threads.y = nonmax_y_threads;
00074 threads.z = octave.intervals();
00075
00076 grid.x = ( (octave.width() + (threads.x-2) - 1) / (threads.x-2));
00077 grid.y = ( (octave.height() + (threads.y-2) - 1) / (threads.y-2));
00078 grid.z = 1;
00079
00080 size_t sharedBytes = threads.x*threads.y*threads.z*sizeof(float);
00081 run_surf_nonmaxonly_kernel(grid, threads, sharedBytes, d_hessianBuffer,
00082 octaveIdx, features.rawFeatureMem().d_get(), features.featureCounterMem().d_get() + 1,
00083 threshold);
00084 ASRL_CHECK_CUDA_ERROR("Running Nonmax, octave " << octaveIdx);
00085
00086
00088
00090
00091
00092 run_fh_interp_extremum(d_hessianBuffer,
00093 features.deviceFeatures(),
00094 features.rawFeatureMem().d_get(),
00095 features.featureCounterMem().d_get(),
00096 features.featureCounterMem().d_get() + 1);
00097
00098 features.featureCounterMem().pullFromDevice();
00099 features.setDirty();
00100
00101
00102 }
00103
00104 }