00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #include "keypoint_interpolation.h"
00032
00033 namespace asrl {
00034
00035
00036 #define MID_IDX 1
00037 __global__ void fh_interp_extremum(float * d_hessian, Keypoint * d_features, int4 * d_maxmin, unsigned int * d_feature_counter, unsigned int * d_max_min_counter)
00038 {
00039 if(blockIdx.x >= *d_max_min_counter)
00040 return;
00041
00042 int octave = d_maxmin[blockIdx.x].w;
00043
00044 unsigned int i = ASRL_SURF_MAX_FEATURES;
00045 int hidx_x = d_maxmin[blockIdx.x].x-1 + threadIdx.x;
00046 int hidx_y = d_maxmin[blockIdx.x].y-1 + threadIdx.y;
00047 int hidx_z = d_maxmin[blockIdx.x].z-1 + threadIdx.z;
00048
00049 int hidx_lin = hidx_x + d_hessian_stride[0] * hidx_y + d_hessian_stride[0] * d_octave_params[octave].y_size * hidx_z;
00050
00051 __shared__ float fh_vals[3][3][3];
00052 __shared__ Keypoint p;
00053
00054 fh_vals[threadIdx.z][threadIdx.y][threadIdx.x] = d_hessian[hidx_lin];
00055 __syncthreads();
00056
00057 if(threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0){
00058 __shared__ float H[3][3];
00059
00060
00061 H[0][0] = fh_vals[MID_IDX ][MID_IDX + 1][MID_IDX ]
00062 - 2.0f*fh_vals[MID_IDX ][MID_IDX ][MID_IDX ]
00063 + fh_vals[MID_IDX ][MID_IDX - 1][MID_IDX ];
00064
00065
00066 H[1][1] = fh_vals[MID_IDX ][MID_IDX ][MID_IDX + 1]
00067 - 2.0f*fh_vals[MID_IDX ][MID_IDX ][MID_IDX ]
00068 + fh_vals[MID_IDX ][MID_IDX ][MID_IDX - 1];
00069
00070 H[2][2] = fh_vals[MID_IDX + 1][MID_IDX ][MID_IDX ]
00071 - 2.0f*fh_vals[MID_IDX ][MID_IDX ][MID_IDX ]
00072 + fh_vals[MID_IDX - 1][MID_IDX ][MID_IDX ];
00073
00074
00075 H[0][1]= 0.25f*(fh_vals[MID_IDX ][MID_IDX + 1][MID_IDX + 1] -
00076 fh_vals[MID_IDX ][MID_IDX - 1][MID_IDX + 1] -
00077 fh_vals[MID_IDX ][MID_IDX + 1][MID_IDX - 1] +
00078 fh_vals[MID_IDX ][MID_IDX - 1][MID_IDX - 1]);
00079
00080 H[0][2]= 0.25f*(fh_vals[MID_IDX + 1][MID_IDX + 1][MID_IDX ] -
00081 fh_vals[MID_IDX + 1][MID_IDX - 1][MID_IDX ] -
00082 fh_vals[MID_IDX - 1][MID_IDX + 1][MID_IDX ] +
00083 fh_vals[MID_IDX - 1][MID_IDX - 1][MID_IDX ]);
00084
00085
00086 H[1][2]= 0.25f*(fh_vals[MID_IDX + 1][MID_IDX ][MID_IDX + 1] -
00087 fh_vals[MID_IDX + 1][MID_IDX ][MID_IDX - 1] -
00088 fh_vals[MID_IDX - 1][MID_IDX ][MID_IDX + 1] +
00089 fh_vals[MID_IDX - 1][MID_IDX ][MID_IDX - 1]);
00090
00091
00092 H[1][0] = H[0][1];
00093
00094
00095 H[2][0] = H[0][2];
00096
00097
00098 H[2][1] = H[1][2];
00099
00100
00101 __shared__ float dD[3];
00102
00103
00104 dD[0] = 0.5f*(fh_vals[MID_IDX ][MID_IDX + 1][MID_IDX ] -
00105 fh_vals[MID_IDX ][MID_IDX - 1][MID_IDX ]);
00106
00107 dD[1] = 0.5f*(fh_vals[MID_IDX ][MID_IDX ][MID_IDX + 1] -
00108 fh_vals[MID_IDX ][MID_IDX ][MID_IDX - 1]);
00109
00110 dD[2] = 0.5f*(fh_vals[MID_IDX + 1][MID_IDX ][MID_IDX ] -
00111 fh_vals[MID_IDX - 1][MID_IDX ][MID_IDX ]);
00112
00113 __shared__ float invdet;
00114 invdet = 1.f /
00115 (
00116 H[0][0]*H[1][1]*H[2][2]
00117 + H[0][1]*H[1][2]*H[2][0]
00118 + H[0][2]*H[1][0]*H[2][1]
00119 - H[0][0]*H[1][2]*H[2][1]
00120 - H[0][1]*H[1][0]*H[2][2]
00121 - H[0][2]*H[1][1]*H[2][0]
00122 );
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135 __shared__ float Hinv[3][3];
00136 Hinv[0][0] = invdet*(H[1][1]*H[2][2]-H[1][2]*H[2][1]);
00137 Hinv[0][1] = -invdet*(H[0][1]*H[2][2]-H[0][2]*H[2][1]);
00138 Hinv[0][2] = invdet*(H[0][1]*H[1][2]-H[0][2]*H[1][1]);
00139
00140 Hinv[1][0] = -invdet*(H[1][0]*H[2][2]-H[1][2]*H[2][0]);
00141 Hinv[1][1] = invdet*(H[0][0]*H[2][2]-H[0][2]*H[2][0]);
00142 Hinv[1][2] = -invdet*(H[0][0]*H[1][2]-H[0][2]*H[1][0]);
00143
00144 Hinv[2][0] = invdet*(H[1][0]*H[2][1]-H[1][1]*H[2][0]);
00145 Hinv[2][1] = -invdet*(H[0][0]*H[2][1]-H[0][1]*H[2][0]);
00146 Hinv[2][2] = invdet*(H[0][0]*H[1][1]-H[0][1]*H[1][0]);
00147
00148 __shared__ float x[3];
00149
00150 x[0] = -(Hinv[0][0]*(dD[0]) + Hinv[0][1]*(dD[1]) + Hinv[0][2]*(dD[2]));
00151 x[1] = -(Hinv[1][0]*(dD[0]) + Hinv[1][1]*(dD[1]) + Hinv[1][2]*(dD[2]));
00152 x[2] = -(Hinv[2][0]*(dD[0]) + Hinv[2][1]*(dD[1]) + Hinv[2][2]*(dD[2]));
00153
00154 if(fabs(x[0])< 1.f && fabs(x[1])< 1.f && fabs(x[2])< 1.f) {
00155
00156
00157 i = atomicInc(d_feature_counter,(unsigned int) -1);
00158
00159 if(i < ASRL_SURF_MAX_FEATURES) {
00160
00161
00162 p.x = ((float)d_maxmin[blockIdx.x].x+x[1]) * (float)d_octave_params[octave].step + d_octave_params[octave].border;
00163
00164 p.y = ((float)d_maxmin[blockIdx.x].y+x[0]) * (float)d_octave_params[octave].step + d_octave_params[octave].border;
00165
00166 if(x[2] > 0)
00167 {
00168 float a = d_hessian_scale[ASRL_SURF_MAX_INTERVALS*octave + d_maxmin[blockIdx.x].z];
00169 float b = d_hessian_scale[ASRL_SURF_MAX_INTERVALS*octave + d_maxmin[blockIdx.x].z + 1];
00170
00171 p.size = (1.f - x[2]) * a + x[2] * b;
00172 }
00173 else
00174 {
00175 float a = d_hessian_scale[ASRL_SURF_MAX_INTERVALS*octave + d_maxmin[blockIdx.x].z];
00176 float b = d_hessian_scale[ASRL_SURF_MAX_INTERVALS*octave + d_maxmin[blockIdx.x].z - 1];
00177
00178 p.size = (1.f + x[2]) * a - x[2] * b;
00179 }
00180
00181
00182 p.octave = octave;
00183
00184
00185 p.response = fh_vals[MID_IDX][MID_IDX][MID_IDX];
00186
00187
00188 d_features[i] = p;
00189 } else {
00190 *d_feature_counter = ASRL_SURF_MAX_FEATURES;
00191 }
00192 }
00193 }
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205 }
00206
00207
00208 void run_fh_interp_extremum(float * d_hessian, Keypoint * d_features, int4 * d_maxmin, unsigned int * d_feature_counter, unsigned int * d_max_min_counter)
00209 {
00210 dim3 threads;
00211 threads.x = 3;
00212 threads.y = 3;
00213 threads.z = 3;
00214
00215 dim3 grid;
00216 grid.x = ASRL_SURF_MAX_CANDIDATES;
00217 grid.y = 1;
00218 grid.z = 1;
00219
00220 fh_interp_extremum<<< grid, threads >>>(d_hessian,
00221 d_features,
00222 d_maxmin,
00223 d_feature_counter,
00224 d_max_min_counter);
00225 }
00226
00227 }