00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #include "fasthessian.h"
00032 #include "gpu_utils.h"
00033 #include "gpu_globals.h"
00034
00035 namespace asrl{
00036
00037
00038 __device__ float evalDyyByArea(float x, float y, float t, float mask_width, float mask_height, float fscale)
00039 {
00040 float Dyy = 0.f;
00041
00042 Dyy += iiAreaLookupCDHalfWH( x , y, mask_width, mask_height);
00043 Dyy -= t * iiAreaLookupCDHalfWH( x , y, mask_width, fscale);
00044
00045 Dyy *= 1/(fscale*fscale);
00046 return Dyy;
00047 }
00048
00049 __device__ float evalDxxByArea(float x, float y, float t, float mask_width, float mask_height, float fscale)
00050 {
00051 float Dxx = 0.f;
00052
00053 Dxx += iiAreaLookupCDHalfWH( x , y, mask_height, mask_width);
00054 Dxx -= t * iiAreaLookupCDHalfWH( x , y, fscale , mask_width);
00055
00056 Dxx *= 1/(fscale*fscale);
00057 return Dxx;
00058 }
00059
00060
00061 __device__ float evalDxy(float x, float y, float fscale, int octave)
00062 {
00063 float center_offset = d_octave_params[octave].dxy_center_offset * fscale;
00064 float half_width = d_octave_params[octave].dxy_half_width * fscale;
00065 float Dxy = 0.f;
00066 Dxy += iiAreaLookupCDHalfWH(x - center_offset, y - center_offset, half_width, half_width);
00067 Dxy -= iiAreaLookupCDHalfWH(x - center_offset, y + center_offset, half_width, half_width);
00068 Dxy += iiAreaLookupCDHalfWH(x + center_offset, y + center_offset, half_width, half_width);
00069 Dxy -= iiAreaLookupCDHalfWH(x + center_offset, y - center_offset, half_width, half_width);
00070
00071 Dxy *= 1/(fscale*fscale);
00072 return Dxy;
00073 }
00074
00075
00076
00077 __global__ void fasthessian_kernel(float * d_hessian, int octave)
00078 {
00079
00080 int hidx_x = threadIdx.x + __mul24(blockIdx.x,blockDim.x);
00081 int hidx_y = threadIdx.y + __mul24(blockIdx.y,blockDim.y);
00082 int hidx_z = threadIdx.z;
00083
00084 int hidx_lin = hidx_x + d_hessian_stride[0] * hidx_y + d_hessian_stride[0] * d_octave_params[octave].y_size * hidx_z;
00085
00086
00087 __shared__ float t;
00088 if(threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0)
00089 {
00090 t = d_octave_params[octave].mask_height;
00091 }
00092
00093 __syncthreads();
00094
00095
00096 float fscale = d_hessian_scale[ASRL_SURF_MAX_INTERVALS*octave + hidx_z];
00097
00098
00099 float x = (hidx_x * d_octave_params[octave].step + d_octave_params[octave].border);
00100 float y = (hidx_y * d_octave_params[octave].step + d_octave_params[octave].border);
00101
00102
00103 if(hidx_x < d_octave_params[octave].x_size && hidx_y < d_octave_params[octave].y_size && hidx_z < d_octave_params[octave].nIntervals)
00104 {
00105 float mask_width = d_octave_params[octave].mask_width * fscale;
00106 float mask_height = d_octave_params[octave].mask_height * fscale;
00107
00108
00109 float Dyy = evalDyyByArea(x, y, t, mask_width, mask_height, fscale);
00110 float Dxx = evalDxxByArea(x, y, t, mask_width, mask_height, fscale);
00111 float Dxy = evalDxy(x, y, fscale, octave);
00112
00113
00114 float result = ( Dxx * Dyy ) - d_octave_params[octave].dxy_scale*(Dxy*Dxy);
00115 if(Dxx+Dyy > 0.f)
00116 setLastBit(result);
00117 else
00118 clearLastBit(result);
00119
00120 d_hessian[hidx_lin] = result;
00121 }
00122 }
00123
00124
00125 __global__ void eval_component_kernel(float * d_hessian, int octave, int component)
00126 {
00127 int hidx_x = threadIdx.x + blockIdx.x * blockDim.x;
00128 int hidx_y = threadIdx.y + blockIdx.y * blockDim.y;
00129 int hidx_z = threadIdx.z;
00130 int hidx_lin = hidx_x + d_hessian_stride[0] * hidx_y + d_hessian_stride[0] * d_octave_params[octave].y_size * hidx_z;
00131
00132 __shared__ float t;
00133 if(threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0)
00134 {
00135 t = d_octave_params[octave].mask_height;
00136 }
00137
00138 __syncthreads();
00139
00140 float fscale = d_hessian_scale[ASRL_SURF_MAX_INTERVALS*octave + hidx_z];
00141
00142 float x = (hidx_x * d_octave_params[octave].step + d_octave_params[octave].border);
00143 float y = (hidx_y * d_octave_params[octave].step + d_octave_params[octave].border);
00144
00145 if(hidx_x < d_octave_params[octave].x_size && hidx_y < d_octave_params[octave].y_size && hidx_z < d_octave_params[octave].nIntervals)
00146 {
00147 float mask_width = d_octave_params[octave].mask_width * fscale;
00148 float mask_height = d_octave_params[octave].mask_height * fscale;
00149
00150 float Dyy = evalDyyByArea(x, y, t, mask_width, mask_height, fscale);
00151 float Dxx = evalDxxByArea(x, y, t, mask_width, mask_height, fscale);
00152 float Dxy = evalDxy(x, y, fscale, octave);
00153 float censure = 0.f;
00154
00155 float result = ( Dxx * Dyy ) - d_octave_params[octave].dxy_scale*(Dxy*Dxy);
00156
00157 switch(component)
00158 {
00159 case FH_DXX:
00160 d_hessian[hidx_lin] = Dxx;
00161 break;
00162 case FH_DYY:
00163 d_hessian[hidx_lin] = Dyy;
00164 break;
00165 case FH_DXX_DYY:
00166 d_hessian[hidx_lin] = Dxx * Dyy;
00167 break;
00168 case FH_DXY:
00169 d_hessian[hidx_lin] = Dxy;
00170 break;
00171 case FH_LINEAR_IDX:
00172 d_hessian[hidx_lin] = hidx_lin;
00173 break;
00174 case FH_HIDX_X_IDX:
00175 d_hessian[hidx_lin] = hidx_x;
00176 break;
00177 case FH_HIDX_Y_IDX:
00178 d_hessian[hidx_lin] = hidx_y;
00179 break;
00180 case FH_FSCALE:
00181 d_hessian[hidx_lin] = fscale;
00182 break;
00183 case FH_X:
00184 d_hessian[hidx_lin] = x;
00185 break;
00186 case FH_Y:
00187 d_hessian[hidx_lin] = y;
00188 break;
00189 case FH_RESULT:
00190 d_hessian[hidx_lin] = result;
00191 break;
00192 case FH_CENSURE:
00193 d_hessian[hidx_lin] = censure;
00194 case FH_RESULT_BIT_SET:
00195 if(Dxx+Dyy > 0.f)
00196 setLastBit(result);
00197 else
00198 clearLastBit(result);
00199 d_hessian[hidx_lin] = result;
00200 break;
00201 default:
00202 d_hessian[hidx_lin] = result;
00203 break;
00204 }
00205 }
00206 }
00207
00208
00209 void run_fasthessian_kernel(dim3 grid, dim3 threads, float * d_hessian, int octave)
00210 {
00211 fasthessian_kernel<<< grid, threads >>>(d_hessian, octave);
00212 }
00213
00214 void run_eval_component_kernel(dim3 grid, dim3 threads, float * d_hessian, int octave, fh_component comp)
00215 {
00216 eval_component_kernel<<< grid, threads >>>(d_hessian, octave, (int)comp);
00217 }
00218
00219 }