Speeded Up SURF
|
|
asrl Namespace Reference
Namespaces |
namespace | detail |
Classes |
struct | GpuSurfConfiguration |
| A structure representing the GPU surf configuration. More...
|
class | GpuSurfDetector |
| The exported class representing the GPU surf detector. More...
|
class | CudaSynchronizedMemory |
| a class that handles synchronization of memory between the host and device More...
|
class | SurfOctaveParameters |
| A structure which holds the constant parameters that describe an octave layout. More...
|
class | Keypoint |
| A keypoint class used on the GPU. More...
|
class | GpuIntegralImage |
| The integral image on the device. More...
|
class | GpuIntegralImageProcessor |
| A class that reserves memory on the GPU for creating integral images. More...
|
class | GpuSurfDetectorInternal |
| The private implementation of the GPU surf detector. More...
|
class | GpuSurfFeatures |
| Memory required for SURF features on the GPU. More...
|
class | GpuSurfOctave |
| A class that reserves memory on the GPU for processing the interest point operator. More...
|
Enumerations |
enum | fh_component {
FH_DXX,
FH_DYY,
FH_DXX_DYY,
FH_DXY,
FH_CENSURE,
FH_RESULT,
FH_RESULT_BIT_SET,
FH_LINEAR_IDX,
FH_HIDX_X_IDX,
FH_HIDX_Y_IDX,
FH_FSCALE,
FH_X,
FH_Y
} |
enum | KeypointLayout {
SF_X,
SF_Y,
SF_SIZE,
SF_RESPONSE,
SF_ANGLE,
SF_OCTAVE,
SF_FEATURE_STRIDE
} |
Functions |
__global__ void | compute_descriptors_kernel (float *d_descriptors, Keypoint *d_features) |
__global__ void | normalize_descriptors_kernel (float *d_descriptors) |
void | compute_descriptors (float *d_descriptors, Keypoint *d_features, int nFeaturesFound) |
void | run_surf_detector (float *d_hessianBuffer, GpuSurfOctave &octave, int octaveIdx, GpuSurfFeatures &features, float threshold, int fh_x_threads, int fh_y_threads, int nonmax_x_threads, int nonmax_y_threads) |
__device__ float | evalDyyByArea (float x, float y, float t, float mask_width, float mask_height, float fscale) |
__device__ float | evalDxxByArea (float x, float y, float t, float mask_width, float mask_height, float fscale) |
__device__ float | evalDxy (float x, float y, float fscale, int octave) |
__global__ void | fasthessian_kernel (float *d_hessian, int octave) |
__global__ void | eval_component_kernel (float *d_hessian, int octave, int component) |
void | run_fasthessian_kernel (dim3 grid, dim3 threads, float *d_hessian, int octave) |
void | run_eval_component_kernel (dim3 grid, dim3 threads, float *d_hessian, int octave, fh_component comp) |
void | fh_untexturizeIntegral () |
void | texturize_integral_image (cudaArray *intImg) |
float | iiAreaLookupC (cudaArray *image, float cx, float cy, float width, float height) |
cudaError_t | texturize_integral_image_c (cudaArray *intImg) |
cudaError_t | fh_untexturizeIntegral_c () |
__device__ float | iiAreaLookupCDHalfWH (float cx, float cy, float halfWidth, float halfHeight) |
__device__ float | iiAreaLookupCD (float cx, float cy, float width, float height) |
__global__ void | iiAreaLookupCDKernel (float *d_result, float cx, float cy, float width, float height) |
void | run_iiAreaLookupCDKernel (dim3 grid, dim3 threads, float *d_result, float cx, float cy, float width, float height) |
void | init_globals (int imWidth, int imHeight, GpuSurfOctave *octaves, int nOctaves) |
bool & | get_s_initialized () |
int & | get_s_initWidth () |
int & | get_s_initHeight () |
SurfOctaveParameters * | get_d_octave_params () |
float * | get_d_hessian_scale () |
int * | get_d_hessian_stride () |
void | call_integral_kernel (size_t width, size_t height, unsigned char *char_data, float *norm_data, float *trans_data, cudaArray *output_int_img, size_t norm_pitch, size_t trans_pitch, size_t char_pitch, CUDPPHandle &rowPlan, CUDPPHandle &colPlan, cudaStream_t stream) |
__device__ void | convert_dev (float &out, unsigned char in) |
__device__ void | convert_dev (float &out, float in) |
template<typename T > |
__global__ void | transpose_kernel (float *odata, size_t o_pitch, T *idata, size_t i_pitch, size_t width, size_t height) |
void | run_transpose_kernel_uchar (dim3 grid, dim3 block, float *odata, size_t o_pitch, unsigned char *idata, size_t i_pitch, size_t width, size_t height) |
void | run_transpose_kernel_float (dim3 grid, dim3 block, float *odata, size_t o_pitch, float *idata, size_t i_pitch, size_t width, size_t height) |
__global__ void | fh_interp_extremum (float *d_hessian, Keypoint *d_features, int4 *d_maxmin, unsigned int *d_feature_counter, unsigned int *d_max_min_counter) |
void | run_fh_interp_extremum (float *d_hessian, Keypoint *d_features, int4 *d_maxmin, unsigned int *d_feature_counter, unsigned int *d_max_min_counter) |
__global__ void | surf_nonmaxonly_kernel (float *d_hessian, int octave, int4 *d_maxmin, unsigned int *d_maxmin_counter, float threshold) |
void | run_surf_nonmaxonly_kernel (dim3 grid, dim3 threads, size_t sharedBytes, float *d_hessian, int octave, int4 *d_maxmin, unsigned int *d_maxmin_counter, float threshold) |
__device__ void | swap (float3 &a, float3 &b) |
__device__ void | setMaxXZ (float3 &dest, float3 &comp) |
__device__ void | setSumXY (float2 &dest, float2 &src) |
__device__ void | setMaxZ3 (float3 &dest, float3 &comp) |
__global__ void | find_orientation_fast_kernel (Keypoint *d_features) |
__global__ void | find_orientation_kernel (Keypoint *d_features) |
void | find_orientation (Keypoint *d_features, int nFeatures) |
void | find_orientation_fast (Keypoint *d_features, int nFeatures) |
Variables |
__constant__ float | dc_3p3gauss1D [20] |
bool | s_initialized = false |
int | s_initWidth = -1 |
int | s_initHeight = -1 |
__constant__ SurfOctaveParameters | d_octave_params [ASRL_SURF_MAX_OCTAVES] |
__constant__ float | d_hessian_scale [ASRL_SURF_MAX_INTERVALS *ASRL_SURF_MAX_OCTAVES] |
__constant__ int | d_hessian_stride [1] |
__shared__ float | fh_vals [] |
__constant__ float | dc_gauss1D [13] = {0.002215924206f, 0.008764150247f, 0.026995483257f, 0.064758797833f, 0.120985362260f, 0.176032663382f, 0.199471140201f, 0.176032663382f, 0.120985362260f, 0.064758797833f, 0.026995483257f, 0.008764150247f, 0.002215924206f} |
Enumeration Type Documentation
An enum specifying the components of the fasthessian algorithm used in debugging.
- Enumerator:
FH_DXX |
|
FH_DYY |
|
FH_DXX_DYY |
|
FH_DXY |
|
FH_CENSURE |
|
FH_RESULT |
|
FH_RESULT_BIT_SET |
|
FH_LINEAR_IDX |
|
FH_HIDX_X_IDX |
|
FH_HIDX_Y_IDX |
|
FH_FSCALE |
|
FH_X |
|
FH_Y |
|
Definition at line 62 of file fasthessian.h.
The layout of a keypoint so the elements may be grabbed as an array.
- Enumerator:
SF_X |
|
SF_Y |
|
SF_SIZE |
|
SF_RESPONSE |
|
SF_ANGLE |
|
SF_OCTAVE |
|
SF_FEATURE_STRIDE |
|
Definition at line 142 of file gpu_globals.h.
Function Documentation
void asrl::call_integral_kernel |
( |
size_t |
width, |
|
|
size_t |
height, |
|
|
unsigned char * |
char_data, |
|
|
float * |
norm_data, |
|
|
float * |
trans_data, |
|
|
cudaArray * |
output_int_img, |
|
|
size_t |
norm_pitch, |
|
|
size_t |
trans_pitch, |
|
|
size_t |
char_pitch, |
|
|
CUDPPHandle & |
rowPlan, |
|
|
CUDPPHandle & |
colPlan, |
|
|
cudaStream_t |
stream | |
|
) |
| | |
The C interface to the CUDA kernel that builds the integral image
- Parameters:
-
| width | the width of the input image |
| height | the height of the input image |
| char_data | a device pointer to the input image |
| norm_data | a device pointer for intermediate storage |
| trans_data | a device pointer for intermediate storage |
| output_int_img | a device pointer to the output buffer for the integral image |
| norm_pitch | the pitch of the rows of the image array in the standard orientation |
| trans_pitch | the pitch of the rows of the transposed image array |
| char_pitch | the pitch of the rows of the character array |
| rowPlan | a handle to the CUDPP plan structure governing the row scan operation |
| colPlan | a handle to the CUDPP plan structure governing the column scan operation |
| stream | An optional cuda stream. |
void asrl::compute_descriptors |
( |
float * |
d_descriptors, |
|
|
Keypoint * |
d_features, |
|
|
int |
nFeaturesFound | |
|
) |
| | |
A function to calculate SURF descriptors from keypoints. The keypoint location, orientation and scale are used in this function.
- Parameters:
-
| d_descriptors | The device pointer to descriptor memory. |
| d_features | The device pointer to feature memory. |
| nFeaturesFound | The number of features to be described. Features should be arranged linearly on the GPU |
__global__ void asrl::compute_descriptors_kernel |
( |
float * |
d_descriptors, |
|
|
Keypoint * |
d_features | |
|
) |
| | |
__device__ void asrl::convert_dev |
( |
float & |
out, |
|
|
float |
in | |
|
) |
| | |
__device__ void asrl::convert_dev |
( |
float & |
out, |
|
|
unsigned char |
in | |
|
) |
| | |
__global__ void asrl::eval_component_kernel |
( |
float * |
d_hessian, |
|
|
int |
octave, |
|
|
int |
component | |
|
) |
| | |
__device__ float asrl::evalDxxByArea |
( |
float |
x, |
|
|
float |
y, |
|
|
float |
t, |
|
|
float |
mask_width, |
|
|
float |
mask_height, |
|
|
float |
fscale | |
|
) |
| | |
__device__ float asrl::evalDxy |
( |
float |
x, |
|
|
float |
y, |
|
|
float |
fscale, |
|
|
int |
octave | |
|
) |
| | |
__device__ float asrl::evalDyyByArea |
( |
float |
x, |
|
|
float |
y, |
|
|
float |
t, |
|
|
float |
mask_width, |
|
|
float |
mask_height, |
|
|
float |
fscale | |
|
) |
| | |
__global__ void asrl::fasthessian_kernel |
( |
float * |
d_hessian, |
|
|
int |
octave | |
|
) |
| | |
__global__ void asrl::fh_interp_extremum |
( |
float * |
d_hessian, |
|
|
Keypoint * |
d_features, |
|
|
int4 * |
d_maxmin, |
|
|
unsigned int * |
d_feature_counter, |
|
|
unsigned int * |
d_max_min_counter | |
|
) |
| | |
void asrl::fh_untexturizeIntegral |
( |
|
) |
|
Remove the integral image texture association.
cudaError_t asrl::fh_untexturizeIntegral_c |
( |
|
) |
|
Remove the integral image texture association. This version throws an exception on error
void asrl::find_orientation |
( |
Keypoint * |
d_features, |
|
|
int |
nFeatures | |
|
) |
| | |
Computes the orientation for a list of keypoints. This is the orientation calculation from the original SURF algorithm
- Parameters:
-
| d_features | A device pointer to the keypoint list. |
| nFeatures | The number of features available |
void asrl::find_orientation_fast |
( |
Keypoint * |
d_features, |
|
|
int |
nFeatures | |
|
) |
| | |
Computes the orientation for a list of keypoints. This is an orientation calculation that is 10x faster than the original
- Parameters:
-
| d_features | A device pointer to the keypoint list. |
| nFeatures | The number of features available |
__global__ void asrl::find_orientation_fast_kernel |
( |
Keypoint * |
d_features |
) |
|
__global__ void asrl::find_orientation_kernel |
( |
Keypoint * |
d_features |
) |
|
float * asrl::get_d_hessian_scale |
( |
|
) |
|
- Returns:
- A __constant__ variable at gpusurf.cu file scope that holds the octave scale constants
int * asrl::get_d_hessian_stride |
( |
|
) |
|
- Returns:
- A __constant__ variable at gpusurf.cu file scope that holds the hessian buffer row stride.
- Returns:
- A __constant__ variable at gpusurf.cu file scope that holds octave parameters
int & asrl::get_s_initHeight |
( |
|
) |
|
- Returns:
- A variable at gpusurf.cu file scope that holds the initialized image height
bool & asrl::get_s_initialized |
( |
|
) |
|
- Returns:
- A variable at gpusurf.cu file scope that says if the constant memory has been initialized.
int & asrl::get_s_initWidth |
( |
|
) |
|
- Returns:
- A variable at gpusurf.cu file scope that holds the initialized image width
float asrl::iiAreaLookupC |
( |
cudaArray * |
image, |
|
|
float |
cx, |
|
|
float |
cy, |
|
|
float |
width, |
|
|
float |
height | |
|
) |
| | |
A debugging function that calculates an area using box filters on the GPU. This is mostly used for debugging.
- Parameters:
-
| image | The integral image that the region area will be calculated on |
| cx | The center of the region in horizontal pixels |
| cy | The center of the region in vertical pixels |
| width | The width of the region. |
| height | The height of the region. |
- Returns:
__device__ float asrl::iiAreaLookupCD |
( |
float |
cx, |
|
|
float |
cy, |
|
|
float |
width, |
|
|
float |
height | |
|
) |
| | |
A CUDA device function for looking up the sum of pixels in an area using an integral image. This is accomplished with 4 lookups. Each pixel in an integral image contains the sum of all pixels above and to the left of it. To calculate the sum of pixels within any area, we look up pixels at the corners:
A *-----------* B
| |
| |
| |
C *-----------* D
Area = A - B - C + D
Cuda requires that texture variables are global within file scope so this function uses the d_integralTex variable
- Parameters:
-
| cx | The horizontal pixel coordinates of the center of the area to look up |
| cy | The vertical pixel coordinates of the center of the area to look up |
| width | The width of the area to look up |
| height | The height of the area to look up |
- Returns:
- The area within the region.
Definition at line 132 of file gpu_area.cu.
__device__ float asrl::iiAreaLookupCDHalfWH |
( |
float |
cx, |
|
|
float |
cy, |
|
|
float |
halfWidth, |
|
|
float |
halfHeight | |
|
) |
| | |
A CUDA device function for looking up the sum of pixels in an area using an integral image. This is accomplished with 4 lookups. Each pixel in an integral image contains the sum of all pixels above and to the left of it. To calculate the sum of pixels within any area, we look up pixels at the corners:
A *-----------* B
| |
| |
| |
C *-----------* D
Area = A - B - C + D
Cuda requires that texture variables are global within file scope so this function uses the d_integralTex variable
- Parameters:
-
| cx | The horizontal pixel coordinates of the center of the area to look up |
| cy | The vertical pixel coordinates of the center of the area to look up |
| halfWidth | Half of the width of the area to look up |
| halfHeight | Half of the height of the area to look up |
- Returns:
- The area within the region.
Definition at line 89 of file gpu_area.cu.
__global__ void asrl::iiAreaLookupCDKernel |
( |
float * |
d_result, |
|
|
float |
cx, |
|
|
float |
cy, |
|
|
float |
width, |
|
|
float |
height | |
|
) |
| | |
A simple kernel that looks up one area on the device
- Parameters:
-
| d_result | The device pointer to a single float used to store the result. |
| cx | The horizontal pixel coordinates of the center of the area to look up |
| cy | The vertical pixel coordinates of the center of the area to look up |
| width | The width of the area to look up |
| height | The height of the area to look up |
Definition at line 146 of file gpu_area.cu.
void asrl::init_globals |
( |
int |
imWidth, |
|
|
int |
imHeight, |
|
|
GpuSurfOctave * |
octaves, |
|
|
int |
nOctaves | |
|
) |
| | |
Initialize global variables used by the SURF detector
- Parameters:
-
| imWidth | The width of the integral image |
| imHeight | The height of the integral image |
| octaves | The octave parameters |
__global__ void asrl::normalize_descriptors_kernel |
( |
float * |
d_descriptors |
) |
|
void asrl::run_eval_component_kernel |
( |
dim3 |
grid, |
|
|
dim3 |
threads, |
|
|
float * |
d_hessian, |
|
|
int |
octave, |
|
|
fh_component |
comp | |
|
) |
| | |
Evaluates a component of the SURF fasthessian algorithm at every point in the buffer. Used for debugging.
- Parameters:
-
| grid | The grid configuration |
| threads | The thread configuration |
| d_hessian | The device buffer to store the fast hessian |
| octave | The octave number to be working on |
| comp | The component to return in the octave buffers |
void asrl::run_fasthessian_kernel |
( |
dim3 |
grid, |
|
|
dim3 |
threads, |
|
|
float * |
d_hessian, |
|
|
int |
octave | |
|
) |
| | |
Runs the SURF fast hessian kernel
- Parameters:
-
| grid | The grid configuration |
| threads | The thread configuration |
| d_hessian | The device buffer to store the fast hessian |
| octave | The octave number to be working on |
void asrl::run_fh_interp_extremum |
( |
float * |
d_hessian, |
|
|
Keypoint * |
d_features, |
|
|
int4 * |
d_maxmin, |
|
|
unsigned int * |
d_feature_counter, |
|
|
unsigned int * |
d_max_min_counter | |
|
) |
| | |
Interpolate a set of keypoints on the GPU
- Parameters:
-
| d_hessian | The interest operator buffer |
| d_features | Device buffer for interpolated features |
| d_maxmin | Device buffer for raw features |
| d_feature_counter | Device counter for number of interpolated features |
| d_max_min_counter | Device counter for number of raw features |
| d_threshold | Device pointers to the threshold |
void asrl::run_iiAreaLookupCDKernel |
( |
dim3 |
grid, |
|
|
dim3 |
threads, |
|
|
float * |
d_result, |
|
|
float |
cx, |
|
|
float |
cy, |
|
|
float |
width, |
|
|
float |
height | |
|
) |
| | |
A kernel to calculate the area of a region on the GPU. The integral image must have been previously passed to texturize_integral_image()
- Parameters:
-
| grid | The grid size for the kernel |
| threads | The thread block size for the kernel |
| result | A device pointer where the result will be stored. |
| cx | The center of the region in horizontal pixels |
| cy | The center of the region in vertical pixels |
| width | The width of the region. |
| height | The height of the region. |
- Returns:
void asrl::run_surf_detector |
( |
float * |
d_hessianBuffer, |
|
|
GpuSurfOctave & |
octave, |
|
|
int |
octaveIdx, |
|
|
GpuSurfFeatures & |
features, |
|
|
float |
threshold, |
|
|
int |
fh_x_threads, |
|
|
int |
fh_y_threads, |
|
|
int |
nonmax_x_threads, |
|
|
int |
nonmax_y_threads | |
|
) |
| | |
A Function that runs the gpusurf detector. This includes
-
Computing the interest operator
-
Finding maxima in the 3d buffer
-
Computing subpixel interpolation of the keypoints
- Parameters:
-
| d_hessianBuffer | A device pointer to the buffer where the interest operator results are stored. |
| octave | The parameters of the octave being processed |
| octaveIdx | The index of the octave being processed |
| features | Memory to hold the resultant features. |
| threshold | The threshold being used |
| fh_x_threads | The number of threads per block used to compute the interest operator (dimension 1) |
| fh_y_threads | The number of threads per block used to compute the interest operator (dimension 2) |
| nonmax_x_threads | The number of threads per block used to find maxima (dimension 1) |
| nonmax_y_threads | The number of threads per block used to find maxima (dimension 2) |
void asrl::run_surf_nonmaxonly_kernel |
( |
dim3 |
grid, |
|
|
dim3 |
threads, |
|
|
size_t |
sharedBytes, |
|
|
float * |
d_hessian, |
|
|
int |
octave, |
|
|
int4 * |
d_maxmin, |
|
|
unsigned int * |
d_maxmin_counter, |
|
|
float |
threshold | |
|
) |
| | |
Find maxima within 3D the interest operator buffer
- Parameters:
-
| grid | The grid configuration |
| threads | The thread configuration |
| sharedBytes | The amount of shared memory used by the kernel |
| d_hessian | The device buffer to store the fast hessian |
| octave | The octave number to be working on |
| d_maxmin | Device buffer for raw features |
| d_maxmin_counter | Device counter for number of raw features |
| threshold | The interest operator threshold |
void asrl::run_transpose_kernel_float |
( |
dim3 |
grid, |
|
|
dim3 |
block, |
|
|
float * |
odata, |
|
|
size_t |
o_pitch, |
|
|
float * |
idata, |
|
|
size_t |
i_pitch, |
|
|
size_t |
width, |
|
|
size_t |
height | |
|
) |
| | |
The interface to the float-to-float matrix transpose operation.
- Parameters:
-
| grid | The kernel grid configuration |
| block | The kernel block configuration |
| odata | The output data (device) |
| o_pitch | The output data pitch |
| idata | The input data (device) |
| i_pitch | The input data pitch |
| width | The input data width |
| height | The input data height |
void asrl::run_transpose_kernel_uchar |
( |
dim3 |
grid, |
|
|
dim3 |
block, |
|
|
float * |
odata, |
|
|
size_t |
o_pitch, |
|
|
unsigned char * |
idata, |
|
|
size_t |
i_pitch, |
|
|
size_t |
width, |
|
|
size_t |
height | |
|
) |
| | |
The interface to the unsigned char-to-float matrix transpose operation.
- Parameters:
-
| grid | The kernel grid configuration |
| block | The kernel block configuration |
| odata | The output data (device) |
| o_pitch | The output data pitch |
| idata | The input data (device) |
| i_pitch | The input data pitch |
| width | The input data width |
| height | The input data height |
__device__ void asrl::setMaxXZ |
( |
float3 & |
dest, |
|
|
float3 & |
comp | |
|
) |
| | |
__device__ void asrl::setMaxZ3 |
( |
float3 & |
dest, |
|
|
float3 & |
comp | |
|
) |
| | |
__device__ void asrl::setSumXY |
( |
float2 & |
dest, |
|
|
float2 & |
src | |
|
) |
| | |
__global__ void asrl::surf_nonmaxonly_kernel |
( |
float * |
d_hessian, |
|
|
int |
octave, |
|
|
int4 * |
d_maxmin, |
|
|
unsigned int * |
d_maxmin_counter, |
|
|
float |
threshold | |
|
) |
| | |
__device__ void asrl::swap |
( |
float3 & |
a, |
|
|
float3 & |
b | |
|
) |
| | [inline] |
void asrl::texturize_integral_image |
( |
cudaArray * |
intImg |
) |
|
Store the integral image as a texture on the GPU.
- Parameters:
-
| intImg | The integral image to use as a texture. |
cudaError_t asrl::texturize_integral_image_c |
( |
cudaArray * |
intImg |
) |
|
Store the integral image as a texture on the GPU. This version does not throw an exception on error
- Parameters:
-
| intImg | The integral image to use as a texture. |
template<typename T >
__global__ void asrl::transpose_kernel |
( |
float * |
odata, |
|
|
size_t |
o_pitch, |
|
|
T * |
idata, |
|
|
size_t |
i_pitch, |
|
|
size_t |
width, |
|
|
size_t |
height | |
|
) |
| | [inline] |
Variable Documentation
Initial value: {0.001917811039f, 0.004382549939f, 0.009136246641f, 0.017375153068f, 0.030144587513f,
0.047710056854f, 0.068885910797f, 0.090734146446f, 0.109026229640f, 0.119511889092f,
0.119511889092f, 0.109026229640f, 0.090734146446f, 0.068885910797f, 0.047710056854f,
0.030144587513f, 0.017375153068f, 0.009136246641f, 0.004382549939f, 0.001917811039f}
Definition at line 37 of file descriptors.cu.
__constant__ float asrl::dc_gauss1D[13] = {0.002215924206f, 0.008764150247f, 0.026995483257f, 0.064758797833f, 0.120985362260f, 0.176032663382f, 0.199471140201f, 0.176032663382f, 0.120985362260f, 0.064758797833f, 0.026995483257f, 0.008764150247f, 0.002215924206f} |
Generated on Fri Apr 30 20:06:20 2010 for gpusurf by 1.6.2
|