Autonomous Space Robotics Lab

Speeded Up SURF

UTIAS ASRL

asrl Namespace Reference

Namespaces

namespace  detail

Classes

struct  GpuSurfConfiguration
 A structure representing the GPU surf configuration. More...
class  GpuSurfDetector
 The exported class representing the GPU surf detector. More...
class  CudaSynchronizedMemory
 a class that handles synchronization of memory between the host and device More...
class  SurfOctaveParameters
 A structure which holds the constant parameters that describe an octave layout. More...
class  Keypoint
 A keypoint class used on the GPU. More...
class  GpuIntegralImage
 The integral image on the device. More...
class  GpuIntegralImageProcessor
 A class that reserves memory on the GPU for creating integral images. More...
class  GpuSurfDetectorInternal
 The private implementation of the GPU surf detector. More...
class  GpuSurfFeatures
 Memory required for SURF features on the GPU. More...
class  GpuSurfOctave
 A class that reserves memory on the GPU for processing the interest point operator. More...

Enumerations

enum  fh_component {
  FH_DXX, FH_DYY, FH_DXX_DYY, FH_DXY,
  FH_CENSURE, FH_RESULT, FH_RESULT_BIT_SET, FH_LINEAR_IDX,
  FH_HIDX_X_IDX, FH_HIDX_Y_IDX, FH_FSCALE, FH_X,
  FH_Y
}
enum  KeypointLayout {
  SF_X, SF_Y, SF_SIZE, SF_RESPONSE,
  SF_ANGLE, SF_OCTAVE, SF_FEATURE_STRIDE
}

Functions

__global__ void compute_descriptors_kernel (float *d_descriptors, Keypoint *d_features)
__global__ void normalize_descriptors_kernel (float *d_descriptors)
void compute_descriptors (float *d_descriptors, Keypoint *d_features, int nFeaturesFound)
void run_surf_detector (float *d_hessianBuffer, GpuSurfOctave &octave, int octaveIdx, GpuSurfFeatures &features, float threshold, int fh_x_threads, int fh_y_threads, int nonmax_x_threads, int nonmax_y_threads)
__device__ float evalDyyByArea (float x, float y, float t, float mask_width, float mask_height, float fscale)
__device__ float evalDxxByArea (float x, float y, float t, float mask_width, float mask_height, float fscale)
__device__ float evalDxy (float x, float y, float fscale, int octave)
__global__ void fasthessian_kernel (float *d_hessian, int octave)
__global__ void eval_component_kernel (float *d_hessian, int octave, int component)
void run_fasthessian_kernel (dim3 grid, dim3 threads, float *d_hessian, int octave)
void run_eval_component_kernel (dim3 grid, dim3 threads, float *d_hessian, int octave, fh_component comp)
void fh_untexturizeIntegral ()
void texturize_integral_image (cudaArray *intImg)
float iiAreaLookupC (cudaArray *image, float cx, float cy, float width, float height)
cudaError_t texturize_integral_image_c (cudaArray *intImg)
cudaError_t fh_untexturizeIntegral_c ()
__device__ float iiAreaLookupCDHalfWH (float cx, float cy, float halfWidth, float halfHeight)
__device__ float iiAreaLookupCD (float cx, float cy, float width, float height)
__global__ void iiAreaLookupCDKernel (float *d_result, float cx, float cy, float width, float height)
void run_iiAreaLookupCDKernel (dim3 grid, dim3 threads, float *d_result, float cx, float cy, float width, float height)
void init_globals (int imWidth, int imHeight, GpuSurfOctave *octaves, int nOctaves)
bool & get_s_initialized ()
int & get_s_initWidth ()
int & get_s_initHeight ()
SurfOctaveParametersget_d_octave_params ()
float * get_d_hessian_scale ()
int * get_d_hessian_stride ()
void call_integral_kernel (size_t width, size_t height, unsigned char *char_data, float *norm_data, float *trans_data, cudaArray *output_int_img, size_t norm_pitch, size_t trans_pitch, size_t char_pitch, CUDPPHandle &rowPlan, CUDPPHandle &colPlan, cudaStream_t stream)
__device__ void convert_dev (float &out, unsigned char in)
__device__ void convert_dev (float &out, float in)
template<typename T >
__global__ void transpose_kernel (float *odata, size_t o_pitch, T *idata, size_t i_pitch, size_t width, size_t height)
void run_transpose_kernel_uchar (dim3 grid, dim3 block, float *odata, size_t o_pitch, unsigned char *idata, size_t i_pitch, size_t width, size_t height)
void run_transpose_kernel_float (dim3 grid, dim3 block, float *odata, size_t o_pitch, float *idata, size_t i_pitch, size_t width, size_t height)
__global__ void fh_interp_extremum (float *d_hessian, Keypoint *d_features, int4 *d_maxmin, unsigned int *d_feature_counter, unsigned int *d_max_min_counter)
void run_fh_interp_extremum (float *d_hessian, Keypoint *d_features, int4 *d_maxmin, unsigned int *d_feature_counter, unsigned int *d_max_min_counter)
__global__ void surf_nonmaxonly_kernel (float *d_hessian, int octave, int4 *d_maxmin, unsigned int *d_maxmin_counter, float threshold)
void run_surf_nonmaxonly_kernel (dim3 grid, dim3 threads, size_t sharedBytes, float *d_hessian, int octave, int4 *d_maxmin, unsigned int *d_maxmin_counter, float threshold)
__device__ void swap (float3 &a, float3 &b)
__device__ void setMaxXZ (float3 &dest, float3 &comp)
__device__ void setSumXY (float2 &dest, float2 &src)
__device__ void setMaxZ3 (float3 &dest, float3 &comp)
__global__ void find_orientation_fast_kernel (Keypoint *d_features)
__global__ void find_orientation_kernel (Keypoint *d_features)
void find_orientation (Keypoint *d_features, int nFeatures)
void find_orientation_fast (Keypoint *d_features, int nFeatures)

Variables

__constant__ float dc_3p3gauss1D [20]
bool s_initialized = false
int s_initWidth = -1
int s_initHeight = -1
__constant__ SurfOctaveParameters d_octave_params [ASRL_SURF_MAX_OCTAVES]
__constant__ float d_hessian_scale [ASRL_SURF_MAX_INTERVALS *ASRL_SURF_MAX_OCTAVES]
__constant__ int d_hessian_stride [1]
__shared__ float fh_vals []
__constant__ float dc_gauss1D [13] = {0.002215924206f, 0.008764150247f, 0.026995483257f, 0.064758797833f, 0.120985362260f, 0.176032663382f, 0.199471140201f, 0.176032663382f, 0.120985362260f, 0.064758797833f, 0.026995483257f, 0.008764150247f, 0.002215924206f}

Enumeration Type Documentation

An enum specifying the components of the fasthessian algorithm used in debugging.

Enumerator:
FH_DXX 
FH_DYY 
FH_DXX_DYY 
FH_DXY 
FH_CENSURE 
FH_RESULT 
FH_RESULT_BIT_SET 
FH_LINEAR_IDX 
FH_HIDX_X_IDX 
FH_HIDX_Y_IDX 
FH_FSCALE 
FH_X 
FH_Y 

Definition at line 62 of file fasthessian.h.

The layout of a keypoint so the elements may be grabbed as an array.

Enumerator:
SF_X 
SF_Y 
SF_SIZE 
SF_RESPONSE 
SF_ANGLE 
SF_OCTAVE 
SF_FEATURE_STRIDE 

Definition at line 142 of file gpu_globals.h.


Function Documentation

void asrl::call_integral_kernel ( size_t  width,
size_t  height,
unsigned char *  char_data,
float *  norm_data,
float *  trans_data,
cudaArray *  output_int_img,
size_t  norm_pitch,
size_t  trans_pitch,
size_t  char_pitch,
CUDPPHandle rowPlan,
CUDPPHandle colPlan,
cudaStream_t  stream 
)

The C interface to the CUDA kernel that builds the integral image

Parameters:
width the width of the input image
height the height of the input image
char_data a device pointer to the input image
norm_data a device pointer for intermediate storage
trans_data a device pointer for intermediate storage
output_int_img a device pointer to the output buffer for the integral image
norm_pitch the pitch of the rows of the image array in the standard orientation
trans_pitch the pitch of the rows of the transposed image array
char_pitch the pitch of the rows of the character array
rowPlan a handle to the CUDPP plan structure governing the row scan operation
colPlan a handle to the CUDPP plan structure governing the column scan operation
stream An optional cuda stream.
void asrl::compute_descriptors ( float *  d_descriptors,
Keypoint *  d_features,
int  nFeaturesFound 
)

A function to calculate SURF descriptors from keypoints. The keypoint location, orientation and scale are used in this function.

Parameters:
d_descriptors The device pointer to descriptor memory.
d_features The device pointer to feature memory.
nFeaturesFound The number of features to be described. Features should be arranged linearly on the GPU
__global__ void asrl::compute_descriptors_kernel ( float *  d_descriptors,
Keypoint *  d_features 
)

Definition at line 45 of file descriptors.cu.

__device__ void asrl::convert_dev ( float &  out,
float  in 
)

Definition at line 37 of file GpuIntegralImage_kernel.cu.

__device__ void asrl::convert_dev ( float &  out,
unsigned char  in 
)

Definition at line 36 of file GpuIntegralImage_kernel.cu.

__global__ void asrl::eval_component_kernel ( float *  d_hessian,
int  octave,
int  component 
)

Definition at line 125 of file fasthessian.cu.

__device__ float asrl::evalDxxByArea ( float  x,
float  y,
float  t,
float  mask_width,
float  mask_height,
float  fscale 
)

Definition at line 49 of file fasthessian.cu.

__device__ float asrl::evalDxy ( float  x,
float  y,
float  fscale,
int  octave 
)

Definition at line 61 of file fasthessian.cu.

__device__ float asrl::evalDyyByArea ( float  x,
float  y,
float  t,
float  mask_width,
float  mask_height,
float  fscale 
)

Definition at line 38 of file fasthessian.cu.

__global__ void asrl::fasthessian_kernel ( float *  d_hessian,
int  octave 
)

Definition at line 77 of file fasthessian.cu.

__global__ void asrl::fh_interp_extremum ( float *  d_hessian,
Keypoint *  d_features,
int4 *  d_maxmin,
unsigned int *  d_feature_counter,
unsigned int *  d_max_min_counter 
)

Definition at line 37 of file keypoint_interpolation.cu.

void asrl::fh_untexturizeIntegral (  ) 

Remove the integral image texture association.

cudaError_t asrl::fh_untexturizeIntegral_c (  ) 

Remove the integral image texture association. This version throws an exception on error

void asrl::find_orientation ( Keypoint *  d_features,
int  nFeatures 
)

Computes the orientation for a list of keypoints. This is the orientation calculation from the original SURF algorithm

Parameters:
d_features A device pointer to the keypoint list.
nFeatures The number of features available
void asrl::find_orientation_fast ( Keypoint *  d_features,
int  nFeatures 
)

Computes the orientation for a list of keypoints. This is an orientation calculation that is 10x faster than the original

Parameters:
d_features A device pointer to the keypoint list.
nFeatures The number of features available
__global__ void asrl::find_orientation_fast_kernel ( Keypoint *  d_features  ) 

Definition at line 72 of file orientation.cu.

__global__ void asrl::find_orientation_kernel ( Keypoint *  d_features  ) 

Definition at line 170 of file orientation.cu.

float * asrl::get_d_hessian_scale (  ) 
Returns:
A __constant__ variable at gpusurf.cu file scope that holds the octave scale constants
int * asrl::get_d_hessian_stride (  ) 
Returns:
A __constant__ variable at gpusurf.cu file scope that holds the hessian buffer row stride.
SurfOctaveParameters * asrl::get_d_octave_params (  ) 
Returns:
A __constant__ variable at gpusurf.cu file scope that holds octave parameters
int & asrl::get_s_initHeight (  ) 
Returns:
A variable at gpusurf.cu file scope that holds the initialized image height
bool & asrl::get_s_initialized (  ) 
Returns:
A variable at gpusurf.cu file scope that says if the constant memory has been initialized.
int & asrl::get_s_initWidth (  ) 
Returns:
A variable at gpusurf.cu file scope that holds the initialized image width
float asrl::iiAreaLookupC ( cudaArray *  image,
float  cx,
float  cy,
float  width,
float  height 
)

A debugging function that calculates an area using box filters on the GPU. This is mostly used for debugging.

Parameters:
image The integral image that the region area will be calculated on
cx The center of the region in horizontal pixels
cy The center of the region in vertical pixels
width The width of the region.
height The height of the region.
Returns:
__device__ float asrl::iiAreaLookupCD ( float  cx,
float  cy,
float  width,
float  height 
)

A CUDA device function for looking up the sum of pixels in an area using an integral image. This is accomplished with 4 lookups. Each pixel in an integral image contains the sum of all pixels above and to the left of it. To calculate the sum of pixels within any area, we look up pixels at the corners:

            A *-----------* B
              |           |
              |           |
              |           |
            C *-----------* D
    

Area = A - B - C + D

Cuda requires that texture variables are global within file scope so this function uses the d_integralTex variable

Parameters:
cx The horizontal pixel coordinates of the center of the area to look up
cy The vertical pixel coordinates of the center of the area to look up
width The width of the area to look up
height The height of the area to look up
Returns:
The area within the region.

Definition at line 132 of file gpu_area.cu.

__device__ float asrl::iiAreaLookupCDHalfWH ( float  cx,
float  cy,
float  halfWidth,
float  halfHeight 
)

A CUDA device function for looking up the sum of pixels in an area using an integral image. This is accomplished with 4 lookups. Each pixel in an integral image contains the sum of all pixels above and to the left of it. To calculate the sum of pixels within any area, we look up pixels at the corners:

            A *-----------* B
              |           |
              |           |
              |           |
            C *-----------* D
    

Area = A - B - C + D

Cuda requires that texture variables are global within file scope so this function uses the d_integralTex variable

Parameters:
cx The horizontal pixel coordinates of the center of the area to look up
cy The vertical pixel coordinates of the center of the area to look up
halfWidth Half of the width of the area to look up
halfHeight Half of the height of the area to look up
Returns:
The area within the region.

Definition at line 89 of file gpu_area.cu.

__global__ void asrl::iiAreaLookupCDKernel ( float *  d_result,
float  cx,
float  cy,
float  width,
float  height 
)

A simple kernel that looks up one area on the device

Parameters:
d_result The device pointer to a single float used to store the result.
cx The horizontal pixel coordinates of the center of the area to look up
cy The vertical pixel coordinates of the center of the area to look up
width The width of the area to look up
height The height of the area to look up

Definition at line 146 of file gpu_area.cu.

void asrl::init_globals ( int  imWidth,
int  imHeight,
GpuSurfOctave *  octaves,
int  nOctaves 
)

Initialize global variables used by the SURF detector

Parameters:
imWidth The width of the integral image
imHeight The height of the integral image
octaves The octave parameters
__global__ void asrl::normalize_descriptors_kernel ( float *  d_descriptors  ) 

Definition at line 208 of file descriptors.cu.

void asrl::run_eval_component_kernel ( dim3  grid,
dim3  threads,
float *  d_hessian,
int  octave,
fh_component  comp 
)

Evaluates a component of the SURF fasthessian algorithm at every point in the buffer. Used for debugging.

Parameters:
grid The grid configuration
threads The thread configuration
d_hessian The device buffer to store the fast hessian
octave The octave number to be working on
comp The component to return in the octave buffers
void asrl::run_fasthessian_kernel ( dim3  grid,
dim3  threads,
float *  d_hessian,
int  octave 
)

Runs the SURF fast hessian kernel

Parameters:
grid The grid configuration
threads The thread configuration
d_hessian The device buffer to store the fast hessian
octave The octave number to be working on
void asrl::run_fh_interp_extremum ( float *  d_hessian,
Keypoint *  d_features,
int4 *  d_maxmin,
unsigned int *  d_feature_counter,
unsigned int *  d_max_min_counter 
)

Interpolate a set of keypoints on the GPU

Parameters:
d_hessian The interest operator buffer
d_features Device buffer for interpolated features
d_maxmin Device buffer for raw features
d_feature_counter Device counter for number of interpolated features
d_max_min_counter Device counter for number of raw features
d_threshold Device pointers to the threshold
void asrl::run_iiAreaLookupCDKernel ( dim3  grid,
dim3  threads,
float *  d_result,
float  cx,
float  cy,
float  width,
float  height 
)

A kernel to calculate the area of a region on the GPU. The integral image must have been previously passed to texturize_integral_image()

Parameters:
grid The grid size for the kernel
threads The thread block size for the kernel
result A device pointer where the result will be stored.
cx The center of the region in horizontal pixels
cy The center of the region in vertical pixels
width The width of the region.
height The height of the region.
Returns:
void asrl::run_surf_detector ( float *  d_hessianBuffer,
GpuSurfOctave &  octave,
int  octaveIdx,
GpuSurfFeatures &  features,
float  threshold,
int  fh_x_threads,
int  fh_y_threads,
int  nonmax_x_threads,
int  nonmax_y_threads 
)

A Function that runs the gpusurf detector. This includes

  1. Computing the interest operator
  2. Finding maxima in the 3d buffer
  3. Computing subpixel interpolation of the keypoints
Parameters:
d_hessianBuffer A device pointer to the buffer where the interest operator results are stored.
octave The parameters of the octave being processed
octaveIdx The index of the octave being processed
features Memory to hold the resultant features.
threshold The threshold being used
fh_x_threads The number of threads per block used to compute the interest operator (dimension 1)
fh_y_threads The number of threads per block used to compute the interest operator (dimension 2)
nonmax_x_threads The number of threads per block used to find maxima (dimension 1)
nonmax_y_threads The number of threads per block used to find maxima (dimension 2)
void asrl::run_surf_nonmaxonly_kernel ( dim3  grid,
dim3  threads,
size_t  sharedBytes,
float *  d_hessian,
int  octave,
int4 *  d_maxmin,
unsigned int *  d_maxmin_counter,
float  threshold 
)

Find maxima within 3D the interest operator buffer

Parameters:
grid The grid configuration
threads The thread configuration
sharedBytes The amount of shared memory used by the kernel
d_hessian The device buffer to store the fast hessian
octave The octave number to be working on
d_maxmin Device buffer for raw features
d_maxmin_counter Device counter for number of raw features
threshold The interest operator threshold
void asrl::run_transpose_kernel_float ( dim3  grid,
dim3  block,
float *  odata,
size_t  o_pitch,
float *  idata,
size_t  i_pitch,
size_t  width,
size_t  height 
)

The interface to the float-to-float matrix transpose operation.

Parameters:
grid The kernel grid configuration
block The kernel block configuration
odata The output data (device)
o_pitch The output data pitch
idata The input data (device)
i_pitch The input data pitch
width The input data width
height The input data height
void asrl::run_transpose_kernel_uchar ( dim3  grid,
dim3  block,
float *  odata,
size_t  o_pitch,
unsigned char *  idata,
size_t  i_pitch,
size_t  width,
size_t  height 
)

The interface to the unsigned char-to-float matrix transpose operation.

Parameters:
grid The kernel grid configuration
block The kernel block configuration
odata The output data (device)
o_pitch The output data pitch
idata The input data (device)
i_pitch The input data pitch
width The input data width
height The input data height
__device__ void asrl::setMaxXZ ( float3 &  dest,
float3 &  comp 
)

Definition at line 49 of file orientation.cu.

__device__ void asrl::setMaxZ3 ( float3 &  dest,
float3 &  comp 
)

Definition at line 63 of file orientation.cu.

__device__ void asrl::setSumXY ( float2 &  dest,
float2 &  src 
)

Definition at line 57 of file orientation.cu.

__global__ void asrl::surf_nonmaxonly_kernel ( float *  d_hessian,
int  octave,
int4 *  d_maxmin,
unsigned int *  d_maxmin_counter,
float  threshold 
)

Definition at line 36 of file non_max_suppression.cu.

__device__ void asrl::swap ( float3 &  a,
float3 &  b 
) [inline]

Definition at line 42 of file orientation.cu.

void asrl::texturize_integral_image ( cudaArray *  intImg  ) 

Store the integral image as a texture on the GPU.

Parameters:
intImg The integral image to use as a texture.
cudaError_t asrl::texturize_integral_image_c ( cudaArray *  intImg  ) 

Store the integral image as a texture on the GPU. This version does not throw an exception on error

Parameters:
intImg The integral image to use as a texture.
template<typename T >
__global__ void asrl::transpose_kernel ( float *  odata,
size_t  o_pitch,
T *  idata,
size_t  i_pitch,
size_t  width,
size_t  height 
) [inline]

Definition at line 42 of file GpuIntegralImage_kernel.cu.


Variable Documentation

__constant__ float asrl::d_hessian_scale[ASRL_SURF_MAX_INTERVALS *ASRL_SURF_MAX_OCTAVES]

Definition at line 39 of file gpu_globals.cu.

__constant__ int asrl::d_hessian_stride[1]

Definition at line 40 of file gpu_globals.cu.

__constant__ SurfOctaveParameters asrl::d_octave_params[ASRL_SURF_MAX_OCTAVES]

Definition at line 38 of file gpu_globals.cu.

__constant__ float asrl::dc_3p3gauss1D[20]
Initial value:
 {0.001917811039f, 0.004382549939f, 0.009136246641f, 0.017375153068f, 0.030144587513f,
                                          0.047710056854f, 0.068885910797f, 0.090734146446f, 0.109026229640f, 0.119511889092f,
                                          0.119511889092f, 0.109026229640f, 0.090734146446f, 0.068885910797f, 0.047710056854f,
                                          0.030144587513f, 0.017375153068f, 0.009136246641f, 0.004382549939f, 0.001917811039f}

Definition at line 37 of file descriptors.cu.

__constant__ float asrl::dc_gauss1D[13] = {0.002215924206f, 0.008764150247f, 0.026995483257f, 0.064758797833f, 0.120985362260f, 0.176032663382f, 0.199471140201f, 0.176032663382f, 0.120985362260f, 0.064758797833f, 0.026995483257f, 0.008764150247f, 0.002215924206f}

Definition at line 40 of file orientation.cu.

__shared__ float asrl::fh_vals[]

Definition at line 37 of file gpu_globals.cu.

bool asrl::s_initialized = false

Definition at line 35 of file gpu_globals.cu.

Definition at line 36 of file gpu_globals.cu.


Generated on Fri Apr 30 20:06:20 2010 for gpusurf by doxygen 1.6.2