asrl Namespace Reference

Namespaces
namespace	detail
Classes
struct	GpuSurfConfiguration
	A structure representing the GPU surf configuration. More...
class	GpuSurfDetector
	The exported class representing the GPU surf detector. More...
class	CudaSynchronizedMemory
	a class that handles synchronization of memory between the host and device More...
class	SurfOctaveParameters
	A structure which holds the constant parameters that describe an octave layout. More...
class	Keypoint
	A keypoint class used on the GPU. More...
class	GpuIntegralImage
	The integral image on the device. More...
class	GpuIntegralImageProcessor
	A class that reserves memory on the GPU for creating integral images. More...
class	GpuSurfDetectorInternal
	The private implementation of the GPU surf detector. More...
class	GpuSurfFeatures
	Memory required for SURF features on the GPU. More...
class	GpuSurfOctave
	A class that reserves memory on the GPU for processing the interest point operator. More...
Enumerations
enum	fh_component { FH_DXX, FH_DYY, FH_DXX_DYY, FH_DXY, FH_CENSURE, FH_RESULT, FH_RESULT_BIT_SET, FH_LINEAR_IDX, FH_HIDX_X_IDX, FH_HIDX_Y_IDX, FH_FSCALE, FH_X, FH_Y }
enum	KeypointLayout { SF_X, SF_Y, SF_SIZE, SF_RESPONSE, SF_ANGLE, SF_OCTAVE, SF_FEATURE_STRIDE }
Functions
__global__ void	compute_descriptors_kernel (float d_descriptors, Keypoint d_features)
__global__ void	normalize_descriptors_kernel (float *d_descriptors)
void	compute_descriptors (float d_descriptors, Keypoint d_features, int nFeaturesFound)
void	run_surf_detector (float *d_hessianBuffer, GpuSurfOctave &octave, int octaveIdx, GpuSurfFeatures &features, float threshold, int fh_x_threads, int fh_y_threads, int nonmax_x_threads, int nonmax_y_threads)
__device__ float	evalDyyByArea (float x, float y, float t, float mask_width, float mask_height, float fscale)
__device__ float	evalDxxByArea (float x, float y, float t, float mask_width, float mask_height, float fscale)
__device__ float	evalDxy (float x, float y, float fscale, int octave)
__global__ void	fasthessian_kernel (float *d_hessian, int octave)
__global__ void	eval_component_kernel (float *d_hessian, int octave, int component)
void	run_fasthessian_kernel (dim3 grid, dim3 threads, float *d_hessian, int octave)
void	run_eval_component_kernel (dim3 grid, dim3 threads, float *d_hessian, int octave, fh_component comp)
void	fh_untexturizeIntegral ()
void	texturize_integral_image (cudaArray *intImg)
float	iiAreaLookupC (cudaArray *image, float cx, float cy, float width, float height)
cudaError_t	texturize_integral_image_c (cudaArray *intImg)
cudaError_t	fh_untexturizeIntegral_c ()
__device__ float	iiAreaLookupCDHalfWH (float cx, float cy, float halfWidth, float halfHeight)
__device__ float	iiAreaLookupCD (float cx, float cy, float width, float height)
__global__ void	iiAreaLookupCDKernel (float *d_result, float cx, float cy, float width, float height)
void	run_iiAreaLookupCDKernel (dim3 grid, dim3 threads, float *d_result, float cx, float cy, float width, float height)
void	init_globals (int imWidth, int imHeight, GpuSurfOctave *octaves, int nOctaves)
bool &	get_s_initialized ()
int &	get_s_initWidth ()
int &	get_s_initHeight ()
SurfOctaveParameters *	get_d_octave_params ()
float *	get_d_hessian_scale ()
int *	get_d_hessian_stride ()
void	call_integral_kernel (size_t width, size_t height, unsigned char char_data, float norm_data, float trans_data, cudaArray output_int_img, size_t norm_pitch, size_t trans_pitch, size_t char_pitch, CUDPPHandle &rowPlan, CUDPPHandle &colPlan, cudaStream_t stream)
__device__ void	convert_dev (float &out, unsigned char in)
__device__ void	convert_dev (float &out, float in)
template<typename T >
__global__ void	transpose_kernel (float odata, size_t o_pitch, T idata, size_t i_pitch, size_t width, size_t height)
void	run_transpose_kernel_uchar (dim3 grid, dim3 block, float odata, size_t o_pitch, unsigned char idata, size_t i_pitch, size_t width, size_t height)
void	run_transpose_kernel_float (dim3 grid, dim3 block, float odata, size_t o_pitch, float idata, size_t i_pitch, size_t width, size_t height)
__global__ void	fh_interp_extremum (float d_hessian, Keypoint d_features, int4 d_maxmin, unsigned int d_feature_counter, unsigned int *d_max_min_counter)
void	run_fh_interp_extremum (float d_hessian, Keypoint d_features, int4 d_maxmin, unsigned int d_feature_counter, unsigned int *d_max_min_counter)
__global__ void	surf_nonmaxonly_kernel (float d_hessian, int octave, int4 d_maxmin, unsigned int *d_maxmin_counter, float threshold)
void	run_surf_nonmaxonly_kernel (dim3 grid, dim3 threads, size_t sharedBytes, float d_hessian, int octave, int4 d_maxmin, unsigned int *d_maxmin_counter, float threshold)
__device__ void	swap (float3 &a, float3 &b)
__device__ void	setMaxXZ (float3 &dest, float3 &comp)
__device__ void	setSumXY (float2 &dest, float2 &src)
__device__ void	setMaxZ3 (float3 &dest, float3 &comp)
__global__ void	find_orientation_fast_kernel (Keypoint *d_features)
__global__ void	find_orientation_kernel (Keypoint *d_features)
void	find_orientation (Keypoint *d_features, int nFeatures)
void	find_orientation_fast (Keypoint *d_features, int nFeatures)
Variables
__constant__ float	dc_3p3gauss1D [20]
bool	s_initialized = false
int	s_initWidth = -1
int	s_initHeight = -1
__constant__ SurfOctaveParameters	d_octave_params [ASRL_SURF_MAX_OCTAVES]
__constant__ float	d_hessian_scale [ASRL_SURF_MAX_INTERVALS *ASRL_SURF_MAX_OCTAVES]
__constant__ int	d_hessian_stride [1]
__shared__ float	fh_vals []
__constant__ float	dc_gauss1D [13] = {0.002215924206f, 0.008764150247f, 0.026995483257f, 0.064758797833f, 0.120985362260f, 0.176032663382f, 0.199471140201f, 0.176032663382f, 0.120985362260f, 0.064758797833f, 0.026995483257f, 0.008764150247f, 0.002215924206f}

Enumeration Type Documentation

enum asrl::fh_component

An enum specifying the components of the fasthessian algorithm used in debugging.

Enumerator:

FH_DXX
FH_DYY
FH_DXX_DYY
FH_DXY
FH_CENSURE
FH_RESULT
FH_RESULT_BIT_SET
FH_LINEAR_IDX
FH_HIDX_X_IDX
FH_HIDX_Y_IDX
FH_FSCALE
FH_X
FH_Y

Definition at line 62 of file fasthessian.h.

enum asrl::KeypointLayout

The layout of a keypoint so the elements may be grabbed as an array.

Enumerator:

SF_X
SF_Y
SF_SIZE
SF_RESPONSE
SF_ANGLE
SF_OCTAVE
SF_FEATURE_STRIDE

Definition at line 142 of file gpu_globals.h.

Function Documentation

void asrl::call_integral_kernel	(	size_t	width,
		size_t	height,
		unsigned char *	char_data,
		float *	norm_data,
		float *	trans_data,
		cudaArray *	output_int_img,
		size_t	norm_pitch,
		size_t	trans_pitch,
		size_t	char_pitch,
		CUDPPHandle &	rowPlan,
		CUDPPHandle &	colPlan,
		cudaStream_t	stream
	)

The C interface to the CUDA kernel that builds the integral image

Parameters:

	width	the width of the input image
	height	the height of the input image
	char_data	a device pointer to the input image
	norm_data	a device pointer for intermediate storage
	trans_data	a device pointer for intermediate storage
	output_int_img	a device pointer to the output buffer for the integral image
	norm_pitch	the pitch of the rows of the image array in the standard orientation
	trans_pitch	the pitch of the rows of the transposed image array
	char_pitch	the pitch of the rows of the character array
	rowPlan	a handle to the CUDPP plan structure governing the row scan operation
	colPlan	a handle to the CUDPP plan structure governing the column scan operation
	stream	An optional cuda stream.

void asrl::compute_descriptors	(	float *	d_descriptors,
		Keypoint *	d_features,
		int	nFeaturesFound
	)

A function to calculate SURF descriptors from keypoints. The keypoint location, orientation and scale are used in this function.

Parameters:

	d_descriptors	The device pointer to descriptor memory.
	d_features	The device pointer to feature memory.
	nFeaturesFound	The number of features to be described. Features should be arranged linearly on the GPU

__global__ void asrl::compute_descriptors_kernel	(	float *	d_descriptors,
		Keypoint *	d_features
	)

Definition at line 45 of file descriptors.cu.

__device__ void asrl::convert_dev	(	float &	out,
		float	in
	)

Definition at line 37 of file GpuIntegralImage_kernel.cu.

__device__ void asrl::convert_dev	(	float &	out,
		unsigned char	in
	)

Definition at line 36 of file GpuIntegralImage_kernel.cu.

__global__ void asrl::eval_component_kernel	(	float *	d_hessian,
		int	octave,
		int	component
	)

Definition at line 125 of file fasthessian.cu.

__device__ float asrl::evalDxxByArea	(	float	x,
		float	y,
		float	t,
		float	mask_width,
		float	mask_height,
		float	fscale
	)

Definition at line 49 of file fasthessian.cu.

__device__ float asrl::evalDxy	(	float	x,
		float	y,
		float	fscale,
		int	octave
	)

Definition at line 61 of file fasthessian.cu.

__device__ float asrl::evalDyyByArea	(	float	x,
		float	y,
		float	t,
		float	mask_width,
		float	mask_height,
		float	fscale
	)

Definition at line 38 of file fasthessian.cu.

__global__ void asrl::fasthessian_kernel	(	float *	d_hessian,
		int	octave
	)

Definition at line 77 of file fasthessian.cu.

__global__ void asrl::fh_interp_extremum	(	float *	d_hessian,
		Keypoint *	d_features,
		int4 *	d_maxmin,
		unsigned int *	d_feature_counter,
		unsigned int *	d_max_min_counter
	)

Definition at line 37 of file keypoint_interpolation.cu.

void asrl::fh_untexturizeIntegral ( )

Remove the integral image texture association.

cudaError_t asrl::fh_untexturizeIntegral_c ( )

Remove the integral image texture association. This version throws an exception on error

void asrl::find_orientation	(	Keypoint *	d_features,
		int	nFeatures
	)

Computes the orientation for a list of keypoints. This is the orientation calculation from the original SURF algorithm

Parameters:

	d_features	A device pointer to the keypoint list.
	nFeatures	The number of features available

void asrl::find_orientation_fast	(	Keypoint *	d_features,
		int	nFeatures
	)

Computes the orientation for a list of keypoints. This is an orientation calculation that is 10x faster than the original

Parameters:

	d_features	A device pointer to the keypoint list.
	nFeatures	The number of features available

__global__ void asrl::find_orientation_fast_kernel ( Keypoint * d_features )

Definition at line 72 of file orientation.cu.

__global__ void asrl::find_orientation_kernel ( Keypoint * d_features )

Definition at line 170 of file orientation.cu.

float * asrl::get_d_hessian_scale ( )

Returns:: A __constant__ variable at gpusurf.cu file scope that holds the octave scale constants

int * asrl::get_d_hessian_stride ( )

Returns:: A __constant__ variable at gpusurf.cu file scope that holds the hessian buffer row stride.

SurfOctaveParameters * asrl::get_d_octave_params ( )

Returns:: A __constant__ variable at gpusurf.cu file scope that holds octave parameters

int & asrl::get_s_initHeight ( )

Returns:: A variable at gpusurf.cu file scope that holds the initialized image height

bool & asrl::get_s_initialized ( )

Returns:: A variable at gpusurf.cu file scope that says if the constant memory has been initialized.

int & asrl::get_s_initWidth ( )

Returns:: A variable at gpusurf.cu file scope that holds the initialized image width

float asrl::iiAreaLookupC	(	cudaArray *	image,
		float	cx,
		float	cy,
		float	width,
		float	height
	)

A debugging function that calculates an area using box filters on the GPU. This is mostly used for debugging.

Parameters:

	image	The integral image that the region area will be calculated on
	cx	The center of the region in horizontal pixels
	cy	The center of the region in vertical pixels
	width	The width of the region.
	height	The height of the region.

Returns:

__device__ float asrl::iiAreaLookupCD	(	float	cx,
		float	cy,
		float	width,
		float	height
	)

A CUDA device function for looking up the sum of pixels in an area using an integral image. This is accomplished with 4 lookups. Each pixel in an integral image contains the sum of all pixels above and to the left of it. To calculate the sum of pixels within any area, we look up pixels at the corners:

            A *-----------* B
              |           |
              |           |
              |           |
            C *-----------* D

Area = A - B - C + D

Cuda requires that texture variables are global within file scope so this function uses the d_integralTex variable

Parameters:

	cx	The horizontal pixel coordinates of the center of the area to look up
	cy	The vertical pixel coordinates of the center of the area to look up
	width	The width of the area to look up
	height	The height of the area to look up

Returns:: The area within the region.

Definition at line 132 of file gpu_area.cu.

__device__ float asrl::iiAreaLookupCDHalfWH	(	float	cx,
		float	cy,
		float	halfWidth,
		float	halfHeight
	)

A CUDA device function for looking up the sum of pixels in an area using an integral image. This is accomplished with 4 lookups. Each pixel in an integral image contains the sum of all pixels above and to the left of it. To calculate the sum of pixels within any area, we look up pixels at the corners:

            A *-----------* B
              |           |
              |           |
              |           |
            C *-----------* D

Area = A - B - C + D

Cuda requires that texture variables are global within file scope so this function uses the d_integralTex variable

Parameters:

	cx	The horizontal pixel coordinates of the center of the area to look up
	cy	The vertical pixel coordinates of the center of the area to look up
	halfWidth	Half of the width of the area to look up
	halfHeight	Half of the height of the area to look up

Returns:: The area within the region.

Definition at line 89 of file gpu_area.cu.

__global__ void asrl::iiAreaLookupCDKernel	(	float *	d_result,
		float	cx,
		float	cy,
		float	width,
		float	height
	)

A simple kernel that looks up one area on the device

Parameters:

	d_result	The device pointer to a single float used to store the result.
	cx	The horizontal pixel coordinates of the center of the area to look up
	cy	The vertical pixel coordinates of the center of the area to look up
	width	The width of the area to look up
	height	The height of the area to look up

Definition at line 146 of file gpu_area.cu.

void asrl::init_globals	(	int	imWidth,
		int	imHeight,
		GpuSurfOctave *	octaves,
		int	nOctaves
	)

Initialize global variables used by the SURF detector

Parameters:

	imWidth	The width of the integral image
	imHeight	The height of the integral image
	octaves	The octave parameters

__global__ void asrl::normalize_descriptors_kernel ( float * d_descriptors )

Definition at line 208 of file descriptors.cu.

void asrl::run_eval_component_kernel	(	dim3	grid,
		dim3	threads,
		float *	d_hessian,
		int	octave,
		fh_component	comp
	)

Evaluates a component of the SURF fasthessian algorithm at every point in the buffer. Used for debugging.

Parameters:

	grid	The grid configuration
	threads	The thread configuration
	d_hessian	The device buffer to store the fast hessian
	octave	The octave number to be working on
	comp	The component to return in the octave buffers

void asrl::run_fasthessian_kernel	(	dim3	grid,
		dim3	threads,
		float *	d_hessian,
		int	octave
	)

Runs the SURF fast hessian kernel

Parameters:

	grid	The grid configuration
	threads	The thread configuration
	d_hessian	The device buffer to store the fast hessian
	octave	The octave number to be working on

void asrl::run_fh_interp_extremum	(	float *	d_hessian,
		Keypoint *	d_features,
		int4 *	d_maxmin,
		unsigned int *	d_feature_counter,
		unsigned int *	d_max_min_counter
	)

Interpolate a set of keypoints on the GPU

Parameters:

	d_hessian	The interest operator buffer
	d_features	Device buffer for interpolated features
	d_maxmin	Device buffer for raw features
	d_feature_counter	Device counter for number of interpolated features
	d_max_min_counter	Device counter for number of raw features
	d_threshold	Device pointers to the threshold

void asrl::run_iiAreaLookupCDKernel	(	dim3	grid,
		dim3	threads,
		float *	d_result,
		float	cx,
		float	cy,
		float	width,
		float	height
	)

A kernel to calculate the area of a region on the GPU. The integral image must have been previously passed to texturize_integral_image()

Parameters:

	grid	The grid size for the kernel
	threads	The thread block size for the kernel
	result	A device pointer where the result will be stored.
	cx	The center of the region in horizontal pixels
	cy	The center of the region in vertical pixels
	width	The width of the region.
	height	The height of the region.

Returns:

void asrl::run_surf_detector	(	float *	d_hessianBuffer,
		GpuSurfOctave &	octave,
		int	octaveIdx,
		GpuSurfFeatures &	features,
		float	threshold,
		int	fh_x_threads,
		int	fh_y_threads,
		int	nonmax_x_threads,
		int	nonmax_y_threads
	)

A Function that runs the gpusurf detector. This includes

Computing the interest operator
Finding maxima in the 3d buffer
Computing subpixel interpolation of the keypoints

Parameters:

	d_hessianBuffer	A device pointer to the buffer where the interest operator results are stored.
	octave	The parameters of the octave being processed
	octaveIdx	The index of the octave being processed
	features	Memory to hold the resultant features.
	threshold	The threshold being used
	fh_x_threads	The number of threads per block used to compute the interest operator (dimension 1)
	fh_y_threads	The number of threads per block used to compute the interest operator (dimension 2)
	nonmax_x_threads	The number of threads per block used to find maxima (dimension 1)
	nonmax_y_threads	The number of threads per block used to find maxima (dimension 2)

void asrl::run_surf_nonmaxonly_kernel	(	dim3	grid,
		dim3	threads,
		size_t	sharedBytes,
		float *	d_hessian,
		int	octave,
		int4 *	d_maxmin,
		unsigned int *	d_maxmin_counter,
		float	threshold
	)

Find maxima within 3D the interest operator buffer

Parameters:

	grid	The grid configuration
	threads	The thread configuration
	sharedBytes	The amount of shared memory used by the kernel
	d_hessian	The device buffer to store the fast hessian
	octave	The octave number to be working on
	d_maxmin	Device buffer for raw features
	d_maxmin_counter	Device counter for number of raw features
	threshold	The interest operator threshold

void asrl::run_transpose_kernel_float	(	dim3	grid,
		dim3	block,
		float *	odata,
		size_t	o_pitch,
		float *	idata,
		size_t	i_pitch,
		size_t	width,
		size_t	height
	)

The interface to the float-to-float matrix transpose operation.

Parameters:

	grid	The kernel grid configuration
	block	The kernel block configuration
	odata	The output data (device)
	o_pitch	The output data pitch
	idata	The input data (device)
	i_pitch	The input data pitch
	width	The input data width
	height	The input data height

void asrl::run_transpose_kernel_uchar	(	dim3	grid,
		dim3	block,
		float *	odata,
		size_t	o_pitch,
		unsigned char *	idata,
		size_t	i_pitch,
		size_t	width,
		size_t	height
	)

The interface to the unsigned char-to-float matrix transpose operation.

Parameters:

	grid	The kernel grid configuration
	block	The kernel block configuration
	odata	The output data (device)
	o_pitch	The output data pitch
	idata	The input data (device)
	i_pitch	The input data pitch
	width	The input data width
	height	The input data height

__device__ void asrl::setMaxXZ	(	float3 &	dest,
		float3 &	comp
	)

Definition at line 49 of file orientation.cu.

__device__ void asrl::setMaxZ3	(	float3 &	dest,
		float3 &	comp
	)

Definition at line 63 of file orientation.cu.

__device__ void asrl::setSumXY	(	float2 &	dest,
		float2 &	src
	)

Definition at line 57 of file orientation.cu.

__global__ void asrl::surf_nonmaxonly_kernel	(	float *	d_hessian,
		int	octave,
		int4 *	d_maxmin,
		unsigned int *	d_maxmin_counter,
		float	threshold
	)

Definition at line 36 of file non_max_suppression.cu.

__device__ void asrl::swap	(	float3 &	a,
		float3 &	b
	)			`[inline]`

Definition at line 42 of file orientation.cu.

void asrl::texturize_integral_image ( cudaArray * intImg )

Store the integral image as a texture on the GPU.

Parameters:

intImg

The integral image to use as a texture.

cudaError_t asrl::texturize_integral_image_c ( cudaArray * intImg )

Store the integral image as a texture on the GPU. This version does not throw an exception on error

Parameters:

intImg

The integral image to use as a texture.

template<typename T >

__global__ void asrl::transpose_kernel	(	float *	odata,
		size_t	o_pitch,
		T *	idata,
		size_t	i_pitch,
		size_t	width,
		size_t	height
	)			`[inline]`

Definition at line 42 of file GpuIntegralImage_kernel.cu.

Variable Documentation

__constant__ float asrl::d_hessian_scale[ASRL_SURF_MAX_INTERVALS *ASRL_SURF_MAX_OCTAVES]

Definition at line 39 of file gpu_globals.cu.

__constant__ int asrl::d_hessian_stride[1]

Definition at line 40 of file gpu_globals.cu.

__constant__ SurfOctaveParameters asrl::d_octave_params[ASRL_SURF_MAX_OCTAVES]

Definition at line 38 of file gpu_globals.cu.

__constant__ float asrl::dc_3p3gauss1D[20]

Initial value:

 {0.001917811039f, 0.004382549939f, 0.009136246641f, 0.017375153068f, 0.030144587513f,
                                          0.047710056854f, 0.068885910797f, 0.090734146446f, 0.109026229640f, 0.119511889092f,
                                          0.119511889092f, 0.109026229640f, 0.090734146446f, 0.068885910797f, 0.047710056854f,
                                          0.030144587513f, 0.017375153068f, 0.009136246641f, 0.004382549939f, 0.001917811039f}

Definition at line 37 of file descriptors.cu.

__constant__ float asrl::dc_gauss1D[13] = {0.002215924206f, 0.008764150247f, 0.026995483257f, 0.064758797833f, 0.120985362260f, 0.176032663382f, 0.199471140201f, 0.176032663382f, 0.120985362260f, 0.064758797833f, 0.026995483257f, 0.008764150247f, 0.002215924206f}

Definition at line 40 of file orientation.cu.

__shared__ float asrl::fh_vals[]

int asrl::s_initHeight = -1

Definition at line 37 of file gpu_globals.cu.

bool asrl::s_initialized = false

Definition at line 35 of file gpu_globals.cu.

int asrl::s_initWidth = -1

Definition at line 36 of file gpu_globals.cu.