00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #include "assert_macros.hpp"
00032 #include "GpuIntegralImage_kernel.h"
00033
00034 namespace asrl {
00035 void call_integral_kernel(size_t width, size_t height,
00036 unsigned char * char_data,
00037 float *norm_data,
00038 float *trans_data,
00039 cudaArray *output_int_img,
00040 size_t norm_pitch,
00041 size_t trans_pitch,
00042 size_t char_pitch,
00043 CUDPPHandle & rowPlan,
00044 CUDPPHandle & colPlan,
00045 cudaStream_t stream)
00046 {
00047
00048 dim3 block(ASRL_TRANSPOSE_BLOCK_DIM, ASRL_TRANSPOSE_BLOCK_DIM, 1);
00049 dim3 grid( (int) ((block.x + width - 1) / block.x), (int) ((block.y + height - 1) / block.y), 1);
00050
00051
00052 run_transpose_kernel_uchar(grid, block, trans_data, trans_pitch/sizeof(float),
00053 char_data, char_pitch/sizeof(unsigned char), width, height);
00054 ASRL_CHECK_CUDA_ERROR_DBG("transpose 1");
00055
00056
00057 cudppMultiScan(colPlan, trans_data, trans_data, height, width);
00058 ASRL_CHECK_CUDA_ERROR_DBG("column scan");
00059
00060
00061
00062 grid.x = (int) ((block.x + height - 1) / block.x);
00063 grid.y = (int) ((block.y + width - 1) / block.y);
00064
00065 run_transpose_kernel_float(grid, block, norm_data, norm_pitch/sizeof(float),
00066 trans_data, trans_pitch/sizeof(float), height, width);
00067 ASRL_CHECK_CUDA_ERROR_DBG("transpose 2");
00068
00069
00070 cudppMultiScan(rowPlan, norm_data, norm_data, width, height);
00071 ASRL_CHECK_CUDA_ERROR_DBG("row scan");
00072
00073
00074 cudaError_t err = cudaMemcpy2DToArray(output_int_img, 0, 0, norm_data, norm_pitch,
00075 width*sizeof(float), height, cudaMemcpyDeviceToDevice);
00076 ASRL_ASSERT_EQ(err,cudaSuccess, "Unable to copy the integral image to the texture buffer: (" << err << "): " << cudaGetErrorString(err));
00077 }
00078 }