Autonomous Space Robotics Lab: gpusurf: GpuIntegralImage

00001 /*
00002 Copyright (c) 2010, Paul Furgale and Chi Hay Tong
00003 All rights reserved.
00004 
00005 Redistribution and use in source and binary forms, with or without 
00006 modification, are permitted provided that the following conditions are 
00007 met:
00008 
00009 * Redistributions of source code must retain the above copyright notice, 
00010   this list of conditions and the following disclaimer.
00011 * Redistributions in binary form must reproduce the above copyright 
00012   notice, this list of conditions and the following disclaimer in the 
00013   documentation and/or other materials provided with the distribution.
00014 * The names of its contributors may not be used to endorse or promote 
00015   products derived from this software without specific prior written 
00016   permission.
00017 
00018 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
00019 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
00020 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 
00021 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER
00022 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
00023 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
00024 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
00025 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
00026 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
00027 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
00028 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029 */
00030 
00031 #include "assert_macros.hpp"
00032 #include "GpuIntegralImage_kernel.h"
00033 
00034 namespace asrl {
00035   void call_integral_kernel(size_t width, size_t height,
00036                             unsigned char * char_data,          // initial storage on the gpu of the unsigned char data
00037                             float *norm_data, 
00038                             float *trans_data,  // storage of floats on the gpu
00039                             cudaArray *output_int_img,
00040                             size_t norm_pitch, 
00041                             size_t trans_pitch, 
00042                             size_t char_pitch,
00043                             CUDPPHandle & rowPlan,
00044                             CUDPPHandle & colPlan,
00045                             cudaStream_t stream)
00046   {
00047     // transpose and convert the data into floats
00048     dim3 block(ASRL_TRANSPOSE_BLOCK_DIM, ASRL_TRANSPOSE_BLOCK_DIM, 1);
00049     dim3 grid( (int) ((block.x + width - 1) / block.x), (int) ((block.y + height - 1) / block.y), 1);
00050 
00051     //TIME_START("iimg: transpose 1");
00052     run_transpose_kernel_uchar(grid, block, trans_data, trans_pitch/sizeof(float), 
00053                                         char_data, char_pitch/sizeof(unsigned char), width, height);
00054     ASRL_CHECK_CUDA_ERROR_DBG("transpose 1");    
00055 
00056     // compute row scan on transposed image (columns)
00057     cudppMultiScan(colPlan, trans_data, trans_data, height, width);
00058     ASRL_CHECK_CUDA_ERROR_DBG("column scan");
00059 
00060 
00061     // transpose (again, to return to original orientation)
00062     grid.x = (int) ((block.x + height - 1) / block.x);
00063     grid.y = (int) ((block.y + width - 1) / block.y);
00064     
00065     run_transpose_kernel_float(grid, block, norm_data, norm_pitch/sizeof(float), 
00066                                 trans_data, trans_pitch/sizeof(float), height, width);
00067     ASRL_CHECK_CUDA_ERROR_DBG("transpose 2");
00068 
00069     // compute row scan on re-transposed image (rows)
00070     cudppMultiScan(rowPlan, norm_data, norm_data, width, height);
00071     ASRL_CHECK_CUDA_ERROR_DBG("row scan");
00072 
00073     // transfer data to cudaArray
00074     cudaError_t err = cudaMemcpy2DToArray(output_int_img, 0, 0, norm_data, norm_pitch, 
00075                                           width*sizeof(float), height, cudaMemcpyDeviceToDevice);
00076     ASRL_ASSERT_EQ(err,cudaSuccess, "Unable to copy the integral image to the texture buffer: (" << err << "): " << cudaGetErrorString(err));           
00077   }
00078 }
Autonomous Space Robotics Lab

Speeded Up SURF

GpuIntegralImage_kernel.cpp