00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #include "GpuIntegralImageProcessor.hpp"
00032 #include <cudpp.h>
00033 #include <cuda.h>
00034 #include <builtin_types.h>
00035 #include <channel_descriptor.h>
00036 #include <iostream>
00037 #include "GpuIntegralImage_kernel.h"
00038 #include "GpuIntegralImage.hpp"
00039 #include "assert_macros.hpp"
00040
00041
00042 namespace asrl {
00043
00044 GpuIntegralImageProcessor::GpuIntegralImageProcessor(int width, int height)
00045 {
00046 cudaError_t err;
00047 m_width = width;
00048 m_height = height;
00049
00050
00051 unsigned char * char_ptr;
00052 err = cudaMallocPitch( (void**) &char_ptr, &char_pitch, width*sizeof(unsigned char), height);
00053 ASRL_ASSERT_EQ(err,cudaSuccess, "Unable to allocate CUDA char* input image.");
00054 char_data.reset(char_ptr,&cudaFree);
00055
00056 float * norm_ptr;
00057 err = cudaMallocPitch( (void**) &norm_ptr, &norm_pitch, width*sizeof(float), height);
00058 ASRL_ASSERT_EQ(err, cudaSuccess, "Unable to allocate CUDA normally oriented float integral image.");
00059 norm_data.reset(norm_ptr,&cudaFree);
00060
00061 float * trans_ptr;
00062 err = cudaMallocPitch( (void**) &trans_ptr, &trans_pitch, height*sizeof(float), width);
00063 ASRL_ASSERT_EQ(err, cudaSuccess, "Unable to allocate CUDA transpose oriented integral image.");
00064 trans_data.reset(trans_ptr,&cudaFree);
00065
00066 CUDPPConfiguration config = { CUDPP_SCAN, CUDPP_ADD, CUDPP_FLOAT, CUDPP_OPTION_FORWARD | CUDPP_OPTION_INCLUSIVE };
00067 CUDPPResult result = cudppPlan(&colPlan, config, width*height, width, trans_pitch/sizeof(float));
00068 ASRL_ASSERT_EQ(result, CUDPP_SUCCESS, "Error creating scanPlan (column scan):" << result);
00069
00070 result = cudppPlan(&rowPlan, config, width*height, height, norm_pitch/sizeof(float));
00071 ASRL_ASSERT_EQ(result,CUDPP_SUCCESS, "Error creating scanPlan (row scan): " << result);
00072 }
00073
00074 GpuIntegralImageProcessor::~GpuIntegralImageProcessor()
00075 {
00076 try{
00077 CUDPPResult result = cudppDestroyPlan(colPlan);
00078 if(result != CUDPP_SUCCESS)
00079 std::cerr << "Unable to destroy column plan. Err code: " << result;
00080 } catch(std::exception const & e) {
00081 std::cout << e.what() << std::endl;
00082 }
00083 try{
00084 CUDPPResult result = cudppDestroyPlan(rowPlan);
00085 if(result != CUDPP_SUCCESS)
00086 std::cerr << "Unable to destroy column plan. Err code: " << result;
00087 } catch(std::exception const & e) {
00088 std::cout << e.what() << std::endl;
00089 }
00090 }
00091
00092
00093 void GpuIntegralImageProcessor::upload(cv::Mat & image)
00094 {
00095 ASRL_ASSERT_EQ(image.type(),CV_8UC1,"The image must be single channel, 8 bit");
00096 ASRL_ASSERT(image.isContinuous(),"The image must be tightly packed. width: " << image.cols << ", step: " << image.step);
00097
00098
00099 cudaError_t err = cudaMemcpy2D( (void*) char_data.get(), char_pitch, (void*) image.ptr(), image.cols*sizeof(unsigned char),
00100 image.cols*sizeof(unsigned char), image.rows, cudaMemcpyHostToDevice );
00101 ASRL_ASSERT_EQ(err,cudaSuccess, "Unable to copy image to GPU: (" << err << "): " << cudaGetErrorString(err));
00102 }
00103
00104 void GpuIntegralImageProcessor::process(cv::Mat & image, GpuIntegralImage & outImage, cudaStream_t stream){
00105 upload(image);
00106 process(outImage, stream);
00107 }
00108
00109 void GpuIntegralImageProcessor::process(GpuIntegralImage & outImage, cudaStream_t stream)
00110 {
00111 call_integral_kernel((size_t) width(), (size_t)height(),
00112 char_data.get(),
00113 norm_data.get(),
00114 trans_data.get(),
00115 outImage.d_get(),
00116 norm_pitch,
00117 trans_pitch,
00118 char_pitch,
00119 rowPlan,
00120 colPlan,
00121 stream);
00122 }
00123
00124 }