00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 #include "GpuIntegralImageProcessor.hpp"
00032 #include <cudpp.h>
00033 #include <cuda.h>
00034 #include <builtin_types.h>
00035 #include <channel_descriptor.h>
00036 #include <iostream>
00037 #include "GpuIntegralImage_kernel.h"
00038 #include "GpuIntegralImage.hpp"
00039 #include "assert_macros.hpp"
00040 
00041 
00042 namespace asrl {
00043 
00044   GpuIntegralImageProcessor::GpuIntegralImageProcessor(int width, int height)
00045   {
00046     cudaError_t err;
00047     m_width = width;
00048     m_height = height;
00049 
00050     
00051     unsigned char * char_ptr;
00052     err = cudaMallocPitch( (void**) &char_ptr, &char_pitch, width*sizeof(unsigned char), height);
00053     ASRL_ASSERT_EQ(err,cudaSuccess, "Unable to allocate CUDA char* input image.");
00054     char_data.reset(char_ptr,&cudaFree);
00055 
00056     float * norm_ptr;
00057     err = cudaMallocPitch( (void**) &norm_ptr, &norm_pitch, width*sizeof(float), height);
00058     ASRL_ASSERT_EQ(err, cudaSuccess, "Unable to allocate CUDA normally oriented float integral image.");
00059     norm_data.reset(norm_ptr,&cudaFree);
00060 
00061     float * trans_ptr;
00062     err = cudaMallocPitch( (void**) &trans_ptr, &trans_pitch, height*sizeof(float), width);
00063     ASRL_ASSERT_EQ(err, cudaSuccess, "Unable to allocate CUDA transpose oriented integral image.");
00064     trans_data.reset(trans_ptr,&cudaFree);
00065 
00066     CUDPPConfiguration config = { CUDPP_SCAN, CUDPP_ADD, CUDPP_FLOAT, CUDPP_OPTION_FORWARD | CUDPP_OPTION_INCLUSIVE };
00067     CUDPPResult result = cudppPlan(&colPlan, config, width*height, width, trans_pitch/sizeof(float));
00068     ASRL_ASSERT_EQ(result, CUDPP_SUCCESS, "Error creating scanPlan (column scan):" << result);
00069 
00070     result = cudppPlan(&rowPlan, config, width*height, height, norm_pitch/sizeof(float));
00071     ASRL_ASSERT_EQ(result,CUDPP_SUCCESS, "Error creating scanPlan (row scan): " << result);
00072   }
00073 
00074   GpuIntegralImageProcessor::~GpuIntegralImageProcessor()
00075   {
00076     try{
00077       CUDPPResult result = cudppDestroyPlan(colPlan);
00078       if(result != CUDPP_SUCCESS)
00079         std::cerr << "Unable to destroy column plan. Err code: " << result;
00080     } catch(std::exception const & e) {
00081       std::cout << e.what() << std::endl;
00082     }
00083     try{
00084       CUDPPResult result = cudppDestroyPlan(rowPlan);
00085       if(result != CUDPP_SUCCESS)
00086         std::cerr << "Unable to destroy column plan. Err code: " << result;
00087     } catch(std::exception const & e) {
00088       std::cout << e.what() << std::endl;
00089     }
00090   }
00091 
00092 
00093   void GpuIntegralImageProcessor::upload(cv::Mat & image)
00094   {
00095     ASRL_ASSERT_EQ(image.type(),CV_8UC1,"The image must be single channel, 8 bit");
00096     ASRL_ASSERT(image.isContinuous(),"The image must be tightly packed. width: " << image.cols << ", step: " << image.step);
00097     
00098     
00099     cudaError_t err = cudaMemcpy2D( (void*) char_data.get(), char_pitch, (void*) image.ptr(), image.cols*sizeof(unsigned char),
00100                                     image.cols*sizeof(unsigned char), image.rows, cudaMemcpyHostToDevice );
00101     ASRL_ASSERT_EQ(err,cudaSuccess, "Unable to copy image to GPU: (" << err << "): " << cudaGetErrorString(err));               
00102   }
00103 
00104   void GpuIntegralImageProcessor::process(cv::Mat & image, GpuIntegralImage & outImage, cudaStream_t stream){
00105     upload(image);
00106     process(outImage, stream);
00107   }
00108 
00109   void GpuIntegralImageProcessor::process(GpuIntegralImage & outImage, cudaStream_t stream)
00110   {
00111     call_integral_kernel((size_t) width(), (size_t)height(),
00112                          char_data.get(),               
00113                          norm_data.get(), 
00114                          trans_data.get(),      
00115                          outImage.d_get(),
00116                          norm_pitch, 
00117                          trans_pitch, 
00118                          char_pitch,
00119                          rowPlan,
00120                          colPlan,
00121                          stream);
00122   }
00123 
00124 }