下載本文檔
版權說明:本文檔由用戶提供并上傳,收益歸屬內容提供方,若內容存在侵權,請進行舉報或認領
文檔簡介
1、OpenCV 環境下 CUDA 編程示例:在 CUDA 平臺上對圖像算法進行并行加速是目前并行計算方面比較簡單易行的一種方式,而同時利用 OpenCV 提供的一些庫函數的話,那么事情將會變得更加easy。以下是我個人采用的一種模板,這個模板是從 OpenCV 里的算法CUDA 源碼挖掘出來的,我感覺這個用起來比較方便,所以經常采用。首先大牛們寫的源碼都很魯棒,考慮的比較全面(如大部分算法將 1,3,4 通道的圖像同時搞定),感覺還有個比較神奇的地方在于 CPU 端 GpuMat 和 GPU 端PtrStepSzb 的轉換,讓我欲罷不能,一個不太理想的地方在于第一幀的初始化時間比較長,應
2、該是 CPU 到 GPU 的數據傳輸。代碼中有考慮流,但貌似沒有使用。我使用的是趙開勇的 CUDA_VS_Wizard,主函數還是用的cu 文件。以下代碼是對 Vibe 背景建模算法的并行,背景建模算法是目前接觸到易于并行的一類,如 GMM 等,而且加速效果不錯,因為一個線程執行的數據就是對應一個像素點。代碼如下:sample.cucpp view plaincopy<spanstyle="font-size:14px;">/*sample.cu* * This is a example of the CUDA program.*#in
3、clude <stdio.h>#include#include<stdlib.h> #include <cutil_inline.h><iostream> #include <string> #include "opencv2/core/core.hpp" #include "opencv2/gpu/gpu.hpp" #include "opencv2/highgui/highgui.hpp" #
4、include "Vibe_M_kernel.cu" #include "Vibe_M.h" using namespacestd; using namespace cv; using namespace cv:gpu;enum Method FGD_STAT,MOG,MOG2,VIBE,GMG ; int main(int argc,const char* argv) cv:CommandLineParser cmd(argc,argv," c | camera | flase| use camera "" m |gmg)
5、 "" h | help | false| print help" f | file | 768x576.avi | input video file "method | vibe| method (fgd, mog, mog2, vibe,message ");<< endl;if (cmd.get<bool>("help")cout << "Usage : bgfg_segm options"cout <
6、< "Avaible options:"0;bool useCamera =cmd.get<bool>("camera");string file =cmd.get<string>("file");string method =cmd.get<string>("method");if (method != "fgd"&& method != "mog&quo
7、t; && method !="mog2" && method != "vibe" &&method != "gmg")cerr << "Incorrectmethod" << endl;return -1;Method m = method = "fgd" ? FGD_STAT : method ="mog" ? MOG :
8、 method = "mog2" ? MOG2 : method ="vibe" ? VIBE : GMG;VideoCapture cap;if(useCamera)cap.open(0);elsecap.open(file);if (!cap.isOpened()cerr << "can not open camera or video file"<< endl;return -1;Mat origin,frame;cap >> origin;cv
9、tColor(origin,frame,CV_BGR2GRAY);GpuMatd_frame(frame);Vibe_M vibe;GpuMat d_fgmask;Mat fgmask;Mat fgimg;Mat bgimg;case VIBE:break;switch (m) vibe.initialize(d_frame);namedWindow("image", WINDOW_NORMAL);namedWindow("foreground mask", WINDOW_NORMAL);for(;)cap >> origin
10、;ifbreak;(origin.empty()cvtColor(origin,frame,CV_BGR2GRAY);d_frame.upload(frame);/update the modelswitch (m)case VIBE:vibe(d_frame, d_fgmask);break;d_fgmask.download(fgmask);imshow("image",frame);imshow("foreground mask", fgmask);int key = waitKey(30);if (key = 27)break;else if(k
11、ey = ' ')cvWaitKey(0);exit(0);</span> Vibe_M.cppcpp view plaincopy<span style="font-size:14px;">#include "Vibe_M.h"voidnamespace cv namespace gpu namespace devicenamespace vibe_mloadConstants(int nbSamples, int reqMatches, int radius, intsubsam
12、plingFactor);void init_gpu(PtrStepSzbframe, int cn, PtrStepSzb samples, PtrStepSz<unsignedvoidint> randStates, cudaStream_t stream);update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask,PtrStepSzb samples, PtrStepSz<unsigned int> randStates,cudaStream_t stream); namespac
13、econst int defaultNbSamples = 20;const intdefaultReqMatches = 2;const int defaultRadius = 20;const int defaultSubsamplingFactor = 16; Vibe_M:Vibe_M(unsigned long rngSeed) : frameSize_(0, 0),rngSeed_(rngSeed) nbSamples = defaultNbSamples;reqMatches = defaultReqMatches;radius = defaultRadius;subsampli
14、ngFactor = defaultSubsamplingFactor; voidVibe_M:initialize(const GpuMat& firstFrame,Stream& s) using namespacecv:gpu:device:vibe_m;CV_Assert(firstFrame.type()/cudaStream_t= CV_8UC1 | firstFrame.type() = CV_8UC3 | firstFrame.type() = CV_8UC4);stream = StreamAccessor:getStream(s);loadC
15、onstants(nbSamples, reqMatches, radius,subsamplingFactor);frameSize_ = firstFrame.size();if (randStates_.size() != frameSize_)cv:RNG rng(rngSeed_);cv:Math_randStates(frameSize_, CV_8UC4);int ch =rng.fill(h_randStates, cv:RNG:UNIFORM, 0, 255);randStates_.upload(h_randStates);firstFrame.channels();int
16、 sample_ch = ch = 1 ? 1 : 4;samples_.create(nbSamples * frameSize_.height, frameSize_.width, CV_8UC(sample_ch);init_gpu(firstFrame, ch, samples_, randStates_, 0); voidVibe_M:operator()(const GpuMat& frame, GpuMat&fgmask, Stream& s) using namespacecv:gpu:device:vibe_m;CV_Asser
17、t(frame.depth() =CV_8U);int ch = frame.channels();intsample_ch = ch = 1 ? 1 : 4;if (frame.size() !=frameSize_ | sample_ch != samples_.channels()initialize(frame);fgmask.create(frameSize_,CV_8UC1);update_gpu(frame, ch, fgmask, samples_,randStates_, StreamAccessor:getStream(s); voidVibe_M:release() fr
18、ameSize_ = Size(0, 0);randStates_.release();samples_.release(); </span> Vibe_M.hcpp view plaincopy<span style="font-size:14px;">#ifndef _VIBE_M_H_ #defineVIBE_M_H#ifndef SKIP_INCLUDES #include<vector>#include <memory> #include<
19、iosfwd>#endif #include "opencv2/core/core.hpp"#include "opencv2/core/gpumat.hpp" #include"opencv2/gpu/gpu.hpp" #include"opencv2/imgproc/imgproc.hpp" #include"opencv2/objdetect/objdetect.hpp" #include"opencv2/features2d/features2d.hpp"
20、; using namespace std;using namespace cv; using namespace cv:gpu; classVibe_M public:/! the default constructorexplicit Vibe_M(unsigned long rngSeed = 1234567);/!re-initiaization methodvoid initialize(constGpuMat& firstFrame, Stream& stream =Stream:Null();/! the update operatorvoidGp
21、uMat randStates_;GpuMat samples_; ;/! releases alloperator()(const GpuMat& frame, GpuMat& fgmask,Stream& stream = Stream:Null();inner buffersvoid release();int nbSamples;/ number of samples per pixelint reqMatches;/ #_minint radius;/ RintsubsamplingFactor; / amount of random
22、subsamplingprivate:Size frameSize_;unsigned long rngSeed_;Stream& s) using namespace#endif</span> Vibe_M.cuhtml view plaincopy<span style="font-size:14px;">#include "Vibe_M.h" #include "opencv2/gpu/stream_accessor.hpp" namespace cvnam
23、espacevibe_m namespace gpu namespace device void loadConstants(int nbSamples,int reqMatches, int radius, int subsamplingFactor);void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples,PtrStepSz<unsigned int> randStates, cudaStream_t stream);void update_gpu(PtrStepSzb frame, int cn,
24、PtrStepSzb fgmask,PtrStepSzb samples, PtrStepSz<unsigned int> randStates,cudaStream_t stream); namespaceconst int defaultNbSamples = 20;const intdefaultReqMatches = 2;const int defaultRadius = 20;const int defaultSubsamplingFactor = 16; Vibe_M:Vibe_M(unsigned long rngSeed) : frameSize_
25、(0, 0),rngSeed_(rngSeed) nbSamples = defaultNbSamples;reqMatches = defaultReqMatches;radius = defaultRadius;subsamplingFactor = defaultSubsamplingFactor; voidVibe_M:initialize(const GpuMat& firstFrame,cv:gpu:device:vibe_m;CV_Assert(firstFrame.type()cudaStream_t stream= CV_8UC1 | firstFrame.t
26、ype() = CV_8UC3 | firstFrame.type() = CV_8UC4);= cv:gpu:StreamAccessor:getStream(s);loadConstants(nbSamples, reqMatches, radius,subsamplingFactor);frameSize_ = firstFrame.size();if (randStates_.size() != frameSize_)cv:RNG rng(rngSeed_);cv:Math_randStates(frameSize_, CV_8UC4);rng.fill(h_randStates, c
27、v:RNG:UNIFORM, 0, 255);randStates_.upload(h_randStates);int ch =firstFrame.channels();int sample_ch = ch = 1 ? 1 : 4;samples_.create(nbSamples * frameSize_.height,frameSize_.width, CV_8UC(sample_ch);init_gpu(firstFrame, ch, samples_, randStates_, stream); void Vibe_M:operator()(const GpuMat&
28、 frame,GpuMat& fgmask, Stream& s) usingnamespace cv:gpu:device:vibe_m;CV_Assert(frame.depth() = CV_8U);int ch =frame.channels();int sample_ch = ch = 1 ? 1 : 4;if (frame.size() != frameSize_ | sample_ch !=samples_.channels()initialize(frame);fgmask.create(frameSize_, CV_8UC1);update_g
29、pu(frame, ch, fgmask, samples_, randStates_,cv:gpu:StreamAccessor:getStream(s); voidVibe_M:release() frameSize_ = Size(0, 0);randStates_.release();samples_.release(); </span> Vibe_M_kernel.cucpp view plaincopy<spanstyle="font-size:14px;">#include"opencv2/gp
30、u/device/common.hpp" #include"opencv2/gpu/device/vec_math.hpp" namespace cv namespace gpu namespace device namespacevibe_mconstant_ int c_nbSamples;constant_ int c_reqMatches;constant_ intc_radius;constant_ int c_subsamplingFactor;void loadConstants(int nbSamples, int reqMatches, int
31、radius,int subsamplingFactor)cudaSafeCall( cudaMemcpyToSymbol(c_nbSamples, &nbSamples, sizeof(int) );cudaSafeCall( cudaMemcpyToSymbol(c_reqMatches,&reqMatches, sizeof(int) );cudaSafeCall( cudaMemcpyToSymbol(c_radius, &radius, sizeof(int) );cudaSafeCall( cudaMemcpyToSymbol(c_s
32、ubsamplingFactor, &subsamplingFactor, sizeof(int) );device_ _forceinline_ uint nextRand(uint& state)/const unsigned int CV_RNG_COEFF =4164903690U;/ 已經(jīng)定義state = state *CV_RNG_COEFF + (state >> 16);return state;constant_ intc_xoff9 = -1, 0, 1, -1, 1, -1, 0, 1, 0;constant_
33、 int c_yoff9 = -1, -1, -1,0, 0, 1, 1, 1, 0;device_ _forceinline_ int2 chooseRandomNeighbor(int x, int y, uint& randState, int count = 8)int idx = nextRand(randState) % count;return make_int2(x + c_xoffidx, y + c_yoffidx);device_ _forceinline_ uchar cvt(uchar val)return val;device_ _forceinli
34、ne_ uchar4 cvt(const uchar3& val)return make_uchar4(val.x, val.y, val.z,0);device_ _forceinline_ uchar4cvt(const uchar4& val)return val;template <typename SrcT, typename SampleT>global_ void init(const PtrStepSz<SrcT> frame,PtrStep<SampleT> s
35、amples, PtrStep<uint>randStates)const int x =blockIdx.x * blockDim.x + threadIdx.x;constint y = blockIdx.y * blockDim.y + threadIdx.y;if (x >= frame.cols | y >= frame.rows)return;uint localState = randStates(y, x);for (int k = 0; k < c_nbSamples; +k)int2 np = choos
36、eRandomNeighbor(x, y,localState, 9);np.x= :max(0, :min(np.x, frame.cols - 1);np.y = :max(0, :min(np.y, frame.rows - 1);SrcT pix = frame(np.y, np.x);samples(k* frame.rows + y, x) = cvt(pix);randStates(y, x) = localState;template <typename SrcT, typename SampleT>void init_caller(PtrStepS
37、zb frame, PtrStepSzb samples,PtrStepSz<uint> randStates, cudaStream_t stream)dim3 block(32, 8);dim3grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y);cudaSafeCall( cudaFuncSetCacheConfig(init<SrcT,SampleT>, cudaFuncCachePreferL1) );init<SrcT, SampleT&g
38、t;<<<grid, block, 0, stream>>>(PtrStepSz<SrcT>) frame,if(PtrStepSz<SampleT>) samples, randStates);cudaSafeCall( cudaGetLastError() );(stream = 0) cudaSafeCall( cudaDeviceSynchronize() );void init_gpu(PtrStepSzb frame, int cn, PtrS
39、tepSzb samples,PtrStepSz<uint> randStates, cudaStream_t stream)typedef void (*func_t)(PtrStepSzb frame,PtrStepSzb samples, PtrStepSz<uint> randStates,cudaStream_t stream);static const func_tfuncs =0,init_caller<uchar, uchar>, 0, init_caller<uchar3,uch
40、ar4>, init_caller<uchar4,uchar4>funcscn(frame, samples, randStates, stream);device_ _forceinline_ int calcDist(uchar a, uchar b)return :abs(a - b);device_ _forceinline_ int calcDist(const uchar3& a,const uchar4& b)return(:abs(a.x - b.x) + :abs(a.y - b.y) + :abs(a
41、.z - b.z) /3;device_ _forceinline_ intcalcDist(const uchar4& a, const uchar4& b)return (:abs(a.x - b.x) + :abs(a.y - b.y)+ :abs(a.z - b.z) / 3;template<typename SrcT, typename SampleT>global_ void update(const PtrStepSz<SrcT> frame,PtrStepb fgmask, PtrStep
42、<SampleT> samples,PtrStep<uint> randStates)const int x = blockIdx.x * blockDim.x +threadIdx.x;const int y = blockIdx.y *blockDim.y + threadIdx.y;if (x >=frame.cols | y >= frame.rows)return;uint localState = randStates(y, x);SrcTimgPix = frame(y, x);/ compari
43、son with themodelint count = 0;for (intk = 0; (count < c_reqMatches) && (k <c_nbSamples); +k)SampleT samplePix = samples(k *frame.rows + y, x);int distance =calcDist(imgPix, samplePix);if(distance < c_radius)+count;/ pixelclassification according to reqMatches fg
44、mask(y, x) = (uchar) (-(count < c_reqMatches);/ 當count<2 時,為前景 當計數(shù)器 count>=2 時,為背景if (count >= c_reqMatches)/ the pixel belongs to the background/ gets a random number between 0 and subsamplingFactor-1 int randomNumber = nextRand(localState) %/ update of thec_subsamplingFactor;current pixel model if (randomNumber =0)/ randomsubsamplingint k =nextRand(localState) % c_nbSamples;samples(k * frame.rows + y, x) =cvt(imgPix);/update of a neighboring pixel modelrandomNumber = nextRand(localState) % c_subsamplingFactor;if (randomNumber = 0)/ random subsamp
溫馨提示
- 1. 本站所有資源如無特殊說明,都需要本地電腦安裝OFFICE2007和PDF閱讀器。圖紙軟件為CAD,CAXA,PROE,UG,SolidWorks等。壓縮文件請下載最新的WinRAR軟件解壓。
- 2. 本站的文檔不包含任何第三方提供的附件圖紙等,如果需要附件,請聯繫上傳者。文件的所有權益歸上傳用戶所有。
- 3. 本站RAR壓縮包中若帶圖紙,網頁內容里面會有圖紙預覽,若沒有圖紙預覽就沒有圖紙。
- 4. 未經權益所有人同意不得將文件中的內容挪作商業或盈利用途。
- 5. 人人文庫網僅提供信息存儲空間,僅對用戶上傳內容的表現方式做保護處理,對用戶上傳分享的文檔內容本身不做任何修改或編輯,并不能對任何下載內容負責。
- 6. 下載文件中如有侵權或不適當內容,請與我們聯繫,我們立即糾正。
- 7. 本站不保證下載資源的準確性、安全性和完整性, 同時也不承擔用戶因使用這些下載資源對自己和他人造成任何形式的傷害或損失。
最新文檔
- 二零二五年度退休返聘人員合同終止告知書
- 2025年度醫療健康管理系統軟件購銷合同模板
- 2025年度汽車零部件車輛質押租賃協議
- 2025年度股權并購終止協議
- 2025年度電商平臺內容創作者孵化合作合同
- 二零二五年度海洋生態保護區海域租賃合同
- 二零二五年度新能源儲能設備融資租賃合同主體權益與能源安全
- 2025年度遺產繼承財產分配與公司股權激勵及員工持股協議
- 2025版互聯網保險產品退款協議合同3篇
- 2025年度創業孵化器股權變更合作協議
- 2025年度公務車輛私人使用管理與責任協(xié)議書3篇
- 售后工程師述職報告
- 綠化養護難點要點分析及技術措施
- 2024年河北省高考歷史試卷(含答案解析)
- 車位款抵扣工程款合同
- 小學六年級數學奧數題100題附答案(完整版)
- 高中綜評項目活動設計范文
- 英漢互譯單詞練習打印紙
- 2023湖北武漢華中科技大學招聘實驗技術人員24人筆試參考題庫(共500題)答案詳解版
- 一氯二氟甲烷安全技術說明書MSDS
- 物流簽收回執單
評論
0/150
提交評論