并行代碼學習資料_第1頁
并行代碼學習資料_第2頁
并行代碼學習資料_第3頁
并行代碼學習資料_第4頁
并行代碼學習資料_第5頁
已閱讀5頁,還剩11頁未讀, 繼續免費閱讀

下載本文檔

版權說明:本文檔由用戶提供并上傳,收益歸屬內容提供方,若內容存在侵權,請進行舉報或認領

文檔簡介

用CilkPlus實現并行性#include<stdio.h>#include<windows.h>#include<mmsystem.h>#include<math.h>#include<cilk/cilk.h>#include<cilk/reducer_opadd.h>constlongintVERYBIG=100000;//*******************************************************************************************intmain(void){ inti; DWORDstarttime,elapsedtime; //---------------------------------------------------------------------- //Outputastartmessage //printf("NoneParallelTimingsfor%diterations\n\n",VERYBIG); printf("CilkPlusParallelTimingsfor%diterations\n\n",VERYBIG); //repeatexperimentseveraltimes for(i=0;i<6;i++) { //getstartingtime starttime=timeGetTime(); //resetchecksum&runningtotal cilk::reducer_opadd<longint>sum(0); cilk::reducer_opadd<double>total(0.0); //WorkLoop,dosomeworkbyloopingVERYBIGtimes cilk_for(intj=0;j<VERYBIG;j++) { longintk; doublesumx,sumy; //incrementchecksum sum+=1; //Calculatefirstarithmeticseries sumx=0.0; for(k=0;k<j;k++) sumx=sumx+(double)k; //Calculatesecondarithmeticseries sumy=0.0; for(k=j;k>0;k--) sumy=sumy+(double)k; if(sumx>0.0)total=total+1.0/sqrt(sumx); if(sumy>0.0)total=total+1.0/sqrt(sumy); } //getendingtimeanduseittodetermineelapsedtime elapsedtime=timeGetTime()-starttime; //reportelapsedtime printf("TimeElapsed%10dmSecsTotal=%lfCheckSum=%ld\n",(int)elapsedtime,total.get_value(),sum.get_value()); } //returnintegerasrequiredbyfunctionheader return0;}Windows多線程SemaphoreOpenMP錯誤檢測#include<stdio.h>#include<omp.h>staticlongnum_steps=10000*4;doublestep,gsum1,gsum2;voidSafeAdd(doublesum1,doublesum2,omp_lock_t&lock1,omp_lock_t&lock2){ //lockgsum1andupdate omp_set_lock(&lock1); gsum1+=sum1; //lockgsum2andupdate omp_set_lock(&lock2); gsum2+=sum2; omp_unset_lock(&lock2); omp_unset_lock(&lock1);}intmain(){ inti; doublex1,x2; omp_lock_tlock1,lock2; gsum1=0.0; gsum2=0.0; omp_init_lock(&lock1); omp_init_lock(&lock2); printf("CalculatingPi...\n"); step=1.0/(double)num_steps; for(i=0;i<num_steps;i+=4) { doublesum1,sum2; 
#pragmaompparallelsections//#pragmaompparallelsectionsprivate(x1,x2,sum1,sum2)數據競爭 { #pragmaompsection { //calculatefirstbar x1=(i+0.5)*step; x1*=x1; sum1=4.0/(1.0+x1); //calculatesecondbar x2=(i+1.5)*step; x2*=x2; sum2=4.0/(1.0+x2); SafeAdd(sum1,sum2,lock1,lock2); } #pragmaompsection { //calculatethirdbar x1=(i+2.5)*step; x1*=x1; sum1=4.0/(1.0+x1); //calculatefourthbar x2=(i+3.5)*step; x2*=x2; sum2=4.0/(1.0+x2); SafeAdd(sum1,sum2,lock2,lock1);//死鎖 } } } //calacvalueofpi doublepi=step*(gsum1+gsum2); printf("pi:%2.21f\n",pi); omp_destroy_lock(&lock1); omp_destroy_lock(&lock2);}內存錯誤MPI#include"mpi.h"#include<stdio.h>#include<string.h>intmain(intargc,char*argv[]){inti,rank,size,namelen;charname[MPI_MAX_PROCESSOR_NAME];MPI_Statusstat;MPI_Init(&argc,&argv);MPI_Comm_size(MPI_COMM_WORLD,&size);MPI_Comm_rank(MPI_COMM_WORLD,&rank);MPI_Get_processor_name(name,&namelen);if(rank==0){ printf("Helloworld:rank%dof%drunningon%s\n",rank,size,name); for(i=1;i<size;i++){ MPI_Recv(&rank,1,MPI_INT,i,1,MPI_COMM_WORLD,&stat); MPI_Recv(&size,1,MPI_INT,i,1,MPI_COMM_WORLD,&stat); MPI_Recv(&namelen,1,MPI_INT,i,1,MPI_COMM_WORLD,&stat); MPI_Recv(name,namelen+1,MPI_CHAR,i,1,MPI_COMM_WORLD,&stat); printf("Helloworld:rank%dof%drunningon%s\n",rank,size,name); }}else{ MPI_Send(&rank,1,MPI_INT,0,1,MPI_COMM_WORLD); MPI_Send(&size,1,MPI_INT,0,1,MPI_COMM_WORLD); MPI_Send(&namelen,1,MPI_INT,0,1,MPI_COMM_WORLD); MPI_Send(name,namelen+1,MPI_CHAR,0,1,MPI_COMM_WORLD);}MPI_Finalize();return(0);}CUDA#include<stdio.h>#include<cuda_runtime.h>#defineNUM_THREADS256#defineN1000boolInitCUDA();voidmatgen(float*a,intlda,intn);clock_tmatmult(constfloat*a,intlda,constfloat*b,intldb,float*c,intldc,intn);voidcompare_mat(constfloat*a,intlda,constfloat*b,intldb,intn);clock_tmatmultCUDA(constfloat*a,intlda,constfloat*b,intldb,float*c,intldc,intn);__global__staticvoidmatMultCUDA(constfloat*a,size_tlda,constfloat*b,size_tldb,float*c,size_tldc,intn);intmain(){ float*a,*b,*c,*d; if(!InitCUDA())return0; 
a=(float*)malloc(sizeof(float)*N*N); b=(float*)malloc(sizeof(float)*N*N); c=(float*)malloc(sizeof(float)*N*N); d=(float*)malloc(sizeof(float)*N*N); srand(0); matgen(a,N,N); matgen(b,N,N); clock_ttime1=matmultCUDA(a,N,b,N,c,N,N); clock_ttime2=matmult(a,N,b,N,d,N,N); compare_mat(c,N,d,N,N); doublesec1=(double)time1/CLOCKS_PER_SEC; doublesec2=(double)time2/CLOCKS_PER_SEC; printf("Timeused:%.2fseconds(%.2lfGFLOPS)inCUDA,Timeused:%.2fseconds(%.2lfGFLOPS)inCPU\n",sec1,2.0*N*N*N/(sec1*1E9),sec2,2.0*N*N*N/(sec2*1E9)); return0;}boolInitCUDA(){ intcount; cudaGetDeviceCount(&count); if(count==0){ fprintf(stderr,"Thereisnodevice.\n"); returnfalse; } inti; for(i=0;i<count;i++){ cudaDevicePropprop; if(cudaGetDeviceProperties(&prop,i)==cudaSuccess){ if(prop.major>=1){ break; } } } if(i==count){ fprintf(stderr,"ThereisnodevicesupportingCUDA1.x.\n"); returnfalse; } cudaSetDevice(i); returntrue;}voidmatgen(float*a,intlda,intn){ inti,j; for(i=0;i<n;i++){ for(j=0;j<n;j++){ a[i*lda+j]=(float)rand()/RAND_MAX+ (float)rand()/(RAND_MAX*RAND_MAX); } }}clock_tmatmult(constfloat*a,intlda,constfloat*b,intldb,float*c,intldc,intn){ clock_tstart,end; inti,j,k; start=clock(); for(i=0;i<n;i++){ for(j=0;j<n;j++){ doublet=0; for(k=0;k<n;k++){ t+=a[i*lda+k]*b[k*ldb+j]; } c[i*ldc+j]=t; } } end=clock(); returnend-start;}voidcompare_mat(constfloat*a,intlda,constfloat*b,intldb,intn){ floatmax_err=0; floataverage_err=0; inti,j; for(i=0;i<n;i++){ for(j=0;j<n;j++){ if(b[i*ldb+j]!=0){ floaterr=fabs((a[i*lda+j]- b[i*ldb+j])/b[i*ldb+j]); if(max_err<err)max_err=err; average_err+=err; } } } printf("Maxerror:%gAverageerror:%g\n",max_err,average_err/(n*n));}clock_tmatmultCUDA(constfloat*a,intlda,constfloat*b,intldb,float*c,intldc,intn){ float*ac,*bc,*cc; clock_tstart,end; start=clock(); size_tpitch_a,pitch_b,pitch_c; cudaMallocPitch((void**)&ac,&pitch_a,sizeof(float)*n,n); cudaMallocPitch((void**)&bc,&pitch_b,sizeof(float)*n,n); cudaMallocPitch((void**)&cc,&pitch_c,sizeof(float)*n,n); 
cudaMemcpy2D(ac,pitch_a,a,sizeof(float)*lda,sizeof(float)*n,n,cudaMemcpyHostToDevice); cudaMemcpy2D(bc,pitch_b,b,sizeof(float)*ldb,sizeof(float)*n,n,cudaMemcpyHostToDevice); //intblocks=(n+NUM_THREADS-1)/NUM_THREADS; matMultCUDA<<<n,NUM_THREADS,sizeof(float)*n>>>(ac,pitch_a/sizeof(float),bc,pitch_b/sizeof(float),cc,pitch_c/sizeof(float),n); cudaMemcpy2D(c,sizeof(float)*ldc,cc,pitch_c,sizeof(float)*n,n,cudaMemcpyDeviceToHost); cudaFree(ac); cudaFree(bc); cudaFree(cc); end=clock(); returnend-start;}__global__staticvoidmatMultCUDA(constfloat*a,size_tlda,constfloat*b,size_tldb,float*c,size_tldc,intn){ extern__shared__floatdata[]; constinttid=threadIdx.x; constintrow=blockIdx.x; inti,j; for(i=tid;i<n;i+=blockDim.x){ data[i]=a[row*lda+i]; } __syncthreads(); for(j=tid;j<n;j+=blockDim.x){ floatt=0; floaty=0; for(i=0;i<n;i++){ floatr; y-=data[i]*b[i*ldb+j]; r=t-y; y=(r-t)+y; t=r; } c[row*ldc+j]=t; }}Win32全局變量Win32事件Win32CriticalSectionWin32MutexesLinux#include<pthread.h>#include<stdlib.h>#defineMAX_THREADS512void*compute_pi(void*);....main(){...pthread_tp_threads[MAX_THREADS];pthread_attr_tattr;pthread_attr_init(&attr);for(i=0;i<num_threads;i++){hits[i]=i;pthread_create(&p_threads[i],&attr,compute_pi,(void*)&hits[i]);}for(i=0;i<num_threads;i++){pthread_join(p_threads[i],NULL);total_hits+=hits[i];}void*compute_pi(void*s){……pthread_exit(0);}Linux#include "unpthread.h"void *copyto(void*);staticint sockfd; /*globalforboththreadstoaccess*/staticFILE *fp;voidstr_cli(FILE*fp_arg,intsockfd_arg){ char recvline[MAXLINE]; pthread_t tid; sockfd=sockfd_arg; /*copyargumentstoexternals*/ fp=fp_arg; Pthread_create(&tid,NULL,copyto,NULL); while(Readline(sockfd,recvline,MAXLINE)>0) Fputs(recvline,stdout);}void*copyto(void*arg){ char sendline[MAXLINE]; while(Fgets(sendline,MAXLINE,fp)!=NULL) Writen(sockfd,sendline,strlen(sendline)); Shutdown(sockfd,SHUT_WR); /*EOFonstdin,sendFIN*/ return(NULL); /*return(i.e.,threadterminates)whenend-of-fileonstdin*/}#include "unpthread.h"staticvoid 
*doit(void*); /*eachthreadexecutesthisfunction*/intmain(intargc,char**argv){ int listenfd,connfd; socklen_t addrlen,len; structsockaddr *cliaddr; if(argc==2) listenfd=Tcp_listen(NULL,argv[1],&addrlen); elseif(argc==3) listenfd=Tcp_listen(argv[1],argv[2],&addrlen); else err_quit("usage:tcpserv01[<host>]<serviceorport>"); cliaddr=Malloc(addrlen); for(;;){ len=addrlen; connfd=Accept(listenfd,cliaddr,&len); Pthread_create(NULL,NULL,&doit,(void*)connfd); }}staticvoid*doit(void*arg){ Pthread_detach(pthread_self()); str_echo((int)arg); /*samefunctionasbefore*/ Close((int)arg); /*wearedonewithconnectedsocket*/ return(NULL);}Linuxmutexmain(){....pthread_mutex_init(&minimum_value_lock,NULL);....}void*find_min(void*list_ptr){....pthread_mutex_lock(&minimum_value_lock);if(my_min<minimum_value)minimum_value=my_min;/*andunlockthemutex*/pthread_mutex_unlock(&minimum_value_lock);#include<stdio.h>#include<pthread.h>#defineTHREAD_NUMBER10pthread_mutex_tmutex=PTHREAD_MUTEX_INITIALIZER;pthread_cond_tcond=PTHREAD_COND_INITIALIZER;intsum=0;void*th_counter(void*argc){inti;i=*(int*)argc;sleep(1);pthread_mutex_lock(&mutex);sum=sum+i;if(sum>10)pthread_cond_signal(&cond);pthread_mutex_unlock(&mutex);printf("count%disover\n",i);return;}void*waitsum(void*argc){pthread_mutex_lock(&mutex);while(sum<=10)pthread_cond_wait(&cond,&mutex);printf("Getasignalthatthesumhasbeenupto10!\n");pthread_mutex_unlock(&mutex);}intmain(void){pthread_tpt[THREAD_NUMBER];inti;intarg[THREAD_NUMBER];pthread_create(&pt[THREAD_NUMBER-1],NULL,waitsum,NULL);for(i=0;i<THREAD_NUMBER-1;i++){arg[i]=i;pthread_create(&pt[i],NULL,th_counter,(void*)&arg[i]);}for(i=0;i<THREAD_NUMBER;i++)pthread_detach(pt[i]);//pthread_join(pt[i],NULL);printf("Themainthreadiswaitingforallthethreadsfinishing...\n");sleep(5);printf("sumis%d\n",sum);pthread_mutex_destroy(&mutex);pthread_cond_destroy(&cond);return0;}Linux生產者消費者pthread_cond_tcond_queue_empty,cond_queue_full;pthread_mutex_ttask_queue_cond_lock;inttask_available;/*otherdatastructureshere*/m
ain(){/*declarationsandinitializations*/task_available=0;pthread_init();pthread_cond_init(&cond_queue_empty,NULL);pthread_cond_init(&cond_queue_full,NULL);pthread_mutex_init(&task_queue_cond_lock,NULL);/*createandjoinproducerandconsumerthreads*/}void*producer(void*producer_thread_data){intinserted;while(!done()){create_task();pthread_mutex_lock(&task_queue_cond_lock);while(task_available==1)pthread_cond_wait(&cond_queue_empty,task_queue_cond_lock);insert_into_queue();task_available=1;pthread_cond_signal(&cond_queue_full);pthread_mutex_unlock(&task_queue_cond_lock);}}void*consumer(void*consumer_thread_data){while(!done()){pthread_mutex_lock(&task_queue_cond_lock);while(task_available==0)pthread_cond_wait(&cond_queue_full,&task_queue_cond_lock);my_task=extract_from_queue();task_available=0;pthread_cond_signal(&cond_queue_empty);pthread_mutex_unlock(&task_queue_cond_lock);process_task(my_task);}}Linux讀寫鎖typedefstruct{intreaders;intwriter;pthread_cond_treaders_proceed;pthread_cond_twriter_proceed;intpending_writers;pthread_mutex_tread_write_lock;}mylib_rwlock_t;voidmylib_rwlock_init(mylib_rwlock_t*l){l->readers=l->writer=l->pending_writers=0;pthread_mutex_init(&(l->read_write_lock),NULL);pthread_cond_init(&(l->readers_proceed),NULL);pthread_cond_init(&(l->writer_proceed),NULL);}voidmylib_rwlock_rlock(mylib_rwlock_t*l){/*ifthereisawritelockorpendingwriters,performconditionwait..elseincrementcountofreadersandgrantreadlock*/pthread_mutex_lock(&(l->read_write_lock));while((l->pending_writers>0)||(l->writer>0))pthread_cond_wait(&(l->readers_proceed),&(l->read_write_lock));l->readers++;pthread_mutex_unlock(&(l->read_write_lock));}voidmylib_rwlock_wlock(mylib_rwlock_t*l){/*iftherearereadersorwriters,incrementpendingwriterscountandwait.Onbeingwoken,decrementpendingwriterscountandincrementwritercount*/pthread_mutex_lock(&(l->read_write_lock));while((l->writer>0)||(l->readers>0)){l->pending_writers++;pthread_cond_wait(&(l->writer_proceed),&(l->read_write_lock));}l->pending_writer
s--;l->writer++;pthread_mutex_unlock(&(l->read_write_lock));}voidmylib_rwlock_unlock(mylib_rwlock_t*l){/*ifthereisawritelockthenunlock,elseiftherearereadlocks,decrementcountofreadlocks.Ifthecountis0andthereisapendingwriter,letitthrough,elseiftherearependingreaders,letthemallgothrough*/pthread_mutex_lock(&(l->read_write_lock));if(l->writer>0)l->writer=0;elseif(l->readers>0)l->readers--;pthread_mutex_unlock(&(l->read_write_lock

溫馨提示

  • 1. 本站所有資源如無特殊說明,都需要本地電腦安裝OFFICE2007和PDF閱讀器。圖紙軟件為CAD,CAXA,PROE,UG,SolidWorks等.壓縮文件請下載最新的WinRAR軟件解壓。
  • 2. 本站的文檔不包含任何第三方提供的附件圖紙等,如果需要附件,請聯繫上傳者。文件的所有權益歸上傳用戶所有。
  • 3. 本站RAR壓縮包中若帶圖紙,網頁內容里面會有圖紙預覽,若沒有圖紙預覽就沒有圖紙。
  • 4. 未經權益所有人同意不得將文件中的內容挪作商業或盈利用途。
  • 5. 人人文庫網僅提供信息存儲空間,僅對用戶上傳內容的表現方式做保護處理,對用戶上傳分享的文檔內容本身不做任何修改或編輯,并不能對任何下載內容負責。
  • 6. 下載文件中如有侵權或不適當內容,請與我們聯繫,我們立即糾正。
  • 7. 本站不保證下載資源的準確性、安全性和完整性, 同時也不承擔用戶因使用這些下載資源對自己和他人造成任何形式的傷害或損失。

評論

0/150

提交評論