1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 | /* * This is a program to implement Odd-Even Sort using CUDA * It uses N/2 threads */ #include<cuda.h> #include<stdio.h> #include<time.h> #include<stdlib.h> #define N 1024 __global__ void oddeven(int *a, int flag) { int index = blockIdx.x * blockDim.x + threadIdx.x; int temp; if((index >= N/2 - 1) && flag % 2 != 0) return; if(flag % 2 == 0) //if even phase { if(a[index *2 ] > a[index * 2 + 1]) { temp = a[index * 2]; a[index * 2] = a[index *2 +1]; a[index * 2 +1] = temp; } } else { //if odd phase if(a[index * 2 +1 ] > a[index *2 + 2]) { temp = a[index * 2 + 1]; a[index * 2 + 1] = a[index*2+2]; a[index*2+2] = temp; } } } int main() { int *a; int *deva; int i; int size = sizeof(int) * N; srand((unsigned)time(NULL)); //allocate memory in host a = (int *)malloc(size); //allocate memory in CUDA (device) memory cudaMalloc((void **)&deva, size); //puting some random values in memory for generating data for sorting for(i=0;i<N;i++) { a[i] = rand()%N; } printf("\nNumbers before sorting: "); for(i=0;i<N;i++) { printf("%d ", a[i]); } //recording starting time double start_time = clock(); //copy host memory data in CUDA (device) memory cudaMemcpy(deva, a, size, cudaMemcpyHostToDevice); // launch a kernel N-1 times for Odd-even sort for(i=0;i<N;i++) { oddeven<<<N/1024, 512>>>(deva, i); //512 threads per block and total N/2/512 blocks } //copy the result back into host memory cudaMemcpy(a, deva, size, cudaMemcpyDeviceToHost); //Lets see the execution time printf("\nExecution time : %lf seconds", (clock()-start_time)/CLOCKS_PER_SEC); //print the result printf("\nOutput: "); for(i=0;i<N;i++) { printf("%d ", a[i]); } return 0; } |
Implementation of Odd-Even Sort using CUDA
Subscribe to:
Posts (Atom)
No comments:
Post a Comment