Friday, April 1, 2016

Implementation of Odd-Even Sort using CUDA


 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
/*
 * This is a program to implement Odd-Even Sort using CUDA
 * It uses N/2 threads
 */
#include<cuda.h>
#include<stdio.h>
#include<time.h>
#include<stdlib.h>
#define N 1024

__global__ void oddeven(int *a, int flag)
{
 int index = blockIdx.x * blockDim.x + threadIdx.x;
  int temp;
  if((index >= N/2 - 1) && flag % 2 != 0) return;

  if(flag % 2 == 0) //if even phase
  {
    if(a[index *2 ] > a[index * 2 + 1])
    {
      temp = a[index * 2];
      a[index * 2] = a[index *2 +1];
      a[index * 2 +1] = temp;
    }
  }
  else { //if odd phase
    if(a[index * 2 +1 ] > a[index *2 + 2])
  {
      temp = a[index * 2 + 1];
      a[index * 2 + 1] = a[index*2+2];
      a[index*2+2] = temp;
    }
  }
}

int main()
{
 int *a;
  int *deva;
  int i;
  int size = sizeof(int) * N;
  srand((unsigned)time(NULL));

  //allocate memory in host
 a = (int *)malloc(size);

  //allocate memory in CUDA (device) memory
  cudaMalloc((void **)&deva, size);

  //puting some random values in memory for generating data for sorting
  for(i=0;i<N;i++)
  {
    a[i] = rand()%N;
  }

 printf("\nNumbers before sorting: ");
  for(i=0;i<N;i++)
   {
     printf("%d ", a[i]);
   }

 //recording starting time
 double start_time = clock();  

 //copy host memory data in CUDA (device) memory 
  cudaMemcpy(deva, a, size, cudaMemcpyHostToDevice);

  // launch a kernel N-1 times for Odd-even sort
  for(i=0;i<N;i++)
  {
    oddeven<<<N/1024, 512>>>(deva, i); //512 threads per block and total N/2/512 blocks
   }

  //copy the result back into host memory
  cudaMemcpy(a, deva, size, cudaMemcpyDeviceToHost);

  //Lets see the execution time
  printf("\nExecution time : %lf seconds", (clock()-start_time)/CLOCKS_PER_SEC);

  //print the result
  printf("\nOutput: ");
  for(i=0;i<N;i++)
  {
    printf("%d ", a[i]);
  }
  return 0;
}