/*
 * Matrix multiplication of two N x N integer matrices using CUDA.
 * One GPU thread computes one element of the result matrix.
 */
#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>   /* clock(), CLOCKS_PER_SEC */

#define N    1024
#define TILE 16     /* threads per block in each dimension */

/* Abort with file/line context if a CUDA runtime call fails. */
#define CUDA_CHECK(call)                                                    \
    do {                                                                    \
        cudaError_t err_ = (call);                                          \
        if (err_ != cudaSuccess) {                                          \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,   \
                    cudaGetErrorString(err_));                              \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)

/*
 * Compute c = a * b for N x N row-major matrices.
 * Launch layout: 2D grid of 2D blocks, one thread per output element.
 * Threads that fall outside the matrix (grid tail when N is not a
 * multiple of the block size) return immediately.
 */
__global__ void matmul(const int *__restrict__ a,
                       const int *__restrict__ b,
                       int *__restrict__ c)
{
    int row = blockIdx.y * blockDim.y + threadIdx.y; /* row of result element */
    int col = blockIdx.x * blockDim.x + threadIdx.x; /* column of result element */
    if (row >= N || col >= N)
        return;                      /* bounds guard for the grid tail */

    int sum = 0;                     /* accumulate in a register, not global memory */
    for (int k = 0; k < N; k++)
        sum += a[row * N + k] * b[k * N + col];
    c[row * N + col] = sum;          /* single global store per thread */
}

int main(void)
{
    size_t size = sizeof(int) * N * N;
    clock_t start_time = clock();

    /* Host buffers. */
    int *a = (int *)malloc(size);
    int *b = (int *)malloc(size);
    int *c = (int *)malloc(size);
    if (!a || !b || !c) {
        fprintf(stderr, "host allocation failed\n");
        return EXIT_FAILURE;
    }

    /* Device buffers. */
    int *deva, *devb, *devc;
    CUDA_CHECK(cudaMalloc((void **)&deva, size));
    CUDA_CHECK(cudaMalloc((void **)&devb, size));
    CUDA_CHECK(cudaMalloc((void **)&devc, size));

    /* Fill both inputs with ones, so every result element should equal N. */
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            a[i * N + j] = 1;
            b[i * N + j] = 1;
        }
    }

    /* Copy inputs to the device; the kernel writes devc, so no need to zero it. */
    CUDA_CHECK(cudaMemcpy(deva, a, size, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(devb, b, size, cudaMemcpyHostToDevice));

    /* One thread per output element; ceil-division handles N % TILE != 0. */
    dim3 threadsize(TILE, TILE);
    dim3 blocksize((N + TILE - 1) / TILE, (N + TILE - 1) / TILE);
    matmul<<<blocksize, threadsize>>>(deva, devb, devc);
    CUDA_CHECK(cudaGetLastError());  /* catch launch-configuration errors */

    /* Blocking device-to-host copy also synchronizes with the kernel. */
    CUDA_CHECK(cudaMemcpy(c, devc, size, cudaMemcpyDeviceToHost));

    /* Host wall-clock timing; %f matches the double-valued expression. */
    printf("\nExecution time of %d x %d matrix multiplication: %.3f seconds\n",
           N, N, (double)(clock() - start_time) / CLOCKS_PER_SEC);

    /* Print the result matrix. */
    for (int i = 0; i < N; i++) {
        printf("\n");
        for (int j = 0; j < N; j++)
            printf("%d ", c[i * N + j]);
    }
    printf("\n");

    /* Release device and host memory. */
    CUDA_CHECK(cudaFree(deva));
    CUDA_CHECK(cudaFree(devb));
    CUDA_CHECK(cudaFree(devc));
    free(a);
    free(b);
    free(c);
    return 0;
}
Implementation of Matrix multiplication using CUDA
Subscribe to:
Posts (Atom)
No comments:
Post a Comment