From f8a9ff6afafccfa0785d0b488d0a8a0e598ec558 Mon Sep 17 00:00:00 2001
From: Carlos Teijeiro
Date: Tue, 12 Mar 2019 10:46:16 +0100
Subject: [PATCH] New CUDA codes: vector addition

---
 README.md                                  | 11 +++-
 vector_add/c/solution/vector_add.cu        | 70 ++++++++++++++++++++++
 vector_add/c/vector_add.cu                 | 63 +++++++++++++++++++
 vector_add/python/README.md                |  6 ++
 vector_add/python/solution/vector_add_1.py | 22 +++++++
 vector_add/python/solution/vector_add_2.py | 31 ++++++++++
 vector_add/python/solution/vector_add_3.py | 41 +++++++++++++
 vector_add/python/vector_add_1.py          | 22 +++++++
 vector_add/python/vector_add_2.py          | 28 +++++++++
 vector_add/python/vector_add_3.py          | 40 +++++++++++++
 10 files changed, 332 insertions(+), 2 deletions(-)
 create mode 100644 vector_add/c/solution/vector_add.cu
 create mode 100644 vector_add/c/vector_add.cu
 create mode 100644 vector_add/python/README.md
 create mode 100644 vector_add/python/solution/vector_add_1.py
 create mode 100644 vector_add/python/solution/vector_add_2.py
 create mode 100644 vector_add/python/solution/vector_add_3.py
 create mode 100644 vector_add/python/vector_add_1.py
 create mode 100644 vector_add/python/vector_add_2.py
 create mode 100644 vector_add/python/vector_add_3.py

diff --git a/README.md b/README.md
index 85380fd..ebc27f3 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,10 @@
-# CUDA
+# GPU programming with CUDA
+
+This project contains GPU programming exercises in CUDA C and PyCUDA, for C and Python respectively.
+
+The Python codes are presented without print statements so that they can be executed directly in an interactive session or inside a Jupyter notebook.
+
+## Exercises
+
+ - [Vector addition](vector_add): simple vector addition. Level: **basic**
-CUDA codes
\ No newline at end of file
diff --git a/vector_add/c/solution/vector_add.cu b/vector_add/c/solution/vector_add.cu
new file mode 100644
index 0000000..e432242
--- /dev/null
+++ b/vector_add/c/solution/vector_add.cu
@@ -0,0 +1,70 @@
+#include <stdio.h>
+#include <cuda_runtime.h>
+// CUDA Kernel
+__global__ void
+vectorAdd(const float *A, const float *B, float *C, int numElements)
+{
+    // Define a target element "i" in terms of block and thread identifiers
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    // Perform the vector addition checking the limits of the array!
+    if (i < numElements)
+    {
+        C[i] = A[i] + B[i];
+    }
+}
+
+/**
+ * Host main routine
+ */
+int
+main(void)
+{
+    int numElements = 150000;
+    size_t size = numElements * sizeof(float);
+    printf("[Vector addition of %d elements]\n", numElements);
+
+    float a[numElements],b[numElements],c[numElements];
+    float *a_gpu,*b_gpu,*c_gpu;
+
+    // Allocate device global memory
+    cudaMalloc((void **)&a_gpu, size);
+    cudaMalloc((void **)&b_gpu, size);
+    cudaMalloc((void **)&c_gpu, size);
+
+    // Initialise the host input vectors (example values)
+    for (int i=0;i<numElements;i++)
+    {
+        a[i] = (float)i;
+        b[i] = 2.0f*(float)i;
+    }
+
+    // Copy the host input vectors to the device input vectors
+    cudaMemcpy(a_gpu, a, size, cudaMemcpyHostToDevice);
+    cudaMemcpy(b_gpu, b, size, cudaMemcpyHostToDevice);
+
+    // Launch the vectorAdd kernel with enough blocks to cover all elements
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    vectorAdd<<<blocksPerGrid, threadsPerBlock>>>(a_gpu, b_gpu, c_gpu, numElements);
+
+    // Copy the device result vector in device memory to the host result vector
+    // in host memory.
+    printf("Copy output data from the CUDA device to the host memory\n");
+    cudaMemcpy(c, c_gpu, size, cudaMemcpyDeviceToHost);
+
+    // Free device global memory
+    cudaFree(a_gpu);
+    cudaFree(b_gpu);
+    cudaFree(c_gpu);
+
+    // Check the result on the host
+    for (int i=0;i<numElements;i++)
+    {
+        float expected = a[i] + b[i];
+        if (c[i] != expected)
+        {
+            printf("Result verification failed at element %d!\n", i);
+            return 1;
+        }
+    }
+    printf("Test PASSED\n");
+    return 0;
+}
diff --git a/vector_add/c/vector_add.cu b/vector_add/c/vector_add.cu
new file mode 100644
--- /dev/null
+++ b/vector_add/c/vector_add.cu
@@ -0,0 +1,63 @@
+#include <stdio.h>
+#include <cuda_runtime.h>
+// CUDA Kernel
+__global__ void
+vectorAdd(const float *A, const float *B, float *C, int numElements)
+{
+    // Define a target element "i" in terms of block and thread identifiers
+
+    // Perform the vector addition checking the limits of the array!
+}
+
+/**
+ * Host main routine
+ */
+int
+main(void)
+{
+    int numElements = 150000;
+    size_t size = numElements * sizeof(float);
+    printf("[Vector addition of %d elements]\n", numElements);
+
+    float a[numElements],b[numElements],c[numElements];
+    float *a_gpu,*b_gpu,*c_gpu;
+
+    // Allocate device global memory
+    // ...
+    // ...
+    // ...
+
+    for (int i=0;i<numElements;i++)
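
The PyCUDA exercise files listed in the diffstat (vector_add/python/*.py) are not shown in this excerpt. As a rough sketch of the kind of code those exercises target, and not the repository's actual files, a minimal PyCUDA vector addition using SourceModule could look like the following. It assumes numpy and PyCUDA are installed and a CUDA-capable GPU is available; like the Python codes described in the README, it avoids print statements so it can run in an interactive session or a notebook.

    # Minimal PyCUDA sketch (illustrative only; not one of the repository's vector_add_*.py files)
    import numpy as np
    import pycuda.autoinit  # creates a CUDA context on the default device
    import pycuda.driver as drv
    from pycuda.compiler import SourceModule

    # Compile the same kernel used in the CUDA C exercise at runtime
    mod = SourceModule("""
    __global__ void vectorAdd(const float *A, const float *B, float *C, int numElements)
    {
        int i = blockDim.x * blockIdx.x + threadIdx.x;
        if (i < numElements)
            C[i] = A[i] + B[i];
    }
    """)
    vector_add = mod.get_function("vectorAdd")

    num_elements = 150000
    a = np.random.rand(num_elements).astype(np.float32)
    b = np.random.rand(num_elements).astype(np.float32)
    c = np.empty_like(a)

    threads_per_block = 256
    blocks_per_grid = (num_elements + threads_per_block - 1) // threads_per_block

    # drv.In/drv.Out wrap the numpy arrays and handle the host/device copies
    vector_add(drv.In(a), drv.In(b), drv.Out(c), np.int32(num_elements),
               block=(threads_per_block, 1, 1), grid=(blocks_per_grid, 1))

    assert np.allclose(c, a + b)

The block and grid arguments mirror the threadsPerBlock/blocksPerGrid calculation in the CUDA C solution, and drv.In/drv.Out take care of the host-to-device and device-to-host copies around the kernel launch.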