# vector_add_3.py

# Required imports for the exercise
from pycuda import autoinit
from pycuda import gpuarray
import numpy as np
from pycuda.compiler import SourceModule

# Initialize vectors "a" and "b" with some numbers,
# and vector "c" with zeros
# Use numpy to define single precision vectors
# a = ...
# b = ...
# c = ...

# Create the corresponding vectors in GPU memory
# a_gpu = ...
# b_gpu = ...
# c_gpu = ...

# CREATE A CUDA C FILE WITH A VECTOR ADDITION KERNEL
# (call it vector_add.cu)

# Open and read the file with CUDA code
# ...

# Create the source module for the code
# ...

# Import the kernel
# ...

# Define the desired number of threads per block
# and blocks per grid
# ...

# Execute the imported kernel with the previous data layout
# ...

# Get the result (copy c_gpu from GPU memory back to the host)
# ...