# Element-wise vector addition on the GPU with PyCUDA.
#
# Builds two single-precision host vectors, copies them to GPU memory,
# adds them with an ElementwiseKernel, and copies the sum back to the host.

# Required imports for the exercise
import numpy as np
from pycuda import autoinit  # noqa: F401  (imported for its side effect: sets up the CUDA context)
from pycuda import gpuarray
from pycuda.elementwise import ElementwiseKernel

# Initialize vectors "a" and "b" with some numbers,
# and vector "c" with zeros.
# Use numpy to define single precision vectors; the dtype is requested
# directly so no intermediate int64/float64 arrays are created.
aux = range(150000)
a = np.array(aux, dtype=np.float32)
b = a * a  # float32 * float32 stays float32
c = np.zeros(len(aux), dtype=np.float32)

# Create the corresponding vectors in GPU memory
a_gpu = gpuarray.to_gpu(a)
b_gpu = gpuarray.to_gpu(b)
c_gpu = gpuarray.to_gpu(c)

# Define a CUDA function for vector addition
# using an element-wise kernel.
# "i" in the operation string is the per-element index.
myCudaFunc = ElementwiseKernel(
    arguments="float *a, float *b, float *c",
    operation="c[i] = a[i]+b[i]",
    name="mySumK",
)

# Call the created function
myCudaFunc(a_gpu, b_gpu, c_gpu)

# Get the result back as a host-side numpy array
c = c_gpu.get()