"""Element-wise vector addition on the GPU with PyCUDA's gpuarray.

Builds two float32 vectors on the host, copies them to device memory,
adds them with a single overloaded-operator kernel launch, and copies
the result back to the host.
"""
from pycuda import autoinit  # noqa: F401  -- importing initializes the CUDA context
from pycuda import gpuarray
import numpy as np

# Number of elements in each vector.
N = 150_000

# Host-side single-precision inputs:
#   a = [0, 1, 2, ..., N-1],  b = a**2
# np.arange with an explicit dtype builds the ramp in one C-level call,
# avoiding the slow np.array(range(...)) round-trip and an extra astype copy.
a = np.arange(N, dtype=np.float32)
b = a * a  # float32 * float32 stays float32; no cast needed

# Copy the inputs into GPU global memory.
a_gpu = gpuarray.to_gpu(a)
b_gpu = gpuarray.to_gpu(b)

# Perform the vector addition on the GPU.
# The + operator launches an element-wise kernel and allocates a fresh
# GPUArray for the result, so no output buffer needs to be pre-transferred
# (the original uploaded a zero vector only to immediately discard it).
c_gpu = a_gpu + b_gpu

# Copy the result back to host memory.
c = c_gpu.get()