Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Required imports for the exercise
from pycuda import autoinit
from pycuda import gpuarray
import numpy as np
from pycuda.compiler import SourceModule
# Initialize vectors "a" and "b" with some numbers,
# and vector "c" with zeros
# Use numpy to define single precision vectors
# a = ...
# b = ...
# c = ...
# Create the corresponding vectors in GPU memory
# a_gpu = ...
# b_gpu = ...
# c_gpu = ...
# CREATE A CUDA C FILE WITH A VECTOR ADDITION KERNEL
# (call it vector_add.cu)
# Open and read the file with CUDA code
# ...
# Create the source module for the code
# ...
# Get the kernel function from the source module
# ...
# Define the desired number of threads per block
# and blocks per grid
# ...
# Execute the imported kernel with the previous data layout
# ...
# Get the result
# ...