Commit 1e62375b authored by Agnes Ansari's avatar Agnes Ansari
Browse files

add cifar10 benchmark

parents
from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Input, Conv2D, MaxPooling2D
import tensorflow as tf
from keras.utils import multi_gpu_model
from keras.callbacks import TensorBoard
import os
import time
from datetime import date
# CIFAR-10 CNN training benchmark (multi-GPU variant).
# Trains a small convnet on CIFAR-10 replicated across CIFAR10_NUM_GPUS GPUs,
# then reports test loss/accuracy, saves the model, and prints wall-clock time.
start = time.time()
today = date.today()


def _require_env(name):
    """Return the value of environment variable `name`, failing with a clear message if unset.

    The original code did `int(os.environ.get(...))`, which raises an opaque
    `TypeError: int() argument must be ...` when the variable is missing.
    """
    value = os.environ.get(name)
    if value is None:
        raise RuntimeError('Required environment variable %s is not set' % name)
    return value


# Benchmark parameters, all supplied via the environment (see config.sh).
num_gpus = int(_require_env('CIFAR10_NUM_GPUS'))
num_cores = int(_require_env('CIFAR10_NUM_CORES'))
batch_size = int(_require_env('CIFAR10_BATCHSIZE'))
num_classes = 10
epochs = 200
num_predictions = 20
save_dir = _require_env('CIFAR10_PATH_SAVED_MODELS')

# One run tag, reused for both the TensorBoard log directory and the saved
# model file name (the original built the same string twice in two styles).
run_tag = 'gpus%d_cores%d_batchsize%d_%d_%d_%d' % (
    num_gpus, num_cores, batch_size, today.year, today.month, today.day)
log_dir = os.path.join(_require_env('CIFAR10_PATH_TENSORBOARD_LOGS'), run_tag)
model_name = 'keras_cifar10_trained_model_' + run_tag + '.h5'

if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
if not os.path.isdir(log_dir):
    os.makedirs(log_dir)

# The data, shuffled and split between train and test sets.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print('y_train shape:', y_train.shape)
num_train, height, width, depth = x_train.shape

# Scale pixel values from [0, 255] to [0, 1].
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Convert class vectors to binary class matrices (one-hot).
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Small VGG-style convnet: two conv blocks followed by a dense classifier.
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same', input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

# Train the model using RMSprop.
opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

# The template model must be compiled too: model.evaluate() below needs it.
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# Replicate the model on num_gpus GPUs. Per the Keras multi_gpu_model docs,
# the replica shares weights with `model`, so saving/evaluating the template
# model reflects the parallel training.
parallel_model = multi_gpu_model(model, gpus=num_gpus)
parallel_model.compile(loss='categorical_crossentropy',
                       optimizer=opt,
                       metrics=['accuracy'])

tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=0,
                          write_graph=True, write_grads=False,
                          write_images=False)
parallel_model.fit(x_train, y_train,
                   batch_size=batch_size,
                   epochs=epochs,
                   validation_data=(x_test, y_test),
                   shuffle=True,
                   callbacks=[tensorboard])
print('###############################\n')

# Save model and weights (via the template model; weights are shared).
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Saved trained model: %s \n' % model_path)
print('Tensorboard log: %s \n' % log_dir)

# Score trained model.
scores = model.evaluate(x_test, y_test, verbose=1)
print('GPUS:', num_gpus, '\n')
print('Batch size:', batch_size, '\n')
print('Test loss:', scores[0], '\n')
print('Test accuracy:', scores[1], '\n')
end = time.time()
print('Total duration:', ((end - start)/60), 'minutes')
from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Input, Conv2D, MaxPooling2D
import tensorflow as tf
from keras.utils import multi_gpu_model
from keras.callbacks import TensorBoard
import os
import time
from datetime import date
# CIFAR-10 CNN training benchmark (single-GPU variant).
# Trains a small convnet on CIFAR-10, then reports test loss/accuracy,
# saves the model, and prints the total wall-clock duration.
start = time.time()
today = date.today()


def _require_env(name):
    """Return the value of environment variable `name`, failing with a clear message if unset.

    Guards against the opaque `TypeError` that `int(os.environ.get(...))`
    raises when the variable is missing.
    """
    value = os.environ.get(name)
    if value is None:
        raise RuntimeError('Required environment variable %s is not set' % name)
    return value


# Benchmark parameters, all supplied via the environment (see config.sh).
# num_gpus/num_cores are only used here to label the run artifacts.
num_gpus = int(_require_env('CIFAR10_NUM_GPUS'))
num_cores = int(_require_env('CIFAR10_NUM_CORES'))
batch_size = int(_require_env('CIFAR10_BATCHSIZE'))
num_classes = 10
epochs = 200
num_predictions = 20
save_dir = _require_env('CIFAR10_PATH_SAVED_MODELS')

# Build the run tag once and reuse it for both the TensorBoard log directory
# and the saved model file name (the original duplicated this string logic).
run_tag = 'gpus%d_cores%d_batchsize%d_%d_%d_%d' % (
    num_gpus, num_cores, batch_size, today.year, today.month, today.day)
log_dir = os.path.join(_require_env('CIFAR10_PATH_TENSORBOARD_LOGS'), run_tag)
model_name = 'keras_cifar10_trained_model_' + run_tag + '.h5'

if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
if not os.path.isdir(log_dir):
    os.makedirs(log_dir)

# The data, shuffled and split between train and test sets.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print('y_train shape:', y_train.shape)
num_train, height, width, depth = x_train.shape

# Scale pixel values from [0, 255] to [0, 1].
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Convert class vectors to binary class matrices (one-hot).
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Small VGG-style convnet: two conv blocks followed by a dense classifier.
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same', input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

# Train the model using RMSprop.
opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=0,
                          write_graph=True, write_grads=False,
                          write_images=False)
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(x_test, y_test),
          shuffle=True,
          callbacks=[tensorboard])
print('###############################\n')

# Save model and weights.
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Saved trained model: %s \n' % model_path)
print('Tensorboard log: %s \n' % log_dir)

# Score trained model.
scores = model.evaluate(x_test, y_test, verbose=1)
print('GPUS:', num_gpus, '\n')
print('Batch size:', batch_size, '\n')
print('Test loss:', scores[0], '\n')
print('Test accuracy:', scores[1], '\n')
end = time.time()
print('Total duration:', ((end - start)/60), 'minutes')
#!/bin/bash
# Configuration template for the CIFAR-10 Keras benchmark.
# Replace every <placeholder> below with a value matching your infrastructure,
# then pass this file to run.sh.

# Root of this repository (run.sh uses it to locate the Python script).
export CIFAR10_PATH_BASE=<path_to_this_repo>
# Which benchmark variant to run.
export CIFAR10_PYTHON_FILE_NAME=<cifar10_cnn_single_gpu.py or cifar10_cnn_multi_gpu.py>
# Number of GPUs (used by the multi-GPU script for replication; also recorded
# in log/model names by both scripts).
export CIFAR10_NUM_GPUS=<1-4>
# Training batch size.
export CIFAR10_BATCHSIZE=<64-2048>
# Number of CPU cores (only recorded in log/model names by the scripts).
export CIFAR10_NUM_CORES=<1-16>
# Directory where trained .h5 models are written.
export CIFAR10_PATH_SAVED_MODELS=<path_to_saved_models_folder>
# Directory where TensorBoard event logs are written.
export CIFAR10_PATH_TENSORBOARD_LOGS=<path_to_tensorboard_logs_folder>
# Force Keras to use the TensorFlow backend.
export KERAS_BACKEND=tensorflow
How to run this benchmark
=========================
Requirements: Python (tested with Python 3) with the tensorflow-gpu (or at least tensorflow) and keras packages installed
Step 1: modify config.sh so that the environment variables match your infrastructure
Step 2: run the benchmark with this command
./run.sh <path_to_config_file>
As CIFAR10 is a very common dataset, Keras has a built-in, ready to use function to download it (in ~/.keras/datasets) and create train and test sets:
from keras.datasets import cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
outputs:
- model (located in CIFAR10_PATH_SAVED_MODELS)
- logs (located in CIFAR10_PATH_TENSORBOARD_LOGS)
To view logs, run this command:
tensorboard --logdir <CIFAR10_PATH_TENSORBOARD_LOGS>/gpus<CIFAR10_NUM_GPUS>_cores<CIFAR10_NUM_CORES>_batchsize<CIFAR10_BATCHSIZE>_<year>_<month>_<day>/
This command starts a local web server that serves interactive graphs and diagrams of the training runs as HTML pages
#!/bin/bash
# Run the CIFAR-10 benchmark with settings loaded from a config file.
# Usage: ./run.sh <path_to_config_file>

# Fail fast with a usage message instead of sourcing an empty/missing path.
if [ $# -lt 1 ]; then
    echo "Usage: $0 <path_to_config_file>" >&2
    exit 1
fi

CONFIG="$1"
if [ ! -f "$CONFIG" ]; then
    echo "Error: config file '$CONFIG' not found" >&2
    exit 1
fi

# Quote the path so configs in directories containing spaces still work.
source "$CONFIG"

echo "####### CONFIG #######"
echo "CIFAR10_PATH_BASE: $CIFAR10_PATH_BASE"
echo "CIFAR10_PYTHON_FILE_NAME: $CIFAR10_PYTHON_FILE_NAME"
echo "CIFAR10_NUM_GPUS: $CIFAR10_NUM_GPUS"
echo "CIFAR10_NUM_CORES: $CIFAR10_NUM_CORES"
echo "CIFAR10_BATCHSIZE: $CIFAR10_BATCHSIZE"
echo "CIFAR10_PATH_SAVED_MODELS: $CIFAR10_PATH_SAVED_MODELS"
echo "CIFAR10_PATH_TENSORBOARD_LOGS: $CIFAR10_PATH_TENSORBOARD_LOGS"

python "$CIFAR10_PATH_BASE/$CIFAR10_PYTHON_FILE_NAME"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment