96.24% accuracy with higher epoch numbers for Convolutional Neural Network
For a Convolutional Neural Network, many factors affect the model's accuracy.
These factors include the structure of the Convolutional Neural Network, the hyperparameter values, over-fitting, etc.
Only a correct or suitable network structure can produce high model accuracy. If the wrong network structure is used, the accuracy will remain poor no matter how you tune the hyperparameters.
So only after we have found the right network structure can we start tuning the hyperparameters. However, the model may still over-fit. There are several methods to prevent that.
One way to prevent over-fitting is to use the dropout function. Dropout gives the model a way to find alternative paths during training by removing some features according to the dropout ratio.
Another way to prevent over-fitting is to increase the number of samples. Besides, we can also simplify the network structure, since an overly complicated network structure can sometimes cause over-fitting too.
Finally, there is one function that is very useful during model training: batch normalization. Batch normalization allows a higher learning rate, can increase accuracy, and whitens the inputs of the layers.
Here is an example of training the model with an increased number of epochs but the same structure as the previous model, ConvNet test1.
from __future__ import print_function
from six.moves import cPickle as pickle
import os
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.optimizers import RMSprop
from keras.layers.normalization import BatchNormalization
# Training configuration read by the rest of the script.
batch_sizes = 64  # handed to model.fit as batch_size
num_classes = 10  # width of the one-hot labels and of the softmax output layer
epochs = 20  # handed to model.fit as epochs
train_samples = 100000  # upper bound on the training rows kept from the pickle
def load_data():
    """Load the pickled notMNIST dataset from the current working directory.

    Returns:
        The object stored in ``notMNIST.pickle``. The rest of the script
        indexes it with keys such as ``'train_dataset'`` and
        ``'train_labels'``.

    Raises:
        FileNotFoundError: If ``notMNIST.pickle`` does not exist.
    """
    test_filename = "notMNIST.pickle"
    # Open the file directly instead of guarding with os.path.exists(): a
    # missing dataset then fails here with a clear FileNotFoundError rather
    # than surfacing later as an unrelated error when the result is indexed.
    # NOTE: unpickling can execute arbitrary code; only load a trusted file.
    with open(test_filename, 'rb') as f:
        return pickle.load(f)
# Read the pickled dataset once, then pull out the three splits.
lt_dt = load_data()

# Only the first `train_samples` training rows are kept; the validation and
# test splits are used in full.
x_train, y_train = (
    lt_dt['train_dataset'][:train_samples],
    lt_dt['train_labels'][:train_samples],
)
x_valid, y_valid = lt_dt['valid_dataset'], lt_dt['valid_labels']
x_test, y_test = lt_dt['test_dataset'], lt_dt['test_labels']
# Input image dimensions.
img_rows, img_cols = 28, 28

# Put the single channel axis where the active Keras backend expects it.
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

# Reshape every split to (samples,) + input_shape, cast to float32 and divide
# by 255. Assumes the splits are NumPy arrays - TODO confirm against the pickle.
x_train = x_train.reshape((x_train.shape[0],) + input_shape).astype('float32') / 255
x_valid = x_valid.reshape((x_valid.shape[0],) + input_shape).astype('float32') / 255
x_test = x_test.reshape((x_test.shape[0],) + input_shape).astype('float32') / 255

print(x_train.shape[0], 'train samples')
print(x_valid.shape[0], 'valid samples')
print(x_test.shape[0], 'test samples')

# One-hot encode the integer class labels into `num_classes` columns.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_valid = keras.utils.to_categorical(y_valid, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
# Network definition: three 3x3 convolutions (32, 64 and 64 filters) with batch
# normalisation after the first, one 2x2 max-pool, then a 512-unit dense layer
# and a softmax output. Dropout is applied after the pool and the dense layer.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    BatchNormalization(axis=-1),
    Conv2D(64, (3, 3), activation='relu'),
    # Disabled variant: MaxPooling2D(pool_size=(2, 2)) + Dropout(0.25) here.
    Conv2D(64, (3, 3), activation='relu'),
    # Disabled variant: BatchNormalization(axis=-1) here.
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])
model.summary()
# Compile with categorical cross-entropy and the Adadelta optimizer, tracking
# accuracy as the only extra metric.
model.compile(
    optimizer=keras.optimizers.Adadelta(),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

# Train on the training split; the validation split is evaluated every epoch.
model.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=batch_sizes,
    validation_data=(x_valid, y_valid),
    verbose=1,
)

# Final check on the held-out test split: score[0] is the loss and score[1]
# the accuracy.
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
# Save the trained model to disk: architecture as JSON, weights as HDF5.
# NOTE(review): model_from_json is not used in the visible code; presumably it
# is kept for reloading the saved architecture - confirm before removing.
from keras.models import model_from_json

# The JSON string describes the architecture only.
model_json = model.to_json()
with open("conv2D_test5.json", mode="w") as json_file:
    json_file.write(model_json)

# The weights go to a separate HDF5 file.
model.save_weights("conv2D_test5.h5")
print("Saved model to disk")
Outputs:
#Plot ROC
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
def generate_results(y_test, y_score, sample_size=None):
    """Plot the ROC curve for one set of binary labels and scores; print the AUC.

    Args:
        y_test: True binary labels, forwarded to ``roc_curve``.
        y_score: Scores for the positive class, forwarded to ``roc_curve``.
        sample_size: Sample count shown in the plot title. Defaults to
            ``len(y_test)``, the number of samples the curve is built from.
    """
    # The title used to format an undefined module-level name `sample_size`,
    # which raised NameError on every call. It is now an optional argument.
    # NOTE(review): pass the training-set size explicitly if that is the count
    # the title was meant to show.
    if sample_size is None:
        sample_size = len(y_test)

    fpr, tpr, _ = roc_curve(y_test, y_score)
    roc_auc = auc(fpr, tpr)

    plt.figure()
    lw = 2
    plt.plot(fpr, tpr, lw=lw,
             label='ROC curve for average class (area = %0.5f)' % roc_auc)
    # Diagonal reference line from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.05])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC curve for ConvNet for %d samples' % sample_size)
    plt.legend(loc="lower right")
    plt.show()
    print('AUC: %f' % roc_auc)
# Score the test split with the trained model, then plot a ROC curve.
print('Predicting on test data')
y_score = model.predict(x_test)
print('Generating results')
# Only column 0 of the one-hot labels and of the predictions is passed, so the
# curve is for class index 0 against all other classes.
# NOTE(review): the legend text inside generate_results says "average class" -
# confirm which of the two was intended.
generate_results(y_test[:, 0], y_score[:, 0])
ROC Curve: