Accuracy of a logistic regression model is always lower than that of a convolutional neural network
Here's the code for a logistic regression model with 100k samples.
The accuracy is only 86.7%, compared with 96% for the ConvNet.
The area under the ROC curve is 0.97 for logistic regression, compared with 0.99 for the ConvNet.
The accuracy is only 86.7%, compared with 96% for the ConvNet.
The area under the ROC curve is 0.97 for logistic regression, compared with 0.99 for the ConvNet.
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.metrics import roc_curve, auc
from sklearn.linear_model import LogisticRegression
from six.moves import cPickle as pickle
from sklearn.metrics import confusion_matrix
def load_data(filename="notMNIST.pickle"):
    """Load the pickled letter dataset from disk.

    Args:
        filename: Path of the pickle file to read. Defaults to
            "notMNIST.pickle" in the current working directory.

    Returns:
        The object stored in the pickle file.

    Raises:
        IOError: If the file cannot be opened (on Python 3 this surfaces as
            FileNotFoundError when the file does not exist). Failing here
            gives a clear message instead of silently returning nothing and
            breaking later at the first subscript.
    """
    # NOTE: unpickling can execute arbitrary code; only load files that come
    # from a trusted source.
    with open(filename, 'rb') as f:
        return pickle.load(f)
# Train a logistic regression classifier on the first `sample_size` training
# examples, report accuracy on the test split, and plot the micro-averaged
# ROC curve.
sample_size = 10000
print('Sample size: ', sample_size)

lt_dt = load_data()
x_train = lt_dt['train_dataset'][:sample_size]
y_train = lt_dt['train_labels'][:sample_size]
x_test = lt_dt['test_dataset']
y_test = lt_dt['test_labels']

# Flatten every sample into a single feature vector (one row per sample);
# -1 lets NumPy infer the feature count so both splits are handled the same.
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

regr = LogisticRegression(solver='sag')
regr.fit(x_train, y_train)

y_predict = regr.predict(x_test)
# Show the confusion matrix; outside an interactive session a bare
# expression would be computed and silently discarded.
print('Confusion matrix:')
print(confusion_matrix(y_test, y_predict))
y_predict_proba = regr.predict_proba(x_test)
print('Accuracy: ', regr.score(x_test, y_test))

# Compute ROC curve and ROC AUC for each class.
# Column i of predict_proba corresponds to regr.classes_[i], so the one-vs-rest
# targets are built from the fitted class labels rather than from an assumed
# 0..9 range.
n_classes = len(regr.classes_)
fpr = dict()
tpr = dict()
roc_auc = dict()
y_true = np.asarray(y_test)
per_class_truth = []
per_class_score = []
for i, label in enumerate(regr.classes_):
    # Vectorised one-vs-rest indicator. A real array is required here: on
    # Python 3 `map` returns a one-shot iterator that NumPy and scikit-learn
    # cannot consume as an array.
    y_test_i = (y_true == label).astype(int)
    per_class_truth.append(y_test_i)
    per_class_score.append(y_predict_proba[:, i])
    fpr[i], tpr[i], _ = roc_curve(y_test_i, y_predict_proba[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Concatenate once after the loop instead of re-allocating on every iteration.
all_y_test_i = np.concatenate(per_class_truth)
all_y_predict_proba = np.concatenate(per_class_score)

# Compute micro-average ROC curve and ROC area: every (sample, class) pair is
# pooled into one binary problem.
fpr["average"], tpr["average"], _ = roc_curve(all_y_test_i, all_y_predict_proba)
roc_auc["average"] = auc(fpr["average"], tpr["average"])

plt.figure()
lw = 2
plt.plot(fpr["average"], tpr["average"], color='darkorange',
         lw=lw, label='ROC curve for average class (area = %0.2f)' % roc_auc["average"])
# Diagonal reference line of a random classifier.
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic for %d samples' % sample_size)
plt.legend(loc="lower right")
plt.show()