###########################################################################################
# digit code source: #
# “Handwritten Digit Recognition with Scikit-Learn.” #
# The Data Frog, thedatafrog.com/en/articles/handwritten-digit-recognition-scikit-learn/. #
###########################################################################################
from sklearn import datasets
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from scipy.io import loadmat
import numpy as np
def show_digit(i):
fig = plt.figure(figsize=(4,4))
#plt.subplot(4,4,i+1)
plt.imshow(i, cmap='binary')
#plt.title(digits.target[i])
plt.axis('off')
plt.show()
digits = datasets.load_digits()
#####################################################################################################
# conv and pool functions code source: #
# https://towardsdatascience.com/a-guide-to-convolutional-neural-networks-from-scratch-f1e3bfc3e2de #
#####################################################################################################
def relu(Z):
return np.maximum(0, Z)
def conv(x, filt, bias, stride, pad):
"""
- N: number of images
- H: height (number of rows)
- W: width (number of columns)
- F: number of filters
- bias: array of biases to apply to each filter
- stride: stride value to apply to each image when convolving
- pad: number of rows and columns to zero-pad around image
"""
N, H, W = x.shape
F, HH, WW = filt.shape
# get output dimensions
H_out = 1 + (H + 2 * pad - HH) // stride
W_out = 1 + (W + 2 * pad - WW) // stride
out = np.zeros((N, F, H_out, W_out))
pad_widths = ((0,), (pad,), (pad,))
xpad = np.pad(x, pad_widths, 'constant')
Npad, Hpad, Wpad = xpad.shape
# perform convolution
for n in range(N):
for f in range(F):
for i in range(0, Hpad-(HH-1), stride):
for j in range(0, Wpad-(WW-1), stride):
prod = np.sum(np.multiply(filt[f], xpad[n, i:i+HH, j:j+WW]))
out[n, f, int(i/stride), int(j/stride)] = prod + bias[f]
cache = (x, filt, bias, stride, pad)
return out, cache
def pool(x, Hp, Wp, stride):
"""
- N: number of images
- F: number of filters applied to each image
- H: height (number of rows)
- W: width (number of columns)
- Hp: height to pool by
- Wp: width to pool by
- stride: stride value to apply to each image when pooling
"""
# currently does max pooling
# get output dimensions
N, F, H, W = x.shape
H_out = (H - Hp) // stride + 1
W_out = (W - Wp) // stride + 1
out = np.zeros((N, F, H_out, W_out))
# pool
for n in range(N):
for f in range(F):
for i in range(H_out):
for j in range(W_out):
out[n, f, i, j] = np.max(x[n, f, i*stride:i*stride+Hp, j*stride:j*stride+Wp])
cache = (x, Hp, Wp, stride)
return out, cache
def CNN(filts_size, numFilts, convStride, poolStride, activ, hidden1, hidden2):
"""
- filts_size: specifies the size of filters to use. Can be 2 (2x2), 3 (3x3), or 23 (both)
- numFilts: specifies the number of filters to use. If unused, just set to large number.
- convStride: stride of the convolutional layer
- poolStride: stride of the pooling layer
- activ: the activation function to use in the fully-connected network. Can be 'identity', 'logistic', 'relu', 'softmax', 'tanh'
- hidden1: The number of nodes to use in the first hidden layer
- hidden2: The number of nodes to use in the second hidden layer
"""
### Preprocessing input
# Flatten images
# load UCI ML hand-written digits
y = digits.target
x = digits.images.reshape((len(digits.images), -1))
dim = 8
# Split data into training and testing
x_train = x[:1000]
y_train = y[:1000]
x_test = x[1000:]
y_test = y[1000:]
# some example filters to apply at the convolutional layer
rand_filt2 = np.random.rand(2, 2)
identity2 = np.array([[1, 0],[0,0]])
top_edge_filt2 = np.array([[-1, -1],[1, 1]])
bot_edge_filt2 = np.array([[1, 1],[-1, -1]])
rgt_edge_filt2 = np.array([[-1, 1],[-1, 1]])
lft_edge_filt2 = np.array([[1, -1],[1, -1]])
diag_filt2 = np.array([[1, -1],[-1, 1]])
back_diag_filt2 = np.array([[-1, 1],[1, -1]])
rand_filt3 = np.random.rand(3, 3)
identity3 = np.array([[0,0,0],[0,1,0],[0,0,0]])
top_edge_filt3 = np.array([[-1, -1,-1],[0,0,0],[1,1,1]])
bot_edge_filt3 = np.array([[1,1,1],[0,0,0],[-1,-1,-1]])
rgt_edge_filt3 = np.array([[-1,0,1],[-1,0,1],[-1,0,1]])
lft_edge_filt3 = np.array([[1,0,-1],[1,0,-1],[1,0,-1]])
diag_filt3 = np.array([[1,0,-1],[0,0,0],[-1,0,1]])
back_diag_filt3 = np.array([[-1,0,1],[0,0,0],[1,0,-1]])
top_edge_filt23 = np.array([[-1, -1,0],[1, 1,0],[0,0,0]])
bot_edge_filt23 = np.array([[1, 1,0],[-1, -1,0],[0,0,0]])
rgt_edge_filt23 = np.array([[-1, 1,0],[-1, 1,0],[0,0,0]])
lft_edge_filt23 = np.array([[1, -1,0],[1, -1,0],[0,0,0]])
diag_filt23 = np.array([[1, -1,0],[-1, 1,0],[0,0,0]])
back_diag_filt23 = np.array([[-1, 1,0],[1, -1,0],[0,0,0]])
x_train = x_train.reshape(x_train.shape[0],dim,dim)
if (filts_size == 2):
filts = np.array([rand_filt2, identity2, top_edge_filt2, bot_edge_filt2, rgt_edge_filt2, lft_edge_filt2, diag_filt2, back_diag_filt2])
elif (filts_size == 3):
filts = np.array([rand_filt3, identity3, top_edge_filt3, bot_edge_filt3, rgt_edge_filt3, lft_edge_filt3, diag_filt3, back_diag_filt3])
elif (filts_size == 23):
filts = np.array([rand_filt3, identity3, top_edge_filt3, bot_edge_filt3, rgt_edge_filt3, lft_edge_filt3, diag_filt3, back_diag_filt3, top_edge_filt23, bot_edge_filt23, rgt_edge_filt23, lft_edge_filt23, diag_filt23, back_diag_filt23])
if (numFilts < filts.size):
filts = filts[:numFilts]
bias = np.zeros((filts.shape[0], 1))
### Set up the CNN for training
# Layer 1: convolutional
y0, c0 = conv(x_train,filts,bias,convStride,1)
# Layer 2: ReLu activation
N, F, H, W = y0.shape
for n in range(N):
for f in range(F):
y0[n,f,:,:] = relu(y0[n,f,:,:])
# Layer 3: Pooling
p0, c1 = pool(y0, 2, 2, poolStride)
# Flatten input for NN
p0 = p0.reshape(y_train.size, int(p0.size/y_train.size))
N, F = p0.shape
# Train sklearn's neural net module using the output of our pooling layer
clf = MLPClassifier(activation=activ, solver='sgd', alpha=1e-5,hidden_layer_sizes=(hidden1, hidden2), max_iter=1000, random_state=1)
clf.fit(p0, y_train)
y_hat_train = clf.predict(p0)
print("Training Error Rate: ", np.count_nonzero(y_hat_train - y_train)/y_train.shape[0])
### Test the CNN
# Feed the testing data forward through conv and pooling layers
# Layer 1: convolutional
x_test = x_test.reshape(x_test.shape[0],dim,dim)
y0, c0 = conv(x_test,filts,bias,convStride,1)
# Layer 2: ReLu activation
N, F, H, W = y0.shape
for n in range(N):
for f in range(F):
y0[n,f,:,:] = relu(y0[n,f,:,:])
# Layer 3: Pooling
p0, c1 = pool(y0, 2, 2, poolStride)
# Flatten input for NN
p0 = p0.reshape(N, p0[0].size)
N, F = p0.shape
# Layer 4: predict with sklearn neural network
y_hat = clf.predict(p0)
print("Testing Error Rate: ", np.count_nonzero(y_hat - y_test)/y_test.shape[0])