In this challenge, you must use what you've learned to train a convolutional neural network model that classifies images of animals you might find on a safari adventure.
The training images you must use are in the /safari/training folder. Run the cell below to see an example of each image class, and note the shape of the images (which indicates the dimensions of the image and its color channels).
import numpy as np
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
# The images are in the data/shapes folder
data_path = 'data/safari/training'
# Get the class names
classes = os.listdir(data_path)
classes.sort()
print(len(classes), 'classes:')
print(classes)
# Show the first image in each folder
fig = plt.figure(figsize=(12, 12))
i = 0
for sub_dir in os.listdir(data_path):
i+=1
img_file = os.listdir(os.path.join(data_path,sub_dir))[0]
img_path = os.path.join(data_path, sub_dir, img_file)
img = mpimg.imread(img_path)
img_shape = np.array(img).shape
a=fig.add_subplot(1, len(classes),i)
a.axis('off')
imgplot = plt.imshow(img)
a.set_title(img_file + ' : ' + str(img_shape))
plt.show()
Now that you've seen the images, use your preferred framework (PyTorch or TensorFlow) to train a CNN classifier for them. Your goal is to train a classifier with a validation accuracy of 95% or higher.
Add cells as needed to create your solution.
# CNN model using PyTorch
# Import PyTorch libraries
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
# Other libraries we'll use
import numpy as np
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
print("Libraries imported - ready to use PyTorch", torch.__version__)
# Function to ingest data using training and test loaders
def load_dataset(data_path):
# Load all of the images
transformation = transforms.Compose([
# transform to tensors
transforms.ToTensor(),
# Normalize the pixel values (in R, G, and B channels)
transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])
# Load all of the images, transforming them
full_dataset = torchvision.datasets.ImageFolder(
root=data_path,
transform=transformation
)
# Split into training (70% and testing (30%) datasets)
train_size = int(0.7 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size])
# define a loader for the training data we can iterate through in 50-image batches
train_loader = torch.utils.data.DataLoader(
train_dataset,
batch_size=50,
num_workers=0,
shuffle=False
)
# define a loader for the testing data we can iterate through in 50-image batches
test_loader = torch.utils.data.DataLoader(
test_dataset,
batch_size=50,
num_workers=0,
shuffle=False
)
return train_loader, test_loader
# Get the iterative dataloaders for test and training data
train_loader, test_loader = load_dataset(data_path)
print('Data loaders ready')
# Create a neural net class
class Net(nn.Module):
# Constructor
def __init__(self, num_classes=4):
super(Net, self).__init__()
# Our images are RGB, so input channels = 3. We'll apply 12 filters in the first convolutional layer
self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
# We'll apply max pooling with a kernel size of 2
self.pool = nn.MaxPool2d(kernel_size=2)
# A second convolutional layer takes 12 input channels, and generates 12 outputs
self.conv2 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
# A third convolutional layer takes 12 inputs and generates 24 outputs
self.conv3 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3, stride=1, padding=1)
# A drop layer deletes 20% of the features to help prevent overfitting
self.drop = nn.Dropout2d(p=0.2)
# Our 200x200 image tensors will be pooled twice with a kernel size of 2. 200/2/2 is 50.
# So our feature tensors are now 50 x 50, and we've generated 24 of them
# We need to flatten these and feed them to a fully-connected layer
# to map them to the probability for each class
self.fc = nn.Linear(in_features=50 * 50 * 24, out_features=num_classes)
def forward(self, x):
# Use a relu activation function after layer 1 (convolution 1 and pool)
x = F.relu(self.pool(self.conv1(x)))
# Use a relu activation function after layer 2 (convolution 2 and pool)
x = F.relu(self.pool(self.conv2(x)))
# Select some features to drop after the 3rd convolution to prevent overfitting
x = F.relu(self.drop(self.conv3(x)))
# Only drop the features if this is a training pass
x = F.dropout(x, training=self.training)
# Flatten
x = x.view(-1, 50 * 50 * 24)
# Feed to fully-connected layer to predict class
x = self.fc(x)
# Return class probabilities via a log_softmax function
return F.log_softmax(x, dim=1)
print("CNN model class defined!")
def train(model, device, train_loader, optimizer, epoch):
# Set the model to training mode
model.train()
train_loss = 0
print("Epoch:", epoch)
# Process the images in batches
for batch_idx, (data, target) in enumerate(train_loader):
# Use the CPU or GPU as appropriate
data, target = data.to(device), target.to(device)
# Reset the optimizer
optimizer.zero_grad()
# Push the data forward through the model layers
output = model(data)
# Get the loss
loss = loss_criteria(output, target)
# Keep a running total
train_loss += loss.item()
# Backpropagate
loss.backward()
optimizer.step()
# Print metrics for every 10 batches so we see some progress
if batch_idx % 10 == 0:
print('Training set [{}/{} ({:.0f}%)] Loss: {:.6f}'.format(
batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
# return average loss for the epoch
avg_loss = train_loss / (batch_idx+1)
print('Training set: Average loss: {:.6f}'.format(avg_loss))
return avg_loss
def test(model, device, test_loader):
# Switch the model to evaluation mode (so we don't backpropagate or drop)
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
batch_count = 0
for data, target in test_loader:
batch_count += 1
data, target = data.to(device), target.to(device)
# Get the predicted classes for this batch
output = model(data)
# Calculate the loss for this batch
test_loss += loss_criteria(output, target).item()
# Calculate the accuracy for this batch
_, predicted = torch.max(output.data, 1)
correct += torch.sum(target==predicted).item()
# Calculate the average loss and total accuracy for this epoch
avg_loss = test_loss/batch_count
print('Validation set: Average loss: {:.6f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
avg_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
# return average loss for the epoch
return avg_loss
# Now use the train and test functions to train and test the model
device = "cpu"
if (torch.cuda.is_available()):
# if GPU available, use cuda (on a cpu, training will take a considerable length of time!)
device = "cuda"
print('Training on', device)
# Create an instance of the model class and allocate it to the device
model = Net(num_classes=len(classes)).to(device)
# Use an "Adam" optimizer to adjust weights
# (see https://pytorch.org/docs/stable/optim.html#algorithms for details of supported algorithms)
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Specify the loss criteria
loss_criteria = nn.CrossEntropyLoss()
# Track metrics in these arrays
epoch_nums = []
training_loss = []
validation_loss = []
# Train over 5 epochs (in a real scenario, you'd likely use many more)
epochs = 5
for epoch in range(1, epochs + 1):
train_loss = train(model, device, train_loader, optimizer, epoch)
test_loss = test(model, device, test_loader)
epoch_nums.append(epoch)
training_loss.append(train_loss)
validation_loss.append(test_loss)
%matplotlib inline
from matplotlib import pyplot as plt
plt.plot(epoch_nums, training_loss)
plt.plot(epoch_nums, validation_loss)
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['training', 'validation'], loc='upper right')
plt.show()
# Pytorch doesn't have a built-in confusion matrix metric, so we'll use SciKit-Learn
from sklearn.metrics import confusion_matrix
# Set the model to evaluate mode
model.eval()
# Get predictions for the test data and convert to numpy arrays for use with SciKit-Learn
print("Getting predictions from test set...")
truelabels = []
predictions = []
for data, target in test_loader:
for label in target.cpu().data.numpy():
truelabels.append(label)
for prediction in model.cpu()(data).data.numpy().argmax(1):
predictions.append(prediction)
# Plot the confusion matrix
cm = confusion_matrix(truelabels, predictions)
plt.imshow(cm, interpolation="nearest", cmap=plt.cm.Blues)
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, rotation=45)
plt.yticks(tick_marks, classes)
plt.xlabel("Predicted Shape")
plt.ylabel("Actual Shape")
plt.show()
Add code below to save your model's trained weights.
# Save the model weights
model_file = 'safari_classifier.pt'
torch.save(model.state_dict(), model_file)
del model
print('model saved as', model_file)
# Code to save your model
Now that we've trained your model, modify the following code as necessary to use it to predict the classes of the provided test images.
# Function to predict the class of an image
def predict_image(classifier, image):
import numpy
# Default value
index = 0
# Set the classifer model to evaluation mode
classifier.eval()
# Apply the same transformations as we did for the training images
transformation = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])
# Preprocess the image
image_tensor = transformation(image).float()
# Add an extra batch dimension since pytorch treats all inputs as batches
image_tensor = image_tensor.unsqueeze_(0)
# Turn the input into a Variable
input_features = Variable(image_tensor)
# Predict the class of the image
output = classifier(input_features)
index = output.data.numpy().argmax()
# Return the predicted index
return index
# Load your model
model = Net()
model.load_state_dict(torch.load(model_file))
# The images are in the data/shapes folder
test_data_path = 'data/safari/test'
# Show the test images with predictions
fig = plt.figure(figsize=(8, 12))
i = 0
for img_file in os.listdir(test_data_path):
i+=1
img_path = os.path.join(test_data_path, img_file)
img = mpimg.imread(img_path)
# Get the image class prediction
index = predict_image(model, np.array(img))
a=fig.add_subplot(1, len(classes),i)
a.axis('off')
imgplot = plt.imshow(img)
a.set_title(classes[index])
plt.show()
Hopefully, your model predicted all four of the image classes correctly!
import tensorflow
from tensorflow import keras
print('TensorFlow version:',tensorflow.__version__)
print('Keras version:',keras.__version__)
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
%matplotlib inline
# The images are in the data/shapes folder
data_folder = 'data/safari/training'
# Get the class names
classes = os.listdir(data_folder)
classes.sort()
print(len(classes), 'classes:')
print(classes)
# Show the first image in each folder
fig = plt.figure(figsize=(8, 12))
i = 0
for sub_dir in os.listdir(data_folder):
i+=1
img_file = os.listdir(os.path.join(data_folder,sub_dir))[0]
img_path = os.path.join(data_folder, sub_dir, img_file)
img = mpimg.imread(img_path)
a=fig.add_subplot(1, len(classes),i)
a.axis('off')
imgplot = plt.imshow(img)
a.set_title(img_file)
plt.show()
from tensorflow.keras.preprocessing.image import ImageDataGenerator
img_size = (200, 200)
batch_size = 30
print("Getting Data...")
datagen = ImageDataGenerator(rescale=1./255, # normalize pixel values
validation_split=0.3) # hold back 30% of the images for validation
print("Preparing training dataset...")
train_generator = datagen.flow_from_directory(
data_folder,
target_size=img_size,
batch_size=batch_size,
class_mode='categorical',
subset='training') # set as training data
print("Preparing validation dataset...")
validation_generator = datagen.flow_from_directory(
data_folder,
target_size=img_size,
batch_size=batch_size,
class_mode='categorical',
subset='validation') # set as validation data
classnames = list(train_generator.class_indices.keys())
print('Data generators ready')
# Define a CNN classifier network
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
# Define the model as a sequence of layers
model = Sequential()
# The input layer accepts an image and applies a convolution that uses 32 6x6 filters and a rectified linear unit activation function
model.add(Conv2D(32, (6, 6), input_shape=train_generator.image_shape, activation='relu'))
# Next we'll add a max pooling layer with a 2x2 patch
model.add(MaxPooling2D(pool_size=(2,2)))
# We can add as many layers as we think necessary - here we'll add another convolution and max pooling layer
model.add(Conv2D(32, (6, 6), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
# And another set
model.add(Conv2D(32, (6, 6), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
# A dropout layer randomly drops some nodes to reduce inter-dependencies (which can cause over-fitting)
model.add(Dropout(0.2))
# Flatten the feature maps
model.add(Flatten())
# Generate a fully-connected output layer with a predicted probability for each class
# (softmax ensures all probabilities sum to 1)
model.add(Dense(train_generator.num_classes, activation='softmax'))
# With the layers defined, we can now compile the model for categorical (multi-class) classification
model.compile(loss='categorical_crossentropy',
optimizer='adam',
metrics=['accuracy'])
print(model.summary())
# Train the model over 5 epochs using 30-image batches and using the validation holdout dataset for validation
num_epochs = 5
history = model.fit(
train_generator,
steps_per_epoch = train_generator.samples // batch_size,
validation_data = validation_generator,
validation_steps = validation_generator.samples // batch_size,
epochs = num_epochs)
%matplotlib inline
from matplotlib import pyplot as plt
epoch_nums = range(1,num_epochs+1)
training_loss = history.history["loss"]
validation_loss = history.history["val_loss"]
plt.plot(epoch_nums, training_loss)
plt.plot(epoch_nums, validation_loss)
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['training', 'validation'], loc='upper right')
plt.show()
# Tensorflow doesn't have a built-in confusion matrix metric, so we'll use SciKit-Learn
import numpy as np
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline
print("Generating predictions from validation data...")
# Get the image and label arrays for the first batch of validation data
x_test = validation_generator[0][0]
y_test = validation_generator[0][1]
# Use the model to predict the class
class_probabilities = model.predict(x_test)
# The model returns a probability value for each class
# The one with the highest probability is the predicted class
predictions = np.argmax(class_probabilities, axis=1)
# The actual labels are hot encoded (e.g. [0 1 0], so get the one with the value 1
true_labels = np.argmax(y_test, axis=1)
# Plot the confusion matrix
cm = confusion_matrix(true_labels, predictions)
plt.imshow(cm, interpolation="nearest", cmap=plt.cm.Blues)
plt.colorbar()
tick_marks = np.arange(len(classnames))
plt.xticks(tick_marks, classnames, rotation=85)
plt.yticks(tick_marks, classnames)
plt.xlabel("Predicted Shape")
plt.ylabel("Actual Shape")
plt.show()
# Save the trained model
modelFileName = 'safari_classifier.h5'
model.save(modelFileName)
del model # deletes the existing model variable
print('model saved as', modelFileName)
from tensorflow.keras import models
import numpy as np
from random import randint
import os
%matplotlib inline
# Function to predict the class of an image
def predict_image(classifier, image):
from tensorflow import convert_to_tensor
# The model expects a batch of images as input, so we'll create an array of 1 image
imgfeatures = img.reshape(1, img.shape[0], img.shape[1], img.shape[2])
# We need to format the input to match the training data
# The generator loaded the values as floating point numbers
# and normalized the pixel values, so...
imgfeatures = imgfeatures.astype('float32')
imgfeatures /= 255
# Use the model to predict the image class
class_probabilities = classifier.predict(imgfeatures)
# Find the class predictions with the highest predicted probability
index = int(np.argmax(class_probabilities, axis=1)[0])
return index
# Load your model
model = models.load_model(modelFileName) # loads the saved model
# The images are in the data/shapes folder
test_data_path = 'data/safari/test'
# Show the test images with predictions
fig = plt.figure(figsize=(8, 12))
i = 0
for img_file in os.listdir(test_data_path):
i+=1
img_path = os.path.join(test_data_path, img_file)
img = mpimg.imread(img_path)
# Get the image class prediction
index = predict_image(model, np.array(img))
a=fig.add_subplot(1, len(classes),i)
a.axis('off')
imgplot = plt.imshow(img)
a.set_title(classes[index])
plt.show()