Assignment: Do Problems 11.3 and 11.4 in the textbook.

Problem 11.3

Setup + Preprocessing

library(tidyverse)
library(neuralnet)
library(NeuralNetTools)
library(caret)
library(forecast)
library(gains)
ToyotaCorolla <- read.csv("/Users/Simbo/Desktop/School/STAT415/DMBA-R-datasets/ToyotaCorolla.csv")

ToyotaCorolla <- ToyotaCorolla %>% 
                    select(Price, Age_08_04, KM, Fuel_Type, HP, Automatic, Doors, 
                           Quarterly_Tax, Mfr_Guarantee, Guarantee_Period, Airco, 
                           Automatic_airco, CD_Player, Powered_Windows, Sport_Model, Tow_Bar)

summary(ToyotaCorolla)
##      Price         Age_08_04           KM          Fuel_Type        
##  Min.   : 4350   Min.   : 1.00   Min.   :     1   Length:1436       
##  1st Qu.: 8450   1st Qu.:44.00   1st Qu.: 43000   Class :character  
##  Median : 9900   Median :61.00   Median : 63390   Mode  :character  
##  Mean   :10731   Mean   :55.95   Mean   : 68533                     
##  3rd Qu.:11950   3rd Qu.:70.00   3rd Qu.: 87021                     
##  Max.   :32500   Max.   :80.00   Max.   :243000                     
##        HP          Automatic           Doors       Quarterly_Tax   
##  Min.   : 69.0   Min.   :0.00000   Min.   :2.000   Min.   : 19.00  
##  1st Qu.: 90.0   1st Qu.:0.00000   1st Qu.:3.000   1st Qu.: 69.00  
##  Median :110.0   Median :0.00000   Median :4.000   Median : 85.00  
##  Mean   :101.5   Mean   :0.05571   Mean   :4.033   Mean   : 87.12  
##  3rd Qu.:110.0   3rd Qu.:0.00000   3rd Qu.:5.000   3rd Qu.: 85.00  
##  Max.   :192.0   Max.   :1.00000   Max.   :5.000   Max.   :283.00  
##  Mfr_Guarantee    Guarantee_Period     Airco        Automatic_airco  
##  Min.   :0.0000   Min.   : 3.000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.: 3.000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.0000   Median : 3.000   Median :1.0000   Median :0.00000  
##  Mean   :0.4095   Mean   : 3.815   Mean   :0.5084   Mean   :0.05641  
##  3rd Qu.:1.0000   3rd Qu.: 3.000   3rd Qu.:1.0000   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :36.000   Max.   :1.0000   Max.   :1.00000  
##    CD_Player      Powered_Windows  Sport_Model        Tow_Bar      
##  Min.   :0.0000   Min.   :0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :1.000   Median :0.0000   Median :0.0000  
##  Mean   :0.2187   Mean   :0.562   Mean   :0.3001   Mean   :0.2779  
##  3rd Qu.:0.0000   3rd Qu.:1.000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.000   Max.   :1.0000   Max.   :1.0000

Convert categorical variables to dummies:

# Fuel_Type variable
ToyotaCorolla$Petrol = 1*(ToyotaCorolla$Fuel_Type == "Petrol")
ToyotaCorolla$Diesel = 1*(ToyotaCorolla$Fuel_Type == "Diesel")
ToyotaCorolla$CNG = 1*(ToyotaCorolla$Fuel_Type == "CNG")

# Doors variable
ToyotaCorolla$Door2 = 1*(ToyotaCorolla$Doors == 2)
ToyotaCorolla$Door3 = 1*(ToyotaCorolla$Doors == 3)
ToyotaCorolla$Door4 = 1*(ToyotaCorolla$Doors == 4)
ToyotaCorolla$Door5 = 1*(ToyotaCorolla$Doors == 5)

Partition data into 60/40 Split:

set.seed(100)
n = nrow(ToyotaCorolla)

train.index = sample(n, n*0.6)
train.df = ToyotaCorolla[train.index, ]   # 861 observations
valid.df = ToyotaCorolla[-train.index, ]  # 575 observations

Preprocess (normalize) quantitative variables to a 0-1 scale:

norm.values = preProcess(train.df, method = "range")

train.norm = predict(norm.values, train.df)
valid.norm = predict(norm.values, valid.df)

summary(train.norm)
##      Price          Age_08_04            KM          Fuel_Type        
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Length:861        
##  1st Qu.:0.1523   1st Qu.:0.5443   1st Qu.:0.1733   Class :character  
##  Median :0.2061   Median :0.7595   Median :0.2611   Mode  :character  
##  Mean   :0.2339   Mean   :0.6948   Mean   :0.2793                     
##  3rd Qu.:0.2748   3rd Qu.:0.8608   3rd Qu.:0.3534                     
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000                     
##        HP           Automatic           Doors        Quarterly_Tax   
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.1707   1st Qu.:0.00000   1st Qu.:0.3333   1st Qu.:0.1894  
##  Median :0.3333   Median :0.00000   Median :0.6667   Median :0.2500  
##  Mean   :0.2663   Mean   :0.04646   Mean   :0.6752   Mean   :0.2529  
##  3rd Qu.:0.3333   3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0.2500  
##  Max.   :1.0000   Max.   :1.00000   Max.   :1.0000   Max.   :1.0000  
##  Mfr_Guarantee    Guarantee_Period      Airco        Automatic_airco  
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.00000   Median :1.0000   Median :0.00000  
##  Mean   :0.4065   Mean   :0.02432   Mean   :0.5145   Mean   :0.05343  
##  3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :1.00000   Max.   :1.0000   Max.   :1.00000  
##    CD_Player      Powered_Windows   Sport_Model        Tow_Bar      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :1.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.2149   Mean   :0.5575   Mean   :0.2927   Mean   :0.2846  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##      Petrol           Diesel            CNG               Door2         
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:0.000000   1st Qu.:0.000000  
##  Median :1.0000   Median :0.0000   Median :0.000000   Median :0.000000  
##  Mean   :0.8839   Mean   :0.1069   Mean   :0.009291   Mean   :0.001161  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.000000   3rd Qu.:0.000000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.000000   Max.   :1.000000  
##      Door3           Door4             Door5       
##  Min.   :0.000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.000   Median :0.00000   Median :0.0000  
##  Mean   :0.439   Mean   :0.09292   Mean   :0.4669  
##  3rd Qu.:1.000   3rd Qu.:0.00000   3rd Qu.:1.0000  
##  Max.   :1.000   Max.   :1.00000   Max.   :1.0000

Create Few Different Neural Network Models

Neural Network #1 (1 Hidden Layer, 2 Nodes)

nn1 <- neuralnet(Price ~ 
                  Age_08_04 + KM + HP + Automatic + Quarterly_Tax +
                  Mfr_Guarantee + Guarantee_Period + Airco + Automatic_airco + 
                  CD_Player + Powered_Windows + Sport_Model + Tow_Bar +
                  Petrol + Diesel + Door2 + Door3 + Door4,
                data = train.norm, 
                hidden = 2,
                linear.output = TRUE)

plot(nn1, rep = "best")

Neural Network #2 (1 Hidden Layer, 5 Nodes)

nn2 <- neuralnet(Price ~ 
                  Age_08_04 + KM + HP + Automatic + Quarterly_Tax +
                  Mfr_Guarantee + Guarantee_Period + Airco + Automatic_airco + 
                  CD_Player + Powered_Windows + Sport_Model + Tow_Bar +
                  Petrol + Diesel + Door2 + Door3 + Door4,
                data = train.norm, 
                hidden = 5,
                linear.output = TRUE)

plot(nn2, rep = "best")

Neural Network #3 (2 Hidden Layers, 5 Nodes each)

nn3 <- neuralnet(Price ~ 
                  Age_08_04 + KM + HP + Automatic + Quarterly_Tax +
                  Mfr_Guarantee + Guarantee_Period + Airco + Automatic_airco + 
                  CD_Player + Powered_Windows + Sport_Model + Tow_Bar +
                  Petrol + Diesel + Door2 + Door3 + Door4,
                data = train.norm, 
                hidden = c(5,5),
                linear.output = TRUE)

plot(nn3, rep = "best")

Comparing Neural Network Models

# These will help unscale our predictions later
min = min(train.df$Price)
max = max(train.df$Price)

unscale.price <- function(scaled.price){
  unscaled = scaled.price*(max-min) + min
  return(unscaled)
}

Performance of Neural Network 1 (Single hidden layer, 2 nodes)

Using the first neural network (with 1 hidden layer, 2 nodes):
Let’s compare the predictions of the training data with the actual price.

pred1.train = predict(nn1, newdata = train.norm)

pred1.unscaled = unscale.price(pred1.train)

pred1 <- data.frame(Actual = train.df$Price, 
                    Predicted = pred1.unscaled)
head(pred1)
##      Actual Predicted
## 503    9900 10014.089
## 1382   7750  7617.275
## 985    9450  9656.039
## 1004  10250 10769.756
## 919    8950  9820.961
## 470   11250 10694.251

We can look at our predictions (on the training data) for the first model graphically:

plot(Actual ~ Predicted, data = pred1)
abline(0, 1, col = "red")

The RMSE is the average standard deviation of our predictions from the actual car price. If our RMSE was 0, the points in the graph above would fall exactly on the red line. Our goal is to minimize the distance between the points and this line.

The RMSE for our training data for our first Neural Network Model is:
$961.74

pred1.lm = lm(Predicted ~ Actual, data = pred1)
accuracy(pred1.lm)
##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -2.434109e-14 944.7547 739.3042 -0.5832482 7.252601 0.3010922

As we would expect, the RMSE is bigger when predicting the validation set than the training set:
The RMSE for our validation set for the 1st model is $1098.23:

val1 = predict(nn1, newdata = valid.norm)

val1.unscaled = unscale.price(val1)

val1.pred <- data.frame(Actual = valid.df$Price,
                        Predicted = val1.unscaled)

val1.lm = lm(Predicted ~ Actual, data = val1.pred)

accuracy(val1.lm)
##                        ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 9.977314e-14 1107.249 829.6625 -0.7938356 7.928735 0.3150899

So now let’s compare the first model’s RMSE to the other models containing more nodes and more layers:

Performance of Neural Network 2 (Single hidden layer, 5 nodes)

The training set for our second model results in a RMSE of $858.93, indicating having more nodes trains our model better on the training set.

pred2.train = predict(nn2, newdata = train.norm)

pred2.unscaled = unscale.price(pred2.train)

pred2 <- data.frame(Actual = train.df$Price, 
                    Predicted = pred2.unscaled)

pred2.lm = lm(Predicted ~ Actual, data = pred2)
accuracy(pred2.lm)
##                        ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 5.243821e-15 892.4856 700.2037 -0.5490938 6.979723 0.2866437

Let’s see how our second model (1 hidden layer, 5 nodes) does on unseen data:
Although the RMSE for the training set is less for this model, this model results in a RMSE of $1188.92 on the validation set, which is about $90 more than first model.

val2 = predict(nn2, newdata = valid.norm)

val2.unscaled = unscale.price(val2)

val2.pred <- data.frame(Actual = valid.df$Price,
                        Predicted = val2.unscaled)

val2.lm = lm(Predicted ~ Actual, data = val2.pred)

accuracy(val2.lm)
##                        ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 2.613297e-13 1172.428 853.6804 -0.9622825 8.306225 0.3231541

Performance of Neural Network 3 (2 hidden layers, 5 nodes each)

The training set for our third model results in a RMSE of $871.01, which is lower than our first model and slightly higher than our second model.

pred3.train = predict(nn3, newdata = train.norm)

pred3.unscaled = unscale.price(pred3.train)

pred3 <- data.frame(Actual = train.df$Price, 
                    Predicted = pred3.unscaled)

pred3.lm = lm(Predicted ~ Actual, data = pred3)
accuracy(pred3.lm)
##                       ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 9.46636e-15 885.2456 699.0072 -0.5427908 6.963836 0.2860731

Our third model has a RMSE (on the validation data) of $1145.06, which is higher than the first model, but less than the second model.

val3 = predict(nn3, newdata = valid.norm)

val3.unscaled = unscale.price(val3)

val3.pred <- data.frame(Actual = valid.df$Price,
                        Predicted = val3.unscaled)

val3.lm = lm(Predicted ~ Actual, data = val3.pred)

accuracy(val3.lm)
##                        ME    RMSE      MAE        MPE     MAPE      MASE
## Training set 7.635651e-14 1190.08 878.1159 -0.9896157 8.423273 0.3312438

Summary of Problem 11.3

As we saw, the first neural network (1 hidden layer, 2 nodes) performed best on a validation set, although its RMSE on the training set was the highest (worst) of the 3 models.

accuracy(val1.lm)
##                        ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 9.977314e-14 1107.249 829.6625 -0.7938356 7.928735 0.3150899
accuracy(val2.lm)
##                        ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 2.613297e-13 1172.428 853.6804 -0.9622825 8.306225 0.3231541
accuracy(val3.lm)
##                        ME    RMSE      MAE        MPE     MAPE      MASE
## Training set 7.635651e-14 1190.08 878.1159 -0.9896157 8.423273 0.3312438

This shows that having a model that performs awesome on the training set, might result in overfitting and perform worse on unseen data.
From these models, I would suggest using the first model because it has the lowest RMSE on the validation data.

However, in a real-world setting, I would be very cautious using this model because, on average, the predictions are still over $900 off.
If I was a used-car dealership, this difference might be enough to put us out of business.

Furthermore, there are a few rows in this data set I would want to explore further.
For example, Row 524 is the worst prediction by all 3 models (over $6,000 different from the actual price). Below are graphs of the residuals and numerical values, to indicate that there might be outliers within the dataset that need further investigation. Although Model 1 had the worst RMSE on the training data, it predicted Row 524 the best. Perhaps this row is an outlier, and if removed, could completely change the order of models’ RMSE.

ToyotaCorolla[524, ]
##     Price Age_08_04    KM Fuel_Type  HP Automatic Doors Quarterly_Tax
## 524 18950        49 49568    Petrol 110         0     3            19
##     Mfr_Guarantee Guarantee_Period Airco Automatic_airco CD_Player
## 524             0                3     1               1         0
##     Powered_Windows Sport_Model Tow_Bar Petrol Diesel CNG Door2 Door3 Door4
## 524               0           0       0      1      0   0     0     1     0
##     Door5
## 524     0
val1.res = residuals(val1.lm)
sort(abs(val1.res)) %>% tail()
##      117      558      669     1059      139      524 
## 3251.805 3345.832 3380.911 4243.596 5959.788 6005.280
plot(val1.res ~ val1.pred$Actual, 
     col = ifelse(abs(val1.res) > 6000, "red", "black"),
     pch = ifelse(abs(val1.res) > 6000, 19, 1),
     ylab = "Residuals", xlab = "Car Price",
     main = "Neural Network #1 (1 Hidden Layer, 2 Nodes)",
     sub = "* Row 524 (potential outlier) is colored Red")
abline(0,0, col = "red")

val2.res = residuals(val2.lm)
sort(abs(val2.res)) %>% tail()
##      105      115      117      139     1059      524 
## 3586.150 3591.908 4411.091 4968.184 5740.484 8113.415
plot(val2.res ~ val2.pred$Actual,
     col = ifelse(abs(val2.res) > 8000, "red", "black"),
     pch = ifelse(abs(val2.res) > 8000, 19, 1),
     ylab = "Residuals", xlab = "Car Price",
     main = "Neural Network #2 (1 Hidden Layer, 5 Nodes)",
     sub = "* Row 524 (potential outlier) is colored Red")
abline(0,0, col = "red")

val3.res = residuals(val3.lm)
sort(abs(val3.res)) %>% tail()
##     1110      134      125      115     1059      524 
## 3643.014 3661.807 4089.827 4722.373 5067.182 8252.993
plot(val3.res ~ val3.pred$Actual, 
     col = ifelse(abs(val3.res) > 8000, "red", "black"),
     pch = ifelse(abs(val3.res) > 8000, 19, 1),
     ylab = "Residuals", xlab = "Car Price",
     main = "Neural Network #3 (2 Hidden Layers, 5 Nodes)",
     sub = "* Row 524 (potential outlier) is colored Red")
abline(0,0, col = "red")

Anyways, of the first 3 models, I’d choose the first model (with 1 hidden layer and 2 nodes) because it had the lowest RMSE when predicting unseen data, but there might be concerns with the large RMSE value (>$900) and we might want to explore outliers (Row 524 as an example.)

Problem 11.4

Airlines <- read.csv("/Users/Simbo/Desktop/School/STAT415/DMBA-R-datasets/EastWestAirlinesNN.csv") %>% na.omit()

Partition data:

set.seed(100)
n = nrow(Airlines)

train.index = sample(n, n*0.6)
train.df = Airlines[train.index, ]   # 2991 observations
valid.df = Airlines[-train.index, ]  # 1994 observations

Preprocess (normalize) quantitative variables to a 0-1 scale:

norm.values = preProcess(train.df, method = "range")

train.norm = predict(norm.values, train.df)
valid.norm = predict(norm.values, valid.df)

summary(train.norm)
##       ID.           Topflight         Balance           Qual_miles     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.000000   Min.   :0.00000  
##  1st Qu.:0.2509   1st Qu.:0.0000   1st Qu.:0.008171   1st Qu.:0.00000  
##  Median :0.4987   Median :0.0000   Median :0.020575   Median :0.00000  
##  Mean   :0.5003   Mean   :0.1618   Mean   :0.036826   Mean   :0.01255  
##  3rd Qu.:0.7493   3rd Qu.:0.0000   3rd Qu.:0.045097   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.000000   Max.   :1.00000  
##    cc1_miles.       cc2_miles.        cc3_miles.        Bonus_miles      
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.004157  
##  Median :1.0000   Median :0.00000   Median :0.000000   Median :0.023559  
##  Mean   :0.5169   Mean   :0.04146   Mean   :0.004681   Mean   :0.064052  
##  3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.090832  
##  Max.   :1.0000   Max.   :1.00000   Max.   :1.000000   Max.   :1.000000  
##   Bonus_trans      Flight_miles_12mo Flight_trans_12     Online_12       
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.000000  
##  1st Qu.:0.03409   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000000  
##  Median :0.11364   Median :0.00000   Median :0.00000   Median :0.000000  
##  Mean   :0.12049   Mean   :0.01429   Mean   :0.02304   Mean   :0.005167  
##  3rd Qu.:0.18182   3rd Qu.:0.00649   3rd Qu.:0.01887   3rd Qu.:0.000000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.00000   Max.   :1.000000  
##      Email         Club_member      Any_cc_miles_12mo   Phone_sale    
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.0000    Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000    1st Qu.:0.0000  
##  Median :1.0000   Median :0.00000   Median :1.0000    Median :0.0000  
##  Mean   :0.6536   Mean   :0.08893   Mean   :0.5336    Mean   :0.1264  
##  3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:1.0000    3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.00000   Max.   :1.0000    Max.   :1.0000

Neural Network 1 (Single hidden layer, 5 nodes)

nn1 <- neuralnet(Phone_sale ~ 
                   Topflight + Balance + Qual_miles + `cc1_miles.` + `cc2_miles.` + 
                   `cc3_miles.` + Bonus_miles + Bonus_trans + Flight_miles_12mo + 
                   Flight_trans_12 + Online_12 + Email + Club_member + Any_cc_miles_12mo,
                data = train.norm, 
                hidden = 5,
                linear.output = TRUE)

plot(nn1, rep = "best")

List of all Weights used in 1st Neural Network:

nn1$weights
## [[1]]
## [[1]][[1]]
##               [,1]          [,2]         [,3]         [,4]        [,5]
##  [1,]    44.333702   -98.2273975   143.695287  -1.77937913   1.3231829
##  [2,]   310.626866   -92.6761717  -725.470587  -0.48419314  -1.5959937
##  [3,]  -438.135543   316.6368512  -298.189625 -13.12537999   9.2574629
##  [4,]   377.793840   442.8833599 -1393.816552  -5.41801909   3.8721191
##  [5,] -2064.513556    -0.3026662  -208.154797  -1.44868278   0.4991160
##  [6,]   183.849663   -66.4872998    76.609445  -0.06725919   0.6176397
##  [7,]   -18.048133 -1351.2228697   502.676564   1.25917754  -3.9104904
##  [8,]  1377.875207   480.9152060    94.151923   1.92582234   5.2519945
##  [9,]  -500.636998  -137.9055566   583.512026   3.69786029  -1.7015612
## [10,] -6722.580401 -3086.7223723  -148.092986   1.60608125 -10.4163521
## [11,]  -244.838603   899.8878808   905.455321   6.33153859  -9.9697235
## [12,]   185.219565  6704.9377262 -1941.283421   0.81675157  11.5963245
## [13,]  -118.387144   185.4003218   -17.863963  -0.20799279   2.2444867
## [14,]    73.881981    69.5605094   -55.389523  -0.20479470   0.2377842
## [15,]    -7.261135   -12.2401752    -6.008596   1.96606750  -2.3229538
## 
## [[1]][[2]]
##            [,1]
## [1,] -0.2435554
## [2,] -0.1797808
## [3,] -0.2607485
## [4,] -0.1830889
## [5,]  0.7014701
## [6,]  0.6794478

Least to Most important Variables in N.N. 1 (based on weights):
Flight miles is the most important variable, Online_12 is the least important in this model.

olden(nn1) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

Decile-wise Lift Charts for Neural Network 1

Training:

nn1.train = predict(nn1, newdata = train.norm)

gain1.train = gains(train.df$Phone_sale, nn1.train)
heights = gain1.train$mean.resp/mean(train.df$Phone_sale)

b = barplot(heights, names.arg = gain1.train$depth, 
            xlab = "Percentile", ylab = "Response Ratio", 
            main = "Performance of Training Set with N.N. 1",
            ylim = c(0,3.25))

text(b, heights, labels = round(heights, 2), pos=3)

Validation:

nn1.val = predict(nn1, newdata = valid.norm)

gain1.val = gains(valid.df$Phone_sale, nn1.val)
heights = gain1.val$mean.resp/mean(valid.df$Phone_sale)

b = barplot(heights, names.arg = gain1.val$depth, 
            xlab = "Percentile", ylab = "Response Ratio",
            main = "Performance of Validation Set with N.N. 1", 
            ylim = c(0, 2))

text(b, heights, labels = round(heights, 2), pos=3) 


Neural Network 2 (Single hidden layer, 1 node)

nn2 <- neuralnet(Phone_sale ~ 
                   Topflight + Balance + Qual_miles + `cc1_miles.` + `cc2_miles.` + 
                   `cc3_miles.` + Bonus_miles + Bonus_trans + Flight_miles_12mo + 
                   Flight_trans_12 + Online_12 + Email + Club_member + Any_cc_miles_12mo,    
                data = train.norm, 
                hidden = 1,
                linear.output = TRUE)

plot(nn2, rep = "best")

List of all Weights used in 2nd Neural Network:

nn2$weights
## [[1]]
## [[1]][[1]]
##              [,1]
##  [1,] -4.95924001
##  [2,] -0.12037224
##  [3,] -4.09455859
##  [4,]  0.06270885
##  [5,] -0.19549678
##  [6,]  0.05340138
##  [7,] -0.32492839
##  [8,]  1.90874940
##  [9,]  2.22582985
## [10,]  0.23434217
## [11,] -1.36722120
## [12,]  1.57296800
## [13,]  0.35044781
## [14,]  0.09258580
## [15,]  0.84654234
## 
## [[1]][[2]]
##            [,1]
## [1,] 0.03864154
## [2,] 4.77404435

Least to Most important Variables in N.N. 2 (based on weights):
Balance is the least important variable. Bonus miles and Bonus trans are the most important.
Online_12 was the least important in the previous model, but is the third most important using this model.

olden(nn2) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

Decile-wise Lift Charts for Neural Network 2

Training:

nn2.train = predict(nn2, newdata = train.norm)

gain2.train = gains(train.df$Phone_sale, nn2.train)
heights = gain2.train$mean.resp/mean(train.df$Phone_sale)

b = barplot(heights, names.arg = gain2.train$depth, 
            xlab = "Percentile", ylab = "Response Ratio", 
            main = "Performance of Training Set with N.N. 2",
            ylim = c(0, 2))

text(b, heights, labels = round(heights, 2), pos=3)

Validation:

nn2.val = predict(nn2, newdata = valid.norm)

gain2.val = gains(valid.df$Phone_sale, nn2.val)
heights = gain2.val$mean.resp/mean(valid.df$Phone_sale)

b = barplot(heights, names.arg = gain2.val$depth, 
            xlab = "Percentile", ylab = "Response Ratio",
            main = "Performance of Validation Set with N.N. 2", 
            ylim = c(0, 2))

text(b, heights, labels = round(heights, 2), pos=3) 

Summary of Decile-Wise Lift Charts

From the above decile-wise lift charts, we can see the 1st model (with 5 nodes) does a really good job predicting the first 30% in its training set. However, on the validation set, it doesn’t predict the Phone Sale very well. Targeting the first 20% could increase the percent of sales, but from there the model struggles to accurately predict Phone Sales.
In contrast, the 2nd model (with just one node) doesn’t predict its training set as well, however the response ratio for its validation set predictions is greater than 1 for the first 60%. In the first model, this ratio drops below 1 in the third decile, indicating issues with overfitting.
If this model was real-world applicaiton, I’d want to look at the correlations between the predictors and phone sales, then maybe reduce the number of predictors. Otherwise, maybe fitting a neural network with 3 nodes or a couple of hidden layers may improve our model’s performance on unseen data.