Question 11.3 a

Fit a neural network model to the data. Use a single hidden layer with 2 nodes. • Use predictors Age_08_04, KM, Fuel_Type, HP, Automatic, Doors, Quarterly_Tax, Mfr_Guarantee, Guarantee_Period, Airco, Automatic_airco, CD_Player, Powered_Windows, Sport_Model, and Tow_Bar. • Remember to first scale the numerical predictor and outcome variables to a 0–1 scale (use function preprocess() with method = “range”—see Chapter 7) and convert categorical predictors to dummies. Record the RMS error for the training data and the validation data. Repeat the process, changing the number of hidden layers and nodes to {single layer with 5 nodes}, {two layers, 5 nodes in each layer}.

# Load the Toyota Corolla data set
toyota.df <- read.csv("~/ToyotaCorolla.csv")

# Dummifying categorical variables: one logical indicator column per level

# Fuel_Type indicators: Petrol, Diesel, CNG
toyota.df[c("Petrol", "Diesel", "CNG")] <- lapply(
  c("Petrol", "Diesel", "CNG"),
  function(fuel) toyota.df$Fuel_Type == fuel
)

# Doors indicators: Door2, Door3, Door4, Door5
toyota.df[paste0("Door", 2:5)] <- lapply(
  2:5,
  function(n.doors) toyota.df$Doors == n.doors
)

# Partition of the data into training (60%) and validation (40%)
# set.seed() interprets its argument as an integer, so pass one explicitly.
set.seed(3)

# Randomly split the rows of `df` into train / valid / test partitions.
#
# Arguments:
#   df   - data frame whose rows are shuffled and split.
#   prop - numeric vector; prop[1] and prop[2] are the training and validation
#          proportions. Whatever rows remain form the test partition, so any
#          further elements of `prop` are not used.
# Returns:
#   A list with elements `train`, `valid` and `test` (possibly zero-row).
partitionData <- function(df, prop = c(0.60, 0.20, 0.20)) {
  stopifnot(
    is.numeric(prop),
    length(prop) >= 2,
    !anyNA(prop[1:2]),
    all(prop[1:2] >= 0)
  )

  n <- nrow(df)
  # Same permutation as sample(1:n, n) for n >= 1, but also valid when n is 0.
  idx <- sample.int(n)

  # Cap the sizes so that rounding can never index past the end of `idx`
  # (e.g. n = 3 with prop = c(0.5, 0.5) rounds to 2 + 2 rows).
  n1 <- min(round(n * prop[1]), n)
  n2 <- min(round(n * prop[2]), n - n1)

  # seq_len() yields an empty index for an empty partition, unlike a:b,
  # which counts backwards and would select NA / duplicated rows.
  train.idx <- idx[seq_len(n1)]
  valid.idx <- idx[n1 + seq_len(n2)]
  test.idx <- idx[n1 + n2 + seq_len(n - n1 - n2)]

  list(
    train = df[train.idx, ],
    valid = df[valid.idx, ],
    test = df[test.idx, ]
  )
}
# 60/40 split; with these proportions the test partition receives no rows
partition <- partitionData(df = toyota.df, prop = c(0.60, 0.40))

# Keep only the training and validation partitions
train.toyota <- partition[["train"]]
valid.toyota <- partition[["valid"]]

# Range-normalize the data; the scaling is estimated on the training partition
norm.values <- preProcess(train.toyota, method = "range")
## Warning in preProcess.default(train.toyota, method = "range"): No variation for
## for: Cylinders
# Apply the training-set scaling to the training and validation partitions
train.norm.df <- predict(object = norm.values, newdata = train.toyota)
valid.norm.df <- predict(object = norm.values, newdata = valid.toyota)

# Fit a neural network with a single hidden layer of 2 nodes
# NOTE(review): the question also lists Mfr_Guarantee, Guarantee_Period, Airco,
# Automatic_airco and CD_Player as predictors; they are not in this formula.
nn <- neuralnet(
  formula = Price ~ Age_08_04 + KM + HP + Automatic + Quarterly_Tax +
    Powered_Windows + Sport_Model + Tow_Bar + Petrol + Diesel +
    Door2 + Door3 + Door4,
  data = train.norm.df,
  hidden = 2,
  linear.output = TRUE
)
plot(nn, rep = "best")

Question 11.3ai

Record the RMS error for the training data and the validation data. Repeat the process, changing the number of hidden layers and nodes to {single layer with 5 nodes}, {two layers, 5 nodes in each layer}.

# Neural network with one hidden layer of 5 nodes
nn1 <- neuralnet(
  formula = Price ~ Age_08_04 + KM + HP + Automatic + Quarterly_Tax +
    Powered_Windows + Sport_Model + Tow_Bar + Petrol + Diesel +
    Door2 + Door3 + Door4,
  data = train.norm.df,
  hidden = 5,
  linear.output = TRUE
)

plot(nn1, rep = "best")

# Neural network with two hidden layers of 5 nodes each
nn2 <- neuralnet(
  formula = Price ~ Age_08_04 + KM + HP + Automatic + Quarterly_Tax +
    Powered_Windows + Sport_Model + Tow_Bar + Petrol + Diesel +
    Door2 + Door3 + Door4,
  data = train.norm.df,
  hidden = c(5, 5),
  linear.output = TRUE
)

plot(nn2, rep = "best")

Question 11.3aii

What happens to the RMS error for the training data as the number of layers and nodes increases?

# Price range of the training partition, used to undo the range scaling.
# NOTE(review): these globals share their names with base::min / base::max.
min <- min(train.toyota$Price)
max <- max(train.toyota$Price)

# Map a price on the scaled range back to the original units, using the
# global `min` and `max` defined above.
unscale.price <- function(scaled.price) {
  scaled.price * (max - min) + min
}
# For each network: predict on the scaled data, convert the predictions back
# to price units, pair them with the actual prices, and regress predicted on
# actual.
# NOTE(review): accuracy() applied to these lm fits summarises the residuals
# of that regression rather than (actual - predicted).

# Single hidden layer with 2 nodes -- training
pred.train1 <- predict(object = nn, newdata = train.norm.df)
pred.unscaled <- unscale.price(pred.train1)
pred1 <- data.frame(actual = train.toyota$Price, predicted = pred.unscaled)
pred1.lm <- lm(formula = predicted ~ actual, data = pred1)

# Single hidden layer with 2 nodes -- validation
pred.valid1 <- predict(object = nn, newdata = valid.norm.df)
pred1.unscaled <- unscale.price(pred.valid1)
pred2 <- data.frame(actual = valid.toyota$Price, predicted = pred1.unscaled)
pred2.lm <- lm(formula = predicted ~ actual, data = pred2)

# Single hidden layer with 5 nodes -- training
pred.train2 <- predict(object = nn1, newdata = train.norm.df)
pred2.unscaled <- unscale.price(pred.train2)
pred3 <- data.frame(actual = train.toyota$Price, predicted = pred2.unscaled)
pred3.lm <- lm(formula = predicted ~ actual, data = pred3)

# Single hidden layer with 5 nodes -- validation
pred.valid2 <- predict(object = nn1, newdata = valid.norm.df)
pred3.unscaled <- unscale.price(pred.valid2)
pred4 <- data.frame(actual = valid.toyota$Price, predicted = pred3.unscaled)
pred4.lm <- lm(formula = predicted ~ actual, data = pred4)

# Two hidden layers with 5 nodes each -- training
pred.train3 <- predict(object = nn2, newdata = train.norm.df)
pred4.unscaled <- unscale.price(pred.train3)
pred5 <- data.frame(actual = train.toyota$Price, predicted = pred4.unscaled)
pred5.lm <- lm(formula = predicted ~ actual, data = pred5)

# Two hidden layers with 5 nodes each -- validation
pred.valid3 <- predict(object = nn2, newdata = valid.norm.df)
pred6.unscaled <- unscale.price(pred.valid3)
pred6 <- data.frame(actual = valid.toyota$Price, predicted = pred6.unscaled)
pred6.lm <- lm(formula = predicted ~ actual, data = pred6)

# Reporting the training RMSE of all three networks (2 nodes; 5 nodes; two
# layers of 5 nodes). accuracy(<predicted>, <actual>) measures the prediction
# error directly. Passing the lm(predicted ~ actual) fits instead summarises
# the residuals of that regression, not the network's error (its ME is ~0 by
# construction).
forecast::accuracy(pred1$predicted, pred1$actual)
forecast::accuracy(pred3$predicted, pred3$actual)
forecast::accuracy(pred5$predicted, pred5$actual)
## NOTE(review): output not yet regenerated for the calls above. The values
## recorded from the earlier accuracy(<lm fit>) calls were
## RMSE 1086.157 (pred1.lm) and RMSE 1031.012 (pred5.lm).
# Comments
"We can see that the RMSE on the training data for the single hidden layer is
higher than the RMSE for two hidden layers; therefore, increasing the number of
hidden layers and nodes improves performance on the training data."
## [1] "We can see that the RMSE on the training data for the single hidden layer is\nhigher than the RMSE for two hidden layers; therefore, increasing the number of\nhidden layers and nodes improves performance on the training data."

Question 11.3aii

What happens to the RMS error for the validation data?

# Reporting the validation RMSE of all three networks (2 nodes; 5 nodes; two
# layers of 5 nodes). accuracy(<predicted>, <actual>) measures the prediction
# error directly. Passing the lm(predicted ~ actual) fits instead summarises
# the residuals of that regression, not the network's error (its ME is ~0 by
# construction).
forecast::accuracy(pred2$predicted, pred2$actual)
forecast::accuracy(pred4$predicted, pred4$actual)
forecast::accuracy(pred6$predicted, pred6$actual)
## NOTE(review): output not yet regenerated for the calls above. The values
## recorded from the earlier accuracy(<lm fit>) calls were
## RMSE 1060.536 (pred2.lm) and RMSE 1046.533 (pred6.lm).
# Comments
"We can see that the RMSE on the validation data for the single hidden layer is
higher than for the two hidden layers with more nodes; therefore, the network
with two hidden layers performs better than the single-layer network."
## [1] "We can see that the RMSE on the validation data for the single hidden layer is\nhigher than for the two hidden layers with more nodes; therefore, the network\nwith two hidden layers performs better than the single-layer network."

Question 11.3aiii

Comment on the appropriate number of layers and nodes for this application.

"From these models, I would suggest using the model with two hidden layers
because it has the lowest RMSE on the validation data."
## [1] "From these models, I would suggest using the model with two hidden layers\nbecause it has the lowest RMSE on the validation data."

Question 11.4a

Run a neural net model on these data, using a single hidden layer with 5 nodes. Remember to first convert categorical variables into dummies and scale numerical predictor variables to a 0–1 (use function preprocess() with method = “range”—see Chapter 7). Generate a decile-wise lift chart for the training and validation sets. Interpret the meaning (in business terms) of the leftmost bar of the validation decilewise lift chart.

# Load EastWestAirlinesNN.csv, drop rows with missing values, and coerce every
# column to numeric
airlines <- na.omit(read.csv("EastWestAirlinesNN.csv"))
airlines <- data.frame(lapply(airlines, as.numeric))

# Partition of the data into training (60%) and validation (40%)
partition1 <- partitionData(df = airlines, prop = c(0.60, 0.40))
train.airlines <- partition1[["train"]]
valid.airlines <- partition1[["valid"]]


# Range-normalize the data; the scaling is estimated on the training partition
# and then applied to both partitions
norm.value <- preProcess(train.airlines, method = "range")
train.norm.airlines <- predict(object = norm.value, newdata = train.airlines)
valid.norm.airlines <- predict(object = norm.value, newdata = valid.airlines)

# Fit the neural network model with a single hidden layer of 5 nodes
nn3 <- neuralnet(
  formula = Phone_sale ~ Topflight + Balance + Qual_miles + cc1_miles. +
    cc2_miles. + cc3_miles. + Bonus_miles + Bonus_trans + Flight_miles_12mo +
    Flight_trans_12 + Online_12 + Email + Club_member + Any_cc_miles_12mo,
  data = train.norm.airlines,
  hidden = 5,
  threshold = 0.01,
  stepmax = 1e+05,
  linear.output = TRUE
)

plot(nn3, rep = "best")

# Decile-wise lift chart for the training set (5-node model)
nn3.train <- predict(object = nn3, newdata = train.norm.airlines)
gain1.train <- gains(train.airlines$Phone_sale, nn3.train)
# Bar height = mean response in the group relative to the overall mean response
heights <- gain1.train$mean.resp / mean(train.airlines$Phone_sale)
b1 <- barplot(
  height = heights,
  names.arg = gain1.train$depth,
  xlab = "Percentile",
  ylab = "Response Ratio",
  main = "Performance of Training with N.N.3",
  ylim = c(0, 4)
)
# barplot() returns the bar midpoints; use them to place the value labels
text(x = b1, y = heights, labels = round(heights, 2), pos = 3)

# Decile-wise lift chart for the validation set (5-node model)
nn4.valid <- predict(nn3, newdata = valid.norm.airlines)
gain1.valid <- gains(valid.airlines$Phone_sale, nn4.valid)
# Bar height = mean response in the group relative to the overall mean response
heights1 <- gain1.valid$mean.resp / mean(valid.airlines$Phone_sale)
b2 <- barplot(heights1, names.arg = gain1.valid$depth, xlab = "Percentile",
              ylab = "Response Ratio",
              main = "Performance of Validation with N.N.3",
              ylim = c(0, 3.25))
# Place the labels at this plot's own bar midpoints (b2), not those of the
# training chart, which may have a different number of bars.
text(b2, heights1, labels = round(heights1, 2), pos = 3)

Question 11.4b

"We can see from the decile-wise lift charts above that the first model 
(with 5 nodes) does a great job predicting the first 30% of the training set. 
On the validation set, however, it does not accurately predict the Phone Sale. 
Targeting the first 20% of customers may enhance sales, but the program struggles 
to reliably anticipate phone sales after that."
## [1] "We can see from the decile-wise lift charts above that the first model \n(with 5 nodes) does a great job predicting the first 30% of the training set. \nOn the validation set, however, it does not accurately predict the Phone Sale. \nTargeting the first 20% of customers may enhance sales, but the program struggles \nto reliably anticipate phone sales after that."

Question 11.4c

Run a second neural net model on the data, this time setting the number of hidden nodes to 1. Comment now on the difference between this model and the model you ran earlier, and how overfitting might have affected results.

# Fit the neural network model with a single hidden layer of 1 node
nn7 <- neuralnet(
  formula = Phone_sale ~ Topflight + Balance + Qual_miles + cc1_miles. +
    cc2_miles. + cc3_miles. + Bonus_miles + Bonus_trans + Flight_miles_12mo +
    Flight_trans_12 + Online_12 + Email + Club_member + Any_cc_miles_12mo,
  data = train.norm.airlines,
  hidden = 1,
  threshold = 0.01,
  stepmax = 1e+05,
  linear.output = TRUE
)

plot(nn7, rep = "best")

# Decile-wise lift chart for the training set (1-node model)
nn7.train <- predict(nn7, newdata = train.norm.airlines)
gain2.train <- gains(train.airlines$Phone_sale, nn7.train)
# Bar height = mean response in the group relative to the overall mean response
heights2 <- gain2.train$mean.resp / mean(train.airlines$Phone_sale)
b3 <- barplot(heights2, names.arg = gain2.train$depth, xlab = "Percentile",
              ylab = "Response Ratio",
              main = "Performance of Training with N.N.7",
              ylim = c(0, 3.25))
# Place the labels at this plot's own bar midpoints (b3), not those of an
# earlier chart, which may have a different number of bars.
text(b3, heights2, labels = round(heights2, 2), pos = 3)

# Decile-wise lift chart for the validation set (1-node model)
nn7.valid <- predict(nn7, newdata = valid.norm.airlines)
gain7.valid <- gains(valid.airlines$Phone_sale, nn7.valid)
# Bar height = mean response in the group relative to the overall mean response
heights3 <- gain7.valid$mean.resp / mean(valid.airlines$Phone_sale)
# Label the bars with this model's own depths (gain7.valid), so the axis
# labels always match the bars being drawn.
b4 <- barplot(heights3, names.arg = gain7.valid$depth, xlab = "Percentile",
              ylab = "Response Ratio",
              main = "Performance of Validation with N.N.7",
              ylim = c(0, 3.25))
text(b4, heights3, labels = round(heights3, 2), pos = 3)

Question 11.4d

What sort of information, if any, is provided about the effects of the various variables?

"If I were to utilize this model in the real world, I would look at the 
correlations between the predictors and phone sales, and then try to reduce 
the number of predictors. Connecting a three-node neural network or a couple 
of hidden layers to our model, on the other hand, may improve its performance 
on unknown input."
## [1] "If I were to utilize this model in the real world, I would look at the \ncorrelations between the predictors and phone sales, and then try to reduce \nthe number of predictors. Connecting a three-node neural network or a couple \nof hidden layers to our model, on the other hand, may improve its performance \non unknown input."