Setup

library(mlbench)
library(caret)

## Loading required package: ggplot2

## Loading required package: lattice

library(AppliedPredictiveModeling)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(tidyr)
library(plotly)

## Warning: package 'plotly' was built under R version 4.5.3

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

library(nnet)
set.seed(200)

Exercise 7.2

Friedman (1991) introduced several benchmark data sets create by simulation. One of these simulations used the following nonlinear equation to create data:

\(y =10sin(πx_1x_2) +20(x_3 −0.5)^2 +10x_4 +5x_5 +N(0,σ^2)\)

where the x values are random variables uniformly distributed between [0, 1] (there are also 5 other non-informative variables also created in the simulation). The package mlbench contains a function called mlbench.friedman1 that simulates these data:

training_data <- mlbench.friedman1(200, sd = 1)

training_data$x <- data.frame(training_data$x)
featurePlot(training_data$x, training_data$y)

## or other methods.

## This creates a list with a vector 'y' and a matrix
## of predictors 'x'. Also simulate a large test set to
## estimate the true error rate with good precision:
test_data <- mlbench.friedman1(5000, sd = 1)
test_data$x <- data.frame(test_data$x)

ctrl <- trainControl(method = "cv", number = 10)

Tune several models on these data.

KVM

knn_model <- train(x = training_data$x, 
                  y = training_data$y, 
                  method = 'knn', 
                  preProc = c("center", "scale"), 
                  tuneLength = 10,
                  trControl = ctrl)

knn_model

## k-Nearest Neighbors 
## 
## 200 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 180, 180, 180, 180, 180, 180, ... 
## Resampling results across tuning parameters:
## 
##   k   RMSE      Rsquared   MAE     
##    5  3.305255  0.5721163  2.754163
##    7  3.149006  0.6185350  2.606181
##    9  3.088199  0.6674249  2.510615
##   11  3.071514  0.6867903  2.506869
##   13  3.067138  0.6963304  2.491396
##   15  3.120643  0.6955263  2.523599
##   17  3.107764  0.7108114  2.526434
##   19  3.124437  0.7189758  2.539485
##   21  3.156001  0.7136684  2.575904
##   23  3.179459  0.7172603  2.601060
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 13.

knn_pred <- predict(knn_model, newdata = test_data$x)
pr_knn <- postResample(pred = knn_pred, obs = test_data$y)

compare <- data.frame(model = "knn",
                      r_squared = pr_knn[["Rsquared"]],
                      rmse = pr_knn[["RMSE"]],
                      mse = pr_knn[["RMSE"]]^2)

SVM

svm_model <- train(x = training_data$x, 
                  y = training_data$y, 
                  method = 'svmRadial', 
                  preProc = c("center", "scale"), 
                  tuneLength = 14,
                  trControl = ctrl)

svm_model

## Support Vector Machines with Radial Basis Function Kernel 
## 
## 200 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 180, 180, 180, 180, 180, 180, ... 
## Resampling results across tuning parameters:
## 
##   C        RMSE      Rsquared   MAE     
##      0.25  2.503872  0.7977922  1.994784
##      0.50  2.263709  0.8131959  1.792161
##      1.00  2.089745  0.8350487  1.640996
##      2.00  1.984750  0.8499042  1.540859
##      4.00  1.920550  0.8599271  1.475066
##      8.00  1.891624  0.8643280  1.455714
##     16.00  1.880946  0.8665123  1.463006
##     32.00  1.880885  0.8665323  1.463142
##     64.00  1.880885  0.8665323  1.463142
##    128.00  1.880885  0.8665323  1.463142
##    256.00  1.880885  0.8665323  1.463142
##    512.00  1.880885  0.8665323  1.463142
##   1024.00  1.880885  0.8665323  1.463142
##   2048.00  1.880885  0.8665323  1.463142
## 
## Tuning parameter 'sigma' was held constant at a value of 0.06273868
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were sigma = 0.06273868 and C = 32.

svm_pred <- predict(svm_model, newdata = test_data$x)
pr_svm <- postResample(pred = svm_pred, obs = test_data$y)

svm_row <- data.frame(model = "svm",
                      r_squared = pr_svm[["Rsquared"]],
                      rmse = pr_svm[["RMSE"]],
                      mse = pr_svm[["RMSE"]]^2)
compare <- rbind(compare, svm_row)

MARS

mars_grid <- expand.grid(.degree = 1:2, .nprune = 2:38)
mars_model <- train(x = training_data$x, 
                  y = training_data$y, 
                  method = 'earth', 
                  tuneGrid = mars_grid,
                  trControl = ctrl)

## Loading required package: earth

## Loading required package: Formula

## Loading required package: plotmo

## Loading required package: plotrix

mars_model

## Multivariate Adaptive Regression Spline 
## 
## 200 samples
##  10 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 180, 180, 180, 180, 180, 180, ... 
## Resampling results across tuning parameters:
## 
##   degree  nprune  RMSE      Rsquared   MAE     
##   1        2      4.336509  0.2780552  3.644285
##   1        3      3.473744  0.5268355  2.810284
##   1        4      2.585918  0.7307972  2.097935
##   1        5      2.281714  0.7932946  1.850226
##   1        6      2.220784  0.8080737  1.752481
##   1        7      1.778204  0.8755039  1.404480
##   1        8      1.694713  0.8831280  1.314474
##   1        9      1.677352  0.8870617  1.299383
##   1       10      1.637649  0.8939230  1.261202
##   1       11      1.585028  0.9016419  1.254184
##   1       12      1.589668  0.9023533  1.243048
##   1       13      1.636098  0.8963126  1.274448
##   1       14      1.667410  0.8928254  1.288750
##   1       15      1.665869  0.8929734  1.289378
##   1       16      1.665869  0.8929734  1.289378
##   1       17      1.665869  0.8929734  1.289378
##   1       18      1.665869  0.8929734  1.289378
##   1       19      1.665869  0.8929734  1.289378
##   1       20      1.665869  0.8929734  1.289378
##   1       21      1.665869  0.8929734  1.289378
##   1       22      1.665869  0.8929734  1.289378
##   1       23      1.665869  0.8929734  1.289378
##   1       24      1.665869  0.8929734  1.289378
##   1       25      1.665869  0.8929734  1.289378
##   1       26      1.665869  0.8929734  1.289378
##   1       27      1.665869  0.8929734  1.289378
##   1       28      1.665869  0.8929734  1.289378
##   1       29      1.665869  0.8929734  1.289378
##   1       30      1.665869  0.8929734  1.289378
##   1       31      1.665869  0.8929734  1.289378
##   1       32      1.665869  0.8929734  1.289378
##   1       33      1.665869  0.8929734  1.289378
##   1       34      1.665869  0.8929734  1.289378
##   1       35      1.665869  0.8929734  1.289378
##   1       36      1.665869  0.8929734  1.289378
##   1       37      1.665869  0.8929734  1.289378
##   1       38      1.665869  0.8929734  1.289378
##   2        2      4.336509  0.2780552  3.644285
##   2        3      3.473744  0.5268355  2.810284
##   2        4      2.585918  0.7307972  2.097935
##   2        5      2.263535  0.7970551  1.850627
##   2        6      2.269918  0.7979362  1.760438
##   2        7      1.769627  0.8768774  1.395560
##   2        8      1.642242  0.8946240  1.292210
##   2        9      1.423601  0.9191185  1.133361
##   2       10      1.398904  0.9219194  1.095466
##   2       11      1.365153  0.9265078  1.056961
##   2       12      1.334194  0.9309022  1.054664
##   2       13      1.340735  0.9311837  1.050705
##   2       14      1.302618  0.9355875  1.025773
##   2       15      1.313371  0.9349276  1.029788
##   2       16      1.287783  0.9379375  1.014472
##   2       17      1.287783  0.9379375  1.014472
##   2       18      1.287783  0.9379375  1.014472
##   2       19      1.287783  0.9379375  1.014472
##   2       20      1.287783  0.9379375  1.014472
##   2       21      1.287783  0.9379375  1.014472
##   2       22      1.287783  0.9379375  1.014472
##   2       23      1.287783  0.9379375  1.014472
##   2       24      1.287783  0.9379375  1.014472
##   2       25      1.287783  0.9379375  1.014472
##   2       26      1.287783  0.9379375  1.014472
##   2       27      1.287783  0.9379375  1.014472
##   2       28      1.287783  0.9379375  1.014472
##   2       29      1.287783  0.9379375  1.014472
##   2       30      1.287783  0.9379375  1.014472
##   2       31      1.287783  0.9379375  1.014472
##   2       32      1.287783  0.9379375  1.014472
##   2       33      1.287783  0.9379375  1.014472
##   2       34      1.287783  0.9379375  1.014472
##   2       35      1.287783  0.9379375  1.014472
##   2       36      1.287783  0.9379375  1.014472
##   2       37      1.287783  0.9379375  1.014472
##   2       38      1.287783  0.9379375  1.014472
## 
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were nprune = 16 and degree = 2.

mars_pred <- predict(mars_model, newdata = test_data$x)
pr_mars <- postResample(pred = mars_pred, obs = test_data$y)

mars_row <- data.frame(model = "mars",
                      r_squared = pr_mars[["Rsquared"]],
                      rmse = pr_mars[["RMSE"]],
                      mse = pr_mars[["RMSE"]]^2)
compare <- rbind(compare, mars_row)

NNET

nnet_grid <- expand.grid(.decay = c(0, 0.01, .1),
                        .size = c(1:10),
                        .bag = FALSE)

nnet_model <- train(x = training_data$x, 
                  y = training_data$y, 
                  method = 'avNNet', 
                  tuneGrid = nnet_grid,
                  trControl = ctrl,
                  preProc = c("center", "scale"),
                  linout = TRUE,
                  trace = FALSE,
                  MaxNWts = 10 * (ncol(training_data$x) + 1) + 10 + 1,
                  maxit = 500)

## Warning: executing %dopar% sequentially: no parallel backend registered

nnet_model

## Model Averaged Neural Network 
## 
## 200 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 180, 180, 180, 180, 180, 180, ... 
## Resampling results across tuning parameters:
## 
##   decay  size  RMSE      Rsquared   MAE     
##   0.00    1    2.425781  0.7772900  1.912506
##   0.00    2    2.445790  0.7763799  1.918591
##   0.00    3    2.050823  0.8347940  1.595904
##   0.00    4    2.027251  0.8375237  1.602809
##   0.00    5    2.529135  0.7697625  1.860244
##   0.00    6    2.738552  0.7316011  2.092375
##   0.00    7    4.489358  0.5593220  2.651495
##   0.00    8    5.676330  0.4963704  3.272238
##   0.00    9    5.839399  0.4988938  3.508280
##   0.00   10    3.259136  0.6455203  2.209138
##   0.01    1    2.407830  0.7808729  1.912057
##   0.01    2    2.414800  0.7768646  1.905023
##   0.01    3    2.079379  0.8283189  1.630607
##   0.01    4    2.046872  0.8352091  1.627392
##   0.01    5    2.066268  0.8371940  1.625184
##   0.01    6    2.140136  0.8186529  1.748623
##   0.01    7    2.191099  0.8195851  1.678329
##   0.01    8    2.307624  0.7944878  1.813945
##   0.01    9    2.569384  0.7484895  2.043404
##   0.01   10    2.737772  0.7233009  2.203260
##   0.10    1    2.418022  0.7762852  1.897950
##   0.10    2    2.452679  0.7719314  1.914702
##   0.10    3    2.106233  0.8278623  1.630821
##   0.10    4    2.041037  0.8348755  1.608782
##   0.10    5    2.077090  0.8347308  1.664639
##   0.10    6    2.251557  0.8020515  1.793479
##   0.10    7    2.127261  0.8211299  1.651059
##   0.10    8    2.195091  0.8096029  1.735512
##   0.10    9    2.200882  0.8179188  1.756793
##   0.10   10    2.332269  0.7906976  1.813211
## 
## Tuning parameter 'bag' was held constant at a value of FALSE
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were size = 4, decay = 0 and bag = FALSE.

nnet_pred <- predict(nnet_model, newdata = test_data$x)
pr_nnet <- postResample(pred = nnet_pred, obs = test_data$y)

nnet_row <- data.frame(model = "nnet",
                      r_squared = pr_nnet[["Rsquared"]],
                      rmse = pr_nnet[["RMSE"]],
                      mse = pr_nnet[["RMSE"]]^2)
compare <- rbind(compare, nnet_row)

Comparison

compare

##   model r_squared     rmse      mse
## 1   knn 0.6747755 3.148156 9.910885
## 2   svm 0.8257365 2.073190 4.298119
## 3  mars 0.9471145 1.149250 1.320777
## 4  nnet 0.7692074 2.594235 6.730055

models <- list(SVM = svm_model, NNET = nnet_model,
               KNN = knn_model, MARS = mars_model)

results <- sapply(models, function(m) {
  preds <- predict(m, newdata = test_data$x)
  RMSE(preds, test_data$y)
})
print(sort(results))

##     MARS      SVM     NNET      KNN 
## 1.149250 2.073190 2.594235 3.148156

resamps <- resamples(models)
summary(resamps)

## 
## Call:
## summary.resamples(object = resamps)
## 
## Models: SVM, NNET, KNN, MARS 
## Number of resamples: 10 
## 
## MAE 
##          Min.   1st Qu.   Median     Mean  3rd Qu.     Max. NA's
## SVM  1.156875 1.3117114 1.399348 1.463142 1.606072 1.865418    0
## NNET 1.268962 1.4215472 1.592861 1.602809 1.659504 2.229668    0
## KNN  1.834833 2.1471846 2.492130 2.491396 2.893954 3.005473    0
## MARS 0.860703 0.8903864 0.994729 1.014472 1.121062 1.236741    0
## 
## RMSE 
##          Min.  1st Qu.   Median     Mean  3rd Qu.     Max. NA's
## SVM  1.658470 1.719959 1.789232 1.880885 1.912734 2.385774    0
## NNET 1.501438 1.809560 1.969752 2.027251 2.028334 2.946942    0
## KNN  2.387649 2.844232 2.976898 3.067138 3.411783 3.646876    0
## MARS 1.059342 1.097373 1.260603 1.287783 1.347103 1.698515    0
## 
## Rsquared 
##           Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## SVM  0.7798647 0.8423752 0.8731821 0.8665323 0.8997912 0.9295706    0
## NNET 0.7341464 0.8326077 0.8469449 0.8375237 0.8568889 0.9143177    0
## KNN  0.4963614 0.6105380 0.7121444 0.6963304 0.7896009 0.8407250    0
## MARS 0.8924392 0.9358747 0.9450349 0.9379375 0.9480275 0.9584190    0

dotplot(resamps)

Evaluation Analysis: In regards to the evaluation metrics shown above, the MARS model achieves the best overall performance, recording the lowest RMSE at 1.149250, the lowest MSE 1.320777 and the highest \(R^2\) value at 0.9471145. SVM performs marginally well with an \(R^2\) of 0.8257365 and an RMSE of 2.073190. NNET records a \(R2\) of 0.7692074 and RMSE of 2.594235 while KNN recorded the worst performance with an \(R^2\) of 0.6747755 and and RMSE of 3.148156.

Predicted vs Actual

par(mfrow = c(2,2), mar = c(3,3,2,1))

plot(test_data$y, knn_pred, xlab = "Actual", ylab = "Predicted", main = "KNN", col = "seagreen");abline(0,1, col = "red")
plot(test_data$y, svm_pred, xlab = "Actual", ylab = "Predicted", main = "SVM", col = "lightblue");abline(0,1, col = "red")
plot(test_data$y, mars_pred, xlab = "Actual", ylab = "Predicted", main = "MARS", col = "mediumpurple");abline(0,1, col = "red")
plot(test_data$y, nnet_pred, xlab = "Actual", ylab = "Predicted", main = "NNET", col = "wheat");abline(0,1, col = "red")

par(mfrow = c(1,1))

Predicted vs Actual Analysis: The red diagonal lines plotted above represent perfect prediction where \(prediction = actual\). MARS follows the diagonal most closely with minimal scatter, consistent with the evaluation metrics discussed previously. SVM and NNET show reasonable alignment along the diagonal but with lightly more spread while KNN shows the most deviation,with noticeable scatter suggesting that the model struggles to capture underlying relationship as well as the other 3 models, this is verified through the evaluation metrics above.

Residuals

par(mfrow = c(2,2), mar = c(3,3,2,1))

plot(knn_pred, knn_pred - test_data$y, xlab = "Actual", ylab = "Predicted", main = "KNN", col = "seagreen");abline(h = 0, col = "red")
plot(svm_pred, svm_pred - test_data$y, xlab = "Actual", ylab = "Predicted", main = "SVM", col = "lightblue");abline(h = 0, col = "red")
plot(mars_pred, mars_pred - test_data$y, xlab = "Actual", ylab = "Predicted", main = "MARS", col = "mediumpurple");abline(h = 0, col = "red")
plot(nnet_pred, nnet_pred - test_data$y, xlab = "Actual", ylab = "Predicted", main = "NNET", col = "wheat");abline(h = 0, col = "red")

par(mfrow = c(1,1))

Residuals: The red horizontal line at zero represents no error. Mars residuals are captured in a narrow range, randomly scattered around zero, suggesting that it is a well fitted model. NNET and SVM closely follow with the same scattering while KNN displays the widest residual spread, showing some structure unlike the previous three models.

Mars Variable Importance

mars_imp <- varImp(mars_model)
mars_imp

## earth variable importance
## 
##    Overall
## X1  100.00
## X4   75.23
## X2   48.73
## X5   15.52
## X3    0.00

plot(mars_imp, top = 20, main = "MARS Variable Importance")

summary(mars_model$finalModel)

## Call: earth(x=data.frame[200,10], y=c(18.46,16.1,17...), keepxy=TRUE, degree=2,
##             nprune=16)
## 
##                                 coefficients
## (Intercept)                        20.378441
## h(0.621722-X1)                    -15.512132
## h(X1-0.621722)                      9.177132
## h(0.601063-X2)                    -17.940676
## h(X2-0.601063)                     10.064356
## h(X3-0.281766)                     11.590022
## h(0.447442-X3)                     14.641640
## h(X3-0.447442)                    -12.924806
## h(X3-0.606015)                     13.416764
## h(0.734892-X4)                    -10.074386
## h(X4-0.734892)                      9.687149
## h(0.850094-X5)                     -5.385762
## h(0.218266-X1) * h(X2-0.601063)   -55.372637
## h(X1-0.218266) * h(X2-0.601063)   -27.542831
## h(X1-0.621722) * h(X2-0.295997)   -26.527403
## h(0.649253-X1) * h(0.601063-X2)    26.129827
## 
## Selected 16 of 18 terms, and 5 of 10 predictors (nprune=16)
## Termination condition: Reached nk 21
## Importance: X1, X4, X2, X5, X3, X6-unused, X7-unused, X8-unused, X9-unused, ...
## Number of terms at each degree of interaction: 1 11 4
## GCV 1.61518    RSS 210.6377    GRSq 0.934423    RSq 0.9568093

Mars Variable Importance: The MARS model used predictors X1-X5, with X6-X10 left unused. This does show that MARS performed well in automatic variable selection. The variable importance ranking matches with the true equations, where X1 and X2 contribute to the sin interaction term, X4 has a large linear coefficient of 10, X5 has a smaller coefficient of 5 and X3 has an importance score of 0.00 despite it still being present in the model, but contributing four hinge terms as seen above in the coefficients.

Exercise_4

Sofia Hernandez

2026-03-26