Exercise 4

library(mlbench)

## Warning: package 'mlbench' was built under R version 4.5.2

# Simulate the Friedman 1 benchmark: 200 observations with noise sd = 1.
# The predictor matrix becomes columns X1..X10 and the response is stored
# alongside them as `y`.
set.seed(123)

data <- mlbench.friedman1(200, sd = 1)
df <- data.frame(data$x, y = data$y)

# Hold out 20% of the rows as a test set. The seed is reset so the split
# is reproducible on its own, independent of the simulation step.
set.seed(123)
n_obs <- nrow(df)
trainIndex <- sample(seq_len(n_obs), 0.8 * n_obs)
train <- df[trainIndex, ]
test <- df[-trainIndex, ]

library(caret)

## Warning: package 'caret' was built under R version 4.5.2

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 4.5.3

## Loading required package: lattice

# Resampling scheme used for tuning: 5-fold cross-validation.
control <- trainControl(
  method = "cv",
  number = 5
)

# k-nearest neighbours, tuned over 10 candidate values of k.
# The seed is reset before fitting so the CV folds are reproducible.
set.seed(123)
knn_model <- train(
  y ~ .,
  data = train,
  method = "knn",
  tuneLength = 10,
  trControl = control
)

# Support vector machine with a radial basis kernel, tuned over 10
# candidate settings. The seed is reset so the CV folds are reproducible.
set.seed(123)
svm_model <- train(
  y ~ .,
  data = train,
  method = "svmRadial",
  tuneLength = 10,
  trControl = control
)

# Single-hidden-layer neural network for a numeric response.
#
# linout = TRUE is required for regression: nnet's default output unit is
# logistic, which confines every prediction to (0, 1). With y on a scale of
# roughly 0-30 that yields near-constant predictions, an RMSE close to the
# mean of y, and an undefined (NA) R-squared in every resample.
# Predictors are centred and scaled because nnet's optimiser is sensitive
# to input scale, and maxit is raised from the default of 100 so the
# optimiser has room to converge.
#
# NOTE: the NN output recorded in this document (the "missing values in
# resampled performance measures" warning, RMSE of about 14, NA R-squared)
# was produced without these settings; re-knit to refresh it.
set.seed(123)
nnet_model <- train(y ~ ., data = train,
                    method = "nnet",
                    preProcess = c("center", "scale"),
                    trControl = control,
                    linout = TRUE,
                    trace = FALSE,
                    maxit = 500,
                    tuneLength = 5)

## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.

# MARS (earth), tuned over 10 candidate model sizes (nprune).
# No interaction degree is requested, so the selected model is additive.
set.seed(123)
mars_model <- train(
  y ~ .,
  data = train,
  method = "earth",
  tuneLength = 10,
  trControl = control
)

## Loading required package: earth

## Warning: package 'earth' was built under R version 4.5.2

## Loading required package: Formula

## Loading required package: plotmo

## Warning: package 'plotmo' was built under R version 4.5.2

## Loading required package: plotrix

## Warning: package 'plotrix' was built under R version 4.5.2

# Show the hinge terms, coefficients and predictor usage of the tuned MARS fit.
summary(mars_model[["finalModel"]])

## Call: earth(x=matrix[160,10], y=c(5.599,13.92,2...), keepxy=TRUE, degree=1,
##             nprune=8)
## 
##                coefficients
## (Intercept)       18.253152
## h(0.58175-X1)    -10.144056
## h(0.5849-X2)     -12.294953
## h(0.444527-X3)    11.811415
## h(X3-0.444527)     8.907508
## h(0.903401-X4)   -10.326271
## h(X5-0.191312)     5.354069
## h(X8-0.828207)     8.785975
## 
## Selected 8 of 20 terms, and 6 of 10 predictors (nprune=8)
## Termination condition: Reached nk 21
## Importance: X4, X2, X1, X5, X3, X8, X6-unused, X7-unused, X9-unused, ...
## Number of terms at each degree of interaction: 1 7 (additive model)
## GCV 2.733329    RSS 359.1765    GRSq 0.8761635    RSq 0.8970111

# Pool the cross-validation results of the four fits for a side-by-side
# comparison; the list names become the model labels in the summary.
fitted_models <- list(
  kNN = knn_model,
  SVM = svm_model,
  NN = nnet_model,
  MARS = mars_model
)
results <- resamples(fitted_models)

summary(results)

## 
## Call:
## summary.resamples(object = results)
## 
## Models: kNN, SVM, NN, MARS 
## Number of resamples: 5 
## 
## MAE 
##           Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## kNN   2.029830  2.065465  2.192595  2.292289  2.353855  2.819702    0
## SVM   1.161699  1.513344  1.533736  1.474941  1.546174  1.619754    0
## NN   13.502103 13.641643 13.654174 13.641073 13.702464 13.704982    0
## MARS  1.251510  1.268957  1.290207  1.332457  1.420435  1.431177    0
## 
## RMSE 
##           Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## kNN   2.534503  2.654364  2.701961  2.869894  2.923484  3.535158    0
## SVM   1.449625  1.825587  1.839818  1.845953  1.962907  2.151829    0
## NN   14.073605 14.362591 14.432178 14.416339 14.479334 14.733986    0
## MARS  1.537070  1.563061  1.690926  1.682290  1.724146  1.896249    0
## 
## Rsquared 
##           Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## kNN  0.5650286 0.6290567 0.6663214 0.6485648 0.6855466 0.6968708    0
## SVM  0.7518451 0.8367328 0.8420569 0.8491882 0.8998005 0.9155057    0
## NN          NA        NA        NA       NaN        NA        NA    5
## MARS 0.8044394 0.8646940 0.8819871 0.8732113 0.9036079 0.9113282    0

# Box-and-whisker plot of the resampled metrics for each model.
bwplot(results)

# Predict the held-out test rows with each tuned model.
pred_knn <- predict(knn_model, test)
pred_svm <- predict(svm_model, test)
pred_nn  <- predict(nnet_model, test)
pred_mars <- predict(mars_model, test)

# Test-set RMSE per model, in the same order as the resampling summary.
test_preds <- list(
  kNN = pred_knn,
  SVM = pred_svm,
  NN = pred_nn,
  MARS = pred_mars
)
data.frame(
  Model = names(test_preds),
  # USE.NAMES = FALSE keeps the default 1..4 row names in the printed table.
  RMSE = vapply(test_preds, RMSE, numeric(1), obs = test$y, USE.NAMES = FALSE)
)

##   Model      RMSE
## 1   kNN  2.916709
## 2   SVM  1.870894
## 3    NN 14.303565
## 4  MARS  1.475404

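The models showed clear differences in performance. MARS performed best overall, with the lowest RMSE in both cross-validation (mean 1.68) and on the test set (1.48) and the highest R² (mean 0.87), followed by SVM (test RMSE 1.87). The kNN model was less accurate (test RMSE 2.92), while the neural network performed very poorly in the results shown above, with an RMSE above 14 and an undefined (NA) R² in all five resamples. An undefined R² means the predictions were essentially constant, which is the expected symptom when `nnet` is fit to a numeric response with its default logistic output unit (that is, without `linout = TRUE`): predictions are confined to the interval (0, 1) while y is on a much larger scale. Those figures therefore reflect the model configuration rather than what a neural network can achieve on this data.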

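Looking at the MARS model, it selected all five informative predictors (X1–X5), which by construction of the Friedman 1 benchmark are the only variables that drive the outcome. It also included one noise variable (X8), while X6, X7, X9, and X10 were not used. This shows that MARS was able to identify the key predictors, with only a small amount of noise included. Note that the selected model is additive (degree 1), so it can only approximate the X1–X2 interaction in the true function through separate hinge terms.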

Overall, MARS provided the best performance and was effective at identifying the most relevant variables, with SVM as a strong alternative.

Exercise 4

Jeneil Miller

2026-03-25