Part 1. Setup

library(mlbench)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
# Fix the RNG seed so the simulated data sets below are reproducible.
set.seed(67)

# Simulate 500 training observations from the Friedman 1 benchmark (sd = 1)
# and replace the predictor element `x` with a data frame version of itself.
trainingData <- mlbench.friedman1(n = 500, sd = 1)
trainingData[["x"]] <- data.frame(trainingData[["x"]])

# Simulate a 20,000-observation test set in the same way; it is drawn after
# the training set, so it continues the same random-number stream.
testData <- mlbench.friedman1(n = 20000, sd = 1)
testData[["x"]] <- data.frame(testData[["x"]])

# Plot every training predictor against the response.
featurePlot(x = trainingData[["x"]], y = trainingData[["y"]])

Part 2. Model Tuning

Part 2.a. kNN Model

# Tune a k-nearest-neighbours regression model on the training data.
# Predictors are centred and scaled before fitting.
#
# The grid-size argument of train() is spelled `tuneLength` (capital L).
# The earlier spelling `tunelength` did not set it, so only the default
# three candidates (k = 5, 7, 9) were evaluated. The printed results that
# follow in this document were produced before this fix and should be
# refreshed by re-running the chunk.
# `preProcess` is likewise written out in full instead of relying on
# partial matching of `preProc`.
knnTune <- train(x = trainingData$x,
                 y = trainingData$y,
                 method = "knn",
                 preProcess = c("center", "scale"),
                 tuneLength = 10)
knnTune
## k-Nearest Neighbors 
## 
## 500 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 500, 500, 500, 500, 500, 500, ... 
## Resampling results across tuning parameters:
## 
##   k  RMSE      Rsquared   MAE     
##   5  3.164948  0.5809337  2.501237
##   7  3.062666  0.6118201  2.431851
##   9  3.003797  0.6346567  2.391112
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 9.
# Score the tuned kNN model on the held-out test predictors, then summarise
# predicted vs. observed responses (RMSE, R-squared, MAE).
knnPred <- predict(object = knnTune, newdata = testData[["x"]])
knnPerf <- postResample(pred = knnPred, obs = testData[["y"]])

Part 2.b. MARS Model

# Candidate MARS settings: interaction degree 1 to 3 crossed with
# nprune = 1, 4, 7, ..., 19 (21 combinations in total).
# In the printed results the nprune = 1 rows report NaN for Rsquared, which
# matches the "missing values in resampled performance measures" warning.
marsGrid <- expand.grid(
  degree = seq_len(3),
  nprune = seq(from = 1, to = 21, by = 3)
)

# Tune the MARS ("earth") model over that grid; no pre-processing and no
# custom resampling control are supplied, so train() uses its defaults.
marsTune <- train(
  x = trainingData$x,
  y = trainingData$y,
  method = "earth",
  tuneGrid = marsGrid
)
## Loading required package: earth
## Loading required package: Formula
## Loading required package: plotmo
## Loading required package: plotrix
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
marsTune
## Multivariate Adaptive Regression Spline 
## 
## 500 samples
##  10 predictor
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 500, 500, 500, 500, 500, 500, ... 
## Resampling results across tuning parameters:
## 
##   degree  nprune  RMSE      Rsquared   MAE      
##   1        1      4.843117        NaN  3.9786844
##   1        4      2.656424  0.7008859  2.1478952
##   1        7      1.845814  0.8547847  1.4609303
##   1       10      1.714346  0.8751869  1.3472819
##   1       13      1.724880  0.8737119  1.3535340
##   1       16      1.734647  0.8722868  1.3658757
##   1       19      1.738849  0.8716750  1.3704517
##   2        1      4.843117        NaN  3.9786844
##   2        4      2.725538  0.6832721  2.2044609
##   2        7      1.924074  0.8411618  1.5288173
##   2       10      1.450415  0.9101359  1.1478975
##   2       13      1.236353  0.9348850  0.9817638
##   2       16      1.174313  0.9414176  0.9367529
##   2       19      1.163063  0.9426519  0.9323211
##   3        1      4.843117        NaN  3.9786844
##   3        4      2.725538  0.6832721  2.2044609
##   3        7      1.924074  0.8411618  1.5288173
##   3       10      1.450415  0.9101359  1.1478975
##   3       13      1.239727  0.9345294  0.9837972
##   3       16      1.185274  0.9402116  0.9418417
##   3       19      1.174023  0.9414459  0.9374100
## 
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were nprune = 19 and degree = 2.
# Score the tuned MARS model on the held-out test predictors, then summarise
# predicted vs. observed responses (RMSE, R-squared, MAE).
marsPred <- predict(object = marsTune, newdata = testData[["x"]])
marsPerf <- postResample(pred = marsPred, obs = testData[["y"]])

Part 2.c. Neural Network

# Tune a single-hidden-layer neural network ("nnet") on centred and scaled
# predictors, letting train() build a grid with tuneLength = 5.
# `linout = TRUE` and `trace = FALSE` are not train() arguments; they are
# passed through to the underlying model fit.
nnetTune <- train(
  x = trainingData$x,
  y = trainingData$y,
  method = "nnet",
  preProcess = c("center", "scale"),
  tuneLength = 5,
  linout = TRUE,
  trace = FALSE
)
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
nnetTune
## Neural Network 
## 
## 500 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 500, 500, 500, 500, 500, 500, ... 
## Resampling results across tuning parameters:
## 
##   size  decay  RMSE      Rsquared   MAE     
##   1     0e+00  3.016146  0.6184536  2.410940
##   1     1e-04  3.216238  0.5775168  2.521703
##   1     1e-03  3.119578  0.6188123  2.400742
##   1     1e-02  2.703107  0.7034790  2.149256
##   1     1e-01  2.500417  0.7397040  1.978170
##   3     0e+00  2.525721  0.7330113  1.998492
##   3     1e-04  2.578282  0.7231640  2.048690
##   3     1e-03  2.510283  0.7377386  1.993675
##   3     1e-02  2.487021  0.7418668  1.973132
##   3     1e-01  2.440638  0.7514099  1.942626
##   5     0e+00  2.611529  0.7212269  2.079016
##   5     1e-04  2.491671  0.7425700  1.967025
##   5     1e-03  2.494718  0.7454881  1.956460
##   5     1e-02  2.556103  0.7285697  2.021105
##   5     1e-01  2.516960  0.7384589  1.999555
##   7     0e+00  2.491631  0.7450191  1.972076
##   7     1e-04  2.528005  0.7365785  1.988117
##   7     1e-03  2.460445  0.7518620  1.932134
##   7     1e-02  2.565315  0.7291293  2.034848
##   7     1e-01  2.288858  0.7826790  1.788171
##   9     0e+00  2.414018  0.7625410  1.906401
##   9     1e-04  2.431788  0.7600588  1.913749
##   9     1e-03  2.430306  0.7616748  1.926205
##   9     1e-02  2.461594  0.7526647  1.926675
##   9     1e-01  2.428642  0.7588335  1.913825
## 
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were size = 7 and decay = 0.1.
# Score the tuned neural network on the held-out test predictors, then
# summarise predicted vs. observed responses (RMSE, R-squared, MAE).
nnetPred <- predict(object = nnetTune, newdata = testData[["x"]])
nnetPerf <- postResample(pred = nnetPred, obs = testData[["y"]])

Part 3. Performance

We can see from the RMSE that the MARS model gives the best performance, with an RMSE of 1.092747.

# Stack the three test-set performance vectors into one comparison table,
# one row per model, labelled by the list names.
results <- do.call(
  rbind,
  list(kNN = knnPerf, MARS = marsPerf, NNet = nnetPerf)
)

results
##          RMSE  Rsquared       MAE
## kNN  2.832597 0.7212999 2.2388682
## MARS 1.092747 0.9516917 0.8675603
## NNet 1.937947 0.8481984 1.4905810

Part 4. Variable Importance

Might as well do all the models instead of just the MARS model.

The MARS model does select the informative predictors. However, it does not select X3, indicating that it might have missed some relevant relationship between variables.

The neural network and the MARS model have virtually the same importance chart: both rank the predictors, from most to least important, as X4, X1, X2, X5, which is interesting.

However, kNN disagrees with the other two models. Although it still ranks X4, X1, and X2 as the most important predictors, it disagrees on the order of importance of X3 and X5. It also rates X10, X6, X8, and X7 as very marginally important, which suggests that it is overfitting some noise in the data.

# Compute variable-importance scores for each of the three tuned models.
knnImportance <- varImp(knnTune)
marsImportance <- varImp(marsTune)
nnetImportance <- varImp(nnetTune)

# Plot the ten highest-ranked predictors for each model.
plot(knnImportance, top = 10, main = "kNN")

plot(marsImportance, top = 10, main = "MARS")

# Plot the neural network's own scores. This call previously passed
# marsImportance, so the chart titled "NNet" was a duplicate of the MARS
# chart; any comparison drawn from that chart should be re-checked.
plot(nnetImportance, top = 10, main = "NNet")