## k-Nearest Neighbors
##
## 200 samples
## 10 predictor
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 200, 200, 200, 200, 200, 200, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 3.603887 0.4776204 2.908662
## 7 3.459101 0.5183524 2.789848
## 9 3.387264 0.5482165 2.731947
## 11 3.366802 0.5663369 2.710553
## 13 3.320793 0.5917131 2.665083
## 15 3.303479 0.6088481 2.653429
## 17 3.312885 0.6148761 2.662543
## 19 3.325304 0.6194656 2.675679
## 21 3.337219 0.6232702 2.692637
## 23 3.348356 0.6301476 2.700319
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 15.
## [1] 3.303479
## [1] 15
## RMSE Rsquared MAE
## 3.2163068 0.6711406 2.5822776
## k-Nearest Neighbors
##
## 200 samples
## 10 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 180, 180, 180, 180, 180, 180, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 3.213027 0.6097890 2.637578
## 7 3.152126 0.6438120 2.560190
## 9 3.109557 0.6710604 2.520907
## 11 3.065667 0.6964043 2.486666
## 13 3.089659 0.7104752 2.511473
## 15 3.089470 0.7285052 2.533084
## 17 3.114551 0.7284227 2.550220
## 19 3.140322 0.7234940 2.552184
## 21 3.150406 0.7266336 2.583701
## 23 3.178313 0.7301648 2.605996
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 11.
## [1] 3.065667
## [1] 11
## RMSE Rsquared MAE
## 3.1648236 0.6596898 2.5350940
## Selected 12 of 18 terms, and 6 of 10 predictors
## Termination condition: Reached nk 21
## Importance: X1, X4, X2, X5, X3, X6, X7-unused, X8-unused, X9-unused, ...
## Number of terms at each degree of interaction: 1 11 (additive model)
## GCV 2.540556 RSS 397.9654 GRSq 0.8968524 RSq 0.9183982
## Call: earth(x=trainingData$x, y=trainingData$y)
##
## coefficients
## (Intercept) 18.451984
## h(0.621722-X1) -11.074396
## h(0.601063-X2) -10.744225
## h(X3-0.281766) 20.607853
## h(0.447442-X3) 17.880232
## h(X3-0.447442) -23.282007
## h(X3-0.636458) 15.150350
## h(0.734892-X4) -10.027487
## h(X4-0.734892) 9.092045
## h(0.850094-X5) -4.723407
## h(X5-0.850094) 10.832932
## h(X6-0.361791) -1.956821
##
## Selected 12 of 18 terms, and 6 of 10 predictors
## Termination condition: Reached nk 21
## Importance: X1, X4, X2, X5, X3, X6, X7-unused, X8-unused, X9-unused, ...
## Number of terms at each degree of interaction: 1 11 (additive model)
## GCV 2.540556 RSS 397.9654 GRSq 0.8968524 RSq 0.9183982
## plotmo grid: X1 X2 X3 X4 X5 X6
## 0.5139349 0.5106664 0.537307 0.4445841 0.5343299 0.4975981
## X7 X8 X9 X10
## 0.4688035 0.497961 0.5288716 0.5359218
## [1] "first mars model:"
## RMSE Rsquared MAE
## 1.8136467 0.8677298 1.3911836
## [1] "tuned mars model"
## RMSE Rsquared MAE
## 1.1492504 0.9471145 0.9158382
## plotmo grid: X1 X2 X3 X4 X5 X6
## 0.5139349 0.5106664 0.537307 0.4445841 0.5343299 0.4975981
## X7 X8 X9 X10
## 0.4688035 0.497961 0.5288716 0.5359218
## RMSE Rsquared MAE
## 1.5097628 0.9096305 1.1809958
## [1] "svm fit stats"
## RMSE Rsquared MAE
## 2.0852438 0.8237714 1.5892080
## [1] "linear model performance"
## RMSE Rsquared MAE
## 1.3922542 0.4454847 1.0822378
## Selected 13 of 21 terms, and 9 of 56 predictors
## Termination condition: RSq changed by less than 0.001 at 21 terms
## Importance: ManufacturingProcess32, ManufacturingProcess09, ...
## Number of terms at each degree of interaction: 1 12 (additive model)
## GCV 1.03222 RSS 89.52947 GRSq 0.7156628 RSq 0.8103038
## Call: earth(x=training_set[,-1], y=training_set[,1])
##
## coefficients
## (Intercept) 38.285713
## h(72.17-BiologicalMaterial03) -0.120302
## h(ManufacturingProcess01-10.2) 0.341730
## h(43.92-ManufacturingProcess09) -0.520137
## h(ManufacturingProcess09-43.92) 0.296091
## h(33.1-ManufacturingProcess13) 3.099246
## h(10.6-ManufacturingProcess28) 0.074423
## h(ManufacturingProcess28-10.6) 1.243163
## h(ManufacturingProcess32-152) 0.213673
## h(62-ManufacturingProcess33) 0.386951
## h(7-ManufacturingProcess39) -0.414697
## h(ManufacturingProcess39-7) -2.329411
## h(ManufacturingProcess42-11.5) -1.292715
##
## Selected 13 of 21 terms, and 9 of 56 predictors
## Termination condition: RSq changed by less than 0.001 at 21 terms
## Importance: ManufacturingProcess32, ManufacturingProcess09, ...
## Number of terms at each degree of interaction: 1 12 (additive model)
## GCV 1.03222 RSS 89.52947 GRSq 0.7156628 RSq 0.8103038
## plotmo grid: BiologicalMaterial01 BiologicalMaterial02
## 6.3 55.09
## BiologicalMaterial03 BiologicalMaterial04 BiologicalMaterial05
## 67.195 12.22 18.56
## BiologicalMaterial06 BiologicalMaterial08 BiologicalMaterial09
## 48.46 17.495 12.83
## BiologicalMaterial10 BiologicalMaterial11 BiologicalMaterial12
## 2.71 145.84 20.04
## ManufacturingProcess01 ManufacturingProcess02 ManufacturingProcess03
## 11.4 21 1.55
## ManufacturingProcess04 ManufacturingProcess05 ManufacturingProcess06
## 934 999.35 206.6
## ManufacturingProcess07 ManufacturingProcess08 ManufacturingProcess09
## 177 178 45.77
## ManufacturingProcess10 ManufacturingProcess11 ManufacturingProcess12
## 9 9.4 0
## ManufacturingProcess13 ManufacturingProcess14 ManufacturingProcess15
## 34.6 4858 6033
## ManufacturingProcess16 ManufacturingProcess17 ManufacturingProcess18
## 4589 34.4 4835
## ManufacturingProcess19 ManufacturingProcess20 ManufacturingProcess21
## 6023 4582.5 -0.3
## ManufacturingProcess22 ManufacturingProcess23 ManufacturingProcess24
## 5 3 8
## ManufacturingProcess25 ManufacturingProcess26 ManufacturingProcess27
## 4854 6045.5 4587
## ManufacturingProcess28 ManufacturingProcess29 ManufacturingProcess30
## 10.4 19.9 9.1
## ManufacturingProcess31 ManufacturingProcess32 ManufacturingProcess33
## 70.8 159 64
## ManufacturingProcess34 ManufacturingProcess35 ManufacturingProcess36
## 2.5 495 0.019
## ManufacturingProcess37 ManufacturingProcess38 ManufacturingProcess39
## 0.9 3 7.2
## ManufacturingProcess40 ManufacturingProcess41 ManufacturingProcess42
## 0 0 11.6
## ManufacturingProcess43 ManufacturingProcess44 ManufacturingProcess45
## 0.8 1.9 2.2
## [1] "first mars model"
## RMSE Rsquared MAE
## 1.2440949 0.4886308 0.9983959
## [1] "custom grid-tuned mars model"
## RMSE Rsquared MAE
## 1.2282855 0.4721621 0.9789413
## [1] "neural net"
## RMSE Rsquared MAE
## 1.8046830 0.2774567 1.4488611
## [1] "support vector machine"
## RMSE Rsquared MAE
## 0.9146136 0.7297499 0.7136072
## [1] "svm fit stats"
## RMSE Rsquared MAE
## 0.9728524 0.7009538 0.7633500
## Length Class Mode
## 1 ksvm S4
## Support Vector Machine object of class "ksvm"
##
## SV type: eps-svr (regression)
## parameter : epsilon = 0.07 cost C = 30
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.01469409473627
##
## Number of Support Vectors : 116
##
## Objective Function Value : -91.6408
## Training error : 0.004475
## Laplace distr. width : 0
## [1] -0.59403334 0.96924443 0.66786116 0.73738170 1.59630896
## [6] -2.87819228 0.61116495 0.32601881 2.33103371 0.98787724
## [11] -0.69768326 -0.53812853 -0.23510450 0.28574904 -0.40115125
## [16] 1.33152492 -0.29816007 -2.39219089 -0.51513163 -0.62250736
## [21] -0.54830969 -0.23437236 -1.45892795 0.42089477 0.42666967
## [26] 0.36567815 0.66014184 -1.79130748 -1.87067753 8.08848322
## [31] 0.40430138 0.23814737 4.72077475 -7.06155003 -0.98485791
## [36] -0.73816275 -3.56865616 -0.62125957 -0.84942550 0.72075596
## [41] 1.70944297 -1.54030544 -0.25809483 -1.42546548 1.36323365
## [46] 5.42184382 0.01454477 -1.33753918 -2.37774004 -6.20828393
## [51] -2.26102514 0.36974108 2.98903319 -2.80434451 0.23920180
## [56] 0.56613440 1.29938402 0.66768274 -0.68460886 -1.96256872
## [61] -1.10955508 8.00360321 -0.64565813 2.68966057 6.44443997
## [66] -3.62176524 -0.05005937 -0.71887568 -2.07940980 -1.55504261
## [71] -0.31552351 -0.16647941 -4.13903543 -2.10310237 3.12950404
## [76] 3.04746825 8.38006687 9.88303989 3.69644644 -2.15856406
## [81] 0.83377640 5.78956526 4.27943495 0.51895495 8.69167573
## [86] 4.01951130 0.34479962 -0.73969058 2.20742178 -3.48934210
## [91] -0.34326826 -1.94545647 -4.49095482 1.99711090 -4.67701281
## [96] 0.07484472 -5.23654489 -1.51375807 -0.60279751 6.94014920
## [101] 4.87672283 -2.96107705 -2.83207703 -5.01718573 -4.77861922
## [106] -2.94905362 -5.62764645 -5.12994052 -6.78505930 1.30948097
## [111] 2.54784550 -0.17053201 -1.57240128 0.14689989 -1.17660994
## [116] 0.07918679
Exercises are from: Applied Predictive Modeling, by Max Kuhn and Kjell Johnson.
# Simulate the Friedman 1 benchmark: 200 training rows and 5000 test rows,
# both generated with noise sd = 1.
set.seed(200)
trainingData <- mlbench.friedman1(200, sd = 1)
# Store the predictors as a data frame so the columns carry names.
trainingData$x <- data.frame(trainingData$x)

# Plot the predictors against the response.
featurePlot(trainingData$x, trainingData$y)

testData <- mlbench.friedman1(5000, sd = 1)
testData$x <- data.frame(testData$x)
# k-nearest neighbours, tuned over 10 candidate values of k. No trControl is
# given, so train() uses its default resampling scheme.
# The argument must be spelled `preProcess`: argument matching is
# case-sensitive, so the former `preproc` fell through to `...` and the model
# was fitted without centering or scaling.
knnmodel <- train(
  x = trainingData$x,
  y = trainingData$y,
  method = "knn",
  preProcess = c("center", "scale"),
  tuneLength = 10
)
knnmodel
# knnmodel$results
min(knnmodel$results$RMSE)  # best resampled RMSE
knnmodel$bestTune$k         # k that achieved it

# Score the selected model on the held-out test set.
knnPred <- predict(knnmodel, newdata = testData$x)
postResample(pred = knnPred, obs = testData$y)
# Same k-nearest-neighbours search, now resampled with 10-fold
# cross-validation. `ctrl` is reused by later train() calls.
ctrl <- trainControl(method = "cv", number = 10)
# `preProcess` (not `preproc`) is the name train() matches; the misspelled
# argument was silently passed through `...` and no pre-processing happened.
knnmodel <- train(
  x = trainingData$x,
  y = trainingData$y,
  method = "knn",
  preProcess = c("center", "scale"),
  trControl = ctrl,
  tuneLength = 10
)

knnmodel
# knnmodel$results
min(knnmodel$results$RMSE)  # best resampled RMSE
knnmodel$bestTune$k         # k that achieved it

# Score the selected model on the held-out test set.
knnPred <- predict(knnmodel, newdata = testData$x)
postResample(pred = knnPred, obs = testData$y)
# MARS fitted directly with earth() using its default settings.
mars_model <- earth(trainingData$x, y = trainingData$y)
mars_model
summary(mars_model)
plotmo(mars_model)
plot(mars_model)

# Test-set performance of the untuned model.
marsPred <- predict(mars_model, newdata = testData$x)
print("first mars model:")
postResample(pred = marsPred, obs = testData$y)
# MARS tuned through train() over an explicit grid of interaction degree and
# number of retained terms. Relies on `ctrl` (the trainControl object) defined
# earlier in the script.
marsGrid <- expand.grid(.degree = 1:3, .nprune = 20:24)
# tuneLength is omitted: an explicit tuneGrid already fixes the candidates.
marsTuned <- train(
  x = trainingData$x,
  y = trainingData$y,
  method = "earth",
  tuneGrid = marsGrid,
  trControl = ctrl
)

plot(marsTuned)

# Test-set performance of the tuned model.
marsPred <- predict(marsTuned, newdata = testData$x)
print("tuned mars model")
postResample(pred = marsPred, obs = testData$y)
# Single-hidden-layer network with 6 hidden units, weight decay, and a linear
# output unit for regression.
# NOTE(review): `preproc` is not a documented nnet() argument and is presumably
# swallowed by `...`, so the predictors reach the network unscaled. Confirm,
# and scale explicitly if centering/scaling was intended.
nnetfit <- nnet(
  x = trainingData$x,
  y = trainingData$y,
  size = 6,
  decay = .0353,
  linout = TRUE,
  trace = FALSE,
  preproc = c("center", "scale"),
  maxit = 300
)
plotmo(nnetfit)

# Test-set performance.
nnetPred <- predict(nnetfit, newdata = testData$x)
postResample(pred = nnetPred, obs = testData$y)
# Support vector regression fitted directly with ksvm() at fixed cost and
# epsilon.
# NOTE(review): `preproc` is not a documented ksvm() argument and is presumably
# passed through `...` without effect. Confirm against the kernlab docs.
svm_fit <- ksvm(
  x = as.matrix(trainingData$x),
  y = trainingData$y,
  preproc = c("center", "scale"),
  C = 30,
  epsilon = .07
)

# Test-set performance.
svmPred <- predict(svm_fit, newdata = testData$x)
print("svm fit stats")
postResample(pred = svmPred, obs = testData$y)
# Load the chemical manufacturing data and flatten it into a numeric matrix
# that keeps the original column names.
data(ChemicalManufacturingProcess)
column_names <- names(ChemicalManufacturingProcess)
# The column count is taken from the data rather than hard-coded.
chem_set <- matrix(
  unlist(ChemicalManufacturingProcess),
  ncol = length(column_names),
  dimnames = list(NULL, column_names)
)
# Impute with knn.impute using k = 5.
chem_set <- knn.impute(chem_set, k = 5)

# Column 8 was found by nearZeroVar to have all values = 100, so we take it
# out.
chem_set <- chem_set[, -8]

# Seed the RNG so the train/test split is reproducible. The previous
# `set.seed=39` only created a variable called `set.seed` and seeded nothing.
set.seed(39)
# Hold out 44 randomly chosen rows as the test set.
test_set_indices <- sample.int(nrow(chem_set), size = 44)
test_set <- chem_set[test_set_indices, ]
training_set <- chem_set[-test_set_indices, ]

# Data-frame copies for the formula interface used by lm().
training_set_df <- data.frame(training_set)
test_set_df <- data.frame(test_set)
# Ordinary linear regression of Yield on every other column.
lm_model <- lm(Yield ~ ., data = training_set_df)

# Test-set performance; column 1 of the test frame is used as the observed
# response.
lmPred <- predict(lm_model, newdata = test_set_df)
print("linear model performance")
postResample(pred = lmPred, obs = test_set_df[, 1])
# MARS on the chemical data: column 1 is the response, the rest are
# predictors.
mars_model <- earth(x = training_set[, -1], y = training_set[, 1])
mars_model
summary(mars_model)
plotmo(mars_model)
# plot(mars_model)

# Test-set performance of the untuned model.
marsPred <- predict(mars_model, newdata = test_set[, -1])
print("first mars model")
postResample(pred = marsPred, obs = test_set[, 1])

# Tune degree and nprune with 10-fold cross-validation.
ctrl <- trainControl(method = "cv", number = 10)
marsGrid <- expand.grid(.degree = 1:4, .nprune = 6:56)
# tuneLength is omitted: an explicit tuneGrid already fixes the candidates.
marsTuned <- train(
  x = training_set[, -1],
  y = training_set[, 1],
  method = "earth",
  tuneGrid = marsGrid,
  trControl = ctrl
)

# Test-set performance of the tuned model.
marsPred <- predict(marsTuned, newdata = test_set[, -1])
plot(marsTuned)
print("custom grid-tuned mars model")
postResample(pred = marsPred, obs = test_set[, 1])

# Variable importance of the tuned MARS model. The `gbmImp` name is kept from
# the original script even though no gbm model is involved.
gbmImp <- varImp(marsTuned)

plot(gbmImp, top = 7)
# Single-hidden-layer network with 8 hidden units on the chemical data.
# NOTE(review): `preproc` is not a documented nnet() argument and is presumably
# swallowed by `...`, so the predictors reach the network unscaled. Confirm,
# and scale explicitly if centering/scaling was intended.
nnetfit <- nnet(
  x = training_set[, -1],
  y = training_set[, 1],
  size = 8,
  decay = .03,
  linout = TRUE,
  trace = FALSE,
  preproc = c("center", "scale"),
  maxit = 300
)
# plotmo(nnetfit)

# Test-set performance.
nnetPred <- predict(nnetfit, newdata = test_set[, -1])
print("neural net")
postResample(pred = nnetPred, obs = test_set[, 1])
# Support vector regression fitted directly with ksvm() at fixed cost and
# epsilon.
# NOTE(review): `preproc` is not a documented ksvm() argument and is presumably
# passed through `...` without effect. Confirm against the kernlab docs.
svm_fit <- ksvm(
  x = as.matrix(training_set[, -1]),
  y = training_set[, 1],
  preproc = c("center", "scale"),
  C = 30,
  epsilon = .07,
  prob.model = TRUE
)
# Keep the ksvm predictions in their own variable. They used to be stored in
# `svmPred` and overwritten by the caret model's predictions before being
# reported, so "svm fit stats" repeated the caret result.
svmFitPred <- predict(svm_fit, newdata = test_set[, -1])

# Radial-kernel SVM tuned by train(), resampled according to `ctrl` (the
# trainControl object defined earlier in the script).
svm_model <- train(
  x = training_set[, -1],
  y = training_set[, 1],
  method = "svmRadial",
  trControl = ctrl,
  tuneLength = 10
)
svmPred <- predict(svm_model, newdata = test_set[, -1])
print("support vector machine")
postResample(pred = svmPred, obs = test_set[, 1])

# Variable importance for the tuned SVM. The `gbmImp` name is kept from the
# original script even though no gbm model is involved.
gbmImp <- varImp(svm_model)
plot(gbmImp, top = 15)

# Test-set performance and details of the directly fitted ksvm model.
print("svm fit stats")
postResample(pred = svmFitPred, obs = test_set[, 1])
summary(svm_fit)
svm_fit
coef(svm_fit)