# R Markdown
# Load the packages this section relies on: mlbench provides the
# PimaIndiansDiabetes data set, and caret provides train()/trainControl(),
# which are called further down in this section.
library(caret)
library(mlbench)

# Load the Pima Indians diabetes data and inspect its structure.
data("PimaIndiansDiabetes")
str(PimaIndiansDiabetes)
## 'data.frame': 768 obs. of 9 variables:
## $ pregnant: num 6 1 8 1 0 5 3 10 2 8 ...
## $ glucose : num 148 85 183 89 137 116 78 115 197 125 ...
## $ pressure: num 72 66 64 66 40 74 50 0 70 96 ...
## $ triceps : num 35 29 0 23 35 0 32 0 45 0 ...
## $ insulin : num 0 0 0 94 168 0 88 0 543 0 ...
## $ mass : num 33.6 26.6 23.3 28.1 43.1 25.6 31 35.3 30.5 0 ...
## $ pedigree: num 0.627 0.351 0.672 0.167 2.288 ...
## $ age : num 50 31 32 21 33 30 26 29 53 54 ...
## $ diabetes: Factor w/ 2 levels "neg","pos": 2 1 2 1 2 1 2 1 2 2 ...
# Class balance of the outcome: number of rows per level of `diabetes`.
table(PimaIndiansDiabetes[["diabetes"]])
##
## neg pos
## 500 268
# Naive Bayes classifier fitted through caret with 5-fold cross-validation.
# The seed is set first so the random fold assignment is repeatable.
set.seed(7)
# Stored as `cv_control` rather than `trainControl` so the object does not
# mask caret's trainControl() function.
cv_control <- trainControl(method = "cv", number = 5)
fit.nb <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "nb",
  metric = "Accuracy",
  trControl = cv_control
)
# Print the resampled Accuracy/Kappa for each tuning value that was tried.
print(fit.nb)
## Naive Bayes
##
## 768 samples
## 8 predictor
## 2 classes: 'neg', 'pos'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 614, 614, 615, 614, 615
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE 0.7564553 0.4510005
## TRUE 0.7564978 0.4452698
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
library(kernlab)
library(mlbench)
data("PimaIndiansDiabetes")
# Fix the RNG before fitting: with the default kpar = "automatic", ksvm()
# estimates the RBF sigma via sigest(), which works on a random subsample of
# the data, so the fitted model changes between runs unless a seed is set.
set.seed(7)
# Fit an SVM classifier with a Gaussian radial basis kernel, using every
# other column as a predictor of `diabetes`.
fit <- ksvm(diabetes ~ ., data = PimaIndiansDiabetes, kernel = "rbfdot")
# Summarize the fit
print(fit)
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.12374975684703
##
## Number of Support Vectors : 435
##
## Objective Function Value : -352.8229
## Training error : 0.175781
# Predict class labels for the same rows the model was fitted on, passing
# only the predictor columns (everything except the `diabetes` outcome).
predictions <- predict(
  fit,
  newdata = PimaIndiansDiabetes[, names(PimaIndiansDiabetes) != "diabetes"],
  type = "response"
)
# Show the first five predicted labels.
head(predictions, n = 5)
## [1] pos neg pos neg pos
## Levels: neg pos
# Confusion matrix: predicted labels (rows) against observed labels (columns).
table(predictions, PimaIndiansDiabetes[["diabetes"]])
##
## predictions neg pos
## neg 463 98
## pos 37 170
library(kernlab)
library(mlbench)
data("BostonHousing")
# Fix the RNG before fitting: with the default kpar = "automatic", ksvm()
# estimates the RBF sigma via sigest(), which works on a random subsample of
# the data, so the fitted model changes between runs unless a seed is set.
set.seed(7)
# Fit model: SVM regression of `medv` on all other columns with a Gaussian
# radial basis kernel.
fit <- ksvm(medv ~ ., data = BostonHousing, kernel = "rbfdot")
# Summarize fit
print(fit)
## Support Vector Machine object of class "ksvm"
##
## SV type: eps-svr (regression)
## parameter : epsilon = 0.1 cost C = 1
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.10659215661543
##
## Number of Support Vectors : 334
##
## Objective Function Value : -75.1727
## Training error : 0.090571
# Predict `medv` for every row of the data the model was fitted on.
predictions <- predict(fit, newdata = BostonHousing)
# Show the first five predicted values.
head(predictions, n = 5)
## [,1]
## [1,] 26.39252
## [2,] 22.31988
## [3,] 33.78000
## [4,] 32.47819
## [5,] 32.79253
# Summarize accuracy: mean squared error between predicted and observed
# `medv`. The predictions were made on the fitting data, so this is a
# training-set error and likely understates out-of-sample error.
mse <- mean((predictions - BostonHousing[["medv"]])^2)
print(mse)
## [1] 7.6611
library(caret)
library(mlbench)
data("PimaIndiansDiabetes")
# Radial-kernel SVM classifier fitted through caret with 5-fold
# cross-validation; the seed makes the fold assignment repeatable.
set.seed(7)
# Stored as `cv_control` rather than `trainControl` so the object does not
# mask caret's trainControl() function.
cv_control <- trainControl(method = "cv", number = 5)
fit.svmRadial <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "svmRadial",
  metric = "Accuracy",
  trControl = cv_control
)
# Summarize fit
print(fit.svmRadial)
## Support Vector Machines with Radial Basis Function Kernel
##
## 768 samples
## 8 predictor
## 2 classes: 'neg', 'pos'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 614, 614, 615, 614, 615
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.7604278 0.4360310
## 0.50 0.7656056 0.4552142
## 1.00 0.7590952 0.4409422
##
## Tuning parameter 'sigma' was held constant at a value of 0.124824
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.124824 and C = 0.5.
library(caret)
library(mlbench)
data("BostonHousing")
# Radial-kernel SVM regression of `medv` fitted through caret with 5-fold
# cross-validation; the seed makes the fold assignment repeatable.
set.seed(7)
# Stored as `cv_control` rather than `trainControl` so the object does not
# mask caret's trainControl() function.
cv_control <- trainControl(method = "cv", number = 5)
# NOTE: this reuses the name `fit.svmRadial`, replacing any earlier object
# bound to that name.
fit.svmRadial <- train(
  medv ~ .,
  data = BostonHousing,
  method = "svmRadial",
  metric = "RMSE",
  trControl = cv_control
)
# Print the resampled RMSE/Rsquared/MAE for each cost value that was tried.
print(fit.svmRadial)
## Support Vector Machines with Radial Basis Function Kernel
##
## 506 samples
## 13 predictor
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 407, 404, 405, 404, 404
## Resampling results across tuning parameters:
##
## C RMSE Rsquared MAE
## 0.25 4.842935 0.7527188 2.823026
## 0.50 4.303449 0.7969666 2.540125
## 1.00 3.830139 0.8333874 2.327301
##
## Tuning parameter 'sigma' was held constant at a value of 0.1057462
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were sigma = 0.1057462 and C = 1.
library(rpart)
library(mlbench)
data("PimaIndiansDiabetes")
# Grow a decision tree for `diabetes` from all remaining columns, using
# rpart's default settings.
fit <- rpart(diabetes ~ ., data = PimaIndiansDiabetes)
# Print the tree's nodes and splits.
print(fit)
## n= 768
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 768 268 neg (0.65104167 0.34895833)
## 2) glucose< 127.5 485 94 neg (0.80618557 0.19381443)
## 4) age< 28.5 271 23 neg (0.91512915 0.08487085) *
## 5) age>=28.5 214 71 neg (0.66822430 0.33177570)
## 10) mass< 26.35 41 2 neg (0.95121951 0.04878049) *
## 11) mass>=26.35 173 69 neg (0.60115607 0.39884393)
## 22) glucose< 99.5 55 10 neg (0.81818182 0.18181818) *
## 23) glucose>=99.5 118 59 neg (0.50000000 0.50000000)
## 46) pedigree< 0.561 84 34 neg (0.59523810 0.40476190)
## 92) pedigree< 0.2 21 4 neg (0.80952381 0.19047619) *
## 93) pedigree>=0.2 63 30 neg (0.52380952 0.47619048)
## 186) pregnant>=1.5 52 21 neg (0.59615385 0.40384615)
## 372) pressure>=67 40 12 neg (0.70000000 0.30000000) *
## 373) pressure< 67 12 3 pos (0.25000000 0.75000000) *
## 187) pregnant< 1.5 11 2 pos (0.18181818 0.81818182) *
## 47) pedigree>=0.561 34 9 pos (0.26470588 0.73529412) *
## 3) glucose>=127.5 283 109 pos (0.38515901 0.61484099)
## 6) mass< 29.95 76 24 neg (0.68421053 0.31578947)
## 12) glucose< 145.5 41 6 neg (0.85365854 0.14634146) *
## 13) glucose>=145.5 35 17 pos (0.48571429 0.51428571)
## 26) insulin< 14.5 21 8 neg (0.61904762 0.38095238) *
## 27) insulin>=14.5 14 4 pos (0.28571429 0.71428571) *
## 7) mass>=29.95 207 57 pos (0.27536232 0.72463768)
## 14) glucose< 157.5 115 45 pos (0.39130435 0.60869565)
## 28) age< 30.5 50 23 neg (0.54000000 0.46000000)
## 56) pressure>=61 40 13 neg (0.67500000 0.32500000)
## 112) mass< 41.8 31 7 neg (0.77419355 0.22580645) *
## 113) mass>=41.8 9 3 pos (0.33333333 0.66666667) *
## 57) pressure< 61 10 0 pos (0.00000000 1.00000000) *
## 29) age>=30.5 65 18 pos (0.27692308 0.72307692) *
## 15) glucose>=157.5 92 12 pos (0.13043478 0.86956522) *
# Predict class labels for the rows the tree was fitted on, passing only the
# predictor columns (everything except the `diabetes` outcome).
predictions <- predict(
  fit,
  newdata = PimaIndiansDiabetes[, names(PimaIndiansDiabetes) != "diabetes"],
  type = "class"
)
# Confusion matrix: predicted labels (rows) against observed labels (columns).
table(predictions, PimaIndiansDiabetes[["diabetes"]])
##
## predictions neg pos
## neg 449 72
## pos 51 196