A6_PartII

R Markdown

# Load the Pima Indians diabetes data directly from the mlbench package, so this
# step works even when mlbench has not been attached yet, then show the
# structure of the data frame (column names, types and leading values).
data("PimaIndiansDiabetes", package = "mlbench")
str(PimaIndiansDiabetes)

## 'data.frame':    768 obs. of  9 variables:
##  $ pregnant: num  6 1 8 1 0 5 3 10 2 8 ...
##  $ glucose : num  148 85 183 89 137 116 78 115 197 125 ...
##  $ pressure: num  72 66 64 66 40 74 50 0 70 96 ...
##  $ triceps : num  35 29 0 23 35 0 32 0 45 0 ...
##  $ insulin : num  0 0 0 94 168 0 88 0 543 0 ...
##  $ mass    : num  33.6 26.6 23.3 28.1 43.1 25.6 31 35.3 30.5 0 ...
##  $ pedigree: num  0.627 0.351 0.672 0.167 2.288 ...
##  $ age     : num  50 31 32 21 33 30 26 29 53 54 ...
##  $ diabetes: Factor w/ 2 levels "neg","pos": 2 1 2 1 2 1 2 1 2 2 ...

# Class balance of the outcome: number of rows for each diabetes level.
table(PimaIndiansDiabetes[["diabetes"]])

## 
## neg pos 
## 500 268

# Naive Bayes classifier for diabetes, evaluated with 5-fold cross-validation.
# The seed is fixed first so the random fold assignment is repeatable.
set.seed(7)
# Note: this object shares its name with the trainControl() function; the call
# on the right-hand side still resolves to the function.
trainControl <- trainControl(method = "cv", number = 5)
fit.nb <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "nb",
  metric = "Accuracy",
  trControl = trainControl
)
# Print the resampling summary and the tuning values that were selected.
print(fit.nb)

## Naive Bayes 
## 
## 768 samples
##   8 predictor
##   2 classes: 'neg', 'pos' 
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 614, 614, 615, 614, 615 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa    
##   FALSE      0.7564553  0.4510005
##    TRUE      0.7564978  0.4452698
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.

library(kernlab)
library(mlbench)
data("PimaIndiansDiabetes")

# Fit an SVM classifier for diabetes on all predictors with a Gaussian RBF
# kernel. No kernel parameters are supplied, so ksvm() estimates sigma itself,
# and that estimate is computed from a random subsample of the data. Fix the
# seed (same value as the caret sections of this document) so the reported
# sigma, support-vector count and training error are reproducible between runs.
set.seed(7)
fit <- ksvm(diabetes ~ ., data = PimaIndiansDiabetes, kernel = "rbfdot")

# Show the fitted model: SVM type, parameters, support vectors, training error.
print(fit)

## Support Vector Machine object of class "ksvm" 
## 
## SV type: C-svc  (classification) 
##  parameter : cost C = 1 
## 
## Gaussian Radial Basis kernel function. 
##  Hyperparameter : sigma =  0.12374975684703 
## 
## Number of Support Vectors : 435 
## 
## Objective Function Value : -352.8229 
## Training error : 0.175781

# Predicted class labels for the rows the model was fitted on. Only the first
# eight columns (the predictors) are handed to predict().
predictions <- predict(
  fit,
  PimaIndiansDiabetes[, seq_len(8)],
  type = "response"
)
# Peek at the first five predicted labels.
head(predictions, n = 5)

## [1] pos neg pos neg pos
## Levels: neg pos

# Confusion matrix on the training data: rows are the predicted labels,
# columns are the observed diabetes status.
table(predictions, PimaIndiansDiabetes[["diabetes"]])

##            
## predictions neg pos
##         neg 463  98
##         pos  37 170

library(kernlab)
library(mlbench)
data("BostonHousing")

# Fit an SVM regression of medv on every other column with a Gaussian RBF
# kernel. sigma is not specified, so ksvm() derives it from a random subsample
# of the data; seeding the generator first makes the printed fit reproducible.
set.seed(7)
fit <- ksvm(medv ~ ., data = BostonHousing, kernel = "rbfdot")

# Show the fitted model: SVM type, parameters, support vectors, training error.
print(fit)

## Support Vector Machine object of class "ksvm" 
## 
## SV type: eps-svr  (regression) 
##  parameter : epsilon = 0.1  cost C = 1 
## 
## Gaussian Radial Basis kernel function. 
##  Hyperparameter : sigma =  0.10659215661543 
## 
## Number of Support Vectors : 334 
## 
## Objective Function Value : -75.1727 
## Training error : 0.090571

# Fitted medv values for every row of the data the model was trained on,
# followed by a look at the first five of them.
predictions <- predict(fit, BostonHousing)
head(predictions, n = 5)

##          [,1]
## [1,] 26.39252
## [2,] 22.31988
## [3,] 33.78000
## [4,] 32.47819
## [5,] 32.79253

# Mean squared error of the SVM predictions, measured on the same rows the
# model was fitted to (a training-set error, not a held-out estimate).
mse <- mean((predictions - BostonHousing[["medv"]])^2)
print(mse)

## [1] 7.6611

library(caret)
library(mlbench)
data(PimaIndiansDiabetes)

# Radial-kernel SVM classifier for diabetes, scored by accuracy under 5-fold
# cross-validation. Seeding first keeps the fold assignment repeatable.
set.seed(7)
# Note: this object shares its name with the trainControl() function; the call
# on the right-hand side still resolves to the function.
trainControl <- trainControl(method = "cv", number = 5)
fit.svmRadial <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "svmRadial",
  metric = "Accuracy",
  trControl = trainControl
)

# Report the cross-validated results for each candidate and the chosen model.
print(fit.svmRadial)

## Support Vector Machines with Radial Basis Function Kernel 
## 
## 768 samples
##   8 predictor
##   2 classes: 'neg', 'pos' 
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 614, 614, 615, 614, 615 
## Resampling results across tuning parameters:
## 
##   C     Accuracy   Kappa    
##   0.25  0.7604278  0.4360310
##   0.50  0.7656056  0.4552142
##   1.00  0.7590952  0.4409422
## 
## Tuning parameter 'sigma' was held constant at a value of 0.124824
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.124824 and C = 0.5.

library(caret)
library(mlbench)
data(BostonHousing)

# Radial-kernel SVM regression of medv on all other columns, scored by RMSE
# under 5-fold cross-validation. Seeding first keeps the folds repeatable.
# This reuses the name fit.svmRadial, replacing any earlier object of that name.
set.seed(7)
# Note: this object shares its name with the trainControl() function; the call
# on the right-hand side still resolves to the function.
trainControl <- trainControl(method = "cv", number = 5)
fit.svmRadial <- train(
  medv ~ .,
  data = BostonHousing,
  method = "svmRadial",
  metric = "RMSE",
  trControl = trainControl
)
# Report the cross-validated results for each candidate and the chosen model.
print(fit.svmRadial)

## Support Vector Machines with Radial Basis Function Kernel 
## 
## 506 samples
##  13 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 407, 404, 405, 404, 404 
## Resampling results across tuning parameters:
## 
##   C     RMSE      Rsquared   MAE     
##   0.25  4.842935  0.7527188  2.823026
##   0.50  4.303449  0.7969666  2.540125
##   1.00  3.830139  0.8333874  2.327301
## 
## Tuning parameter 'sigma' was held constant at a value of 0.1057462
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were sigma = 0.1057462 and C = 1.

library(rpart)
library(mlbench)
data(PimaIndiansDiabetes)

# Grow a classification tree for diabetes from all predictors using rpart's
# default settings, then print the split rules node by node.
fit <- rpart(diabetes ~ ., data = PimaIndiansDiabetes)
print(fit)

## n= 768 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##   1) root 768 268 neg (0.65104167 0.34895833)  
##     2) glucose< 127.5 485  94 neg (0.80618557 0.19381443)  
##       4) age< 28.5 271  23 neg (0.91512915 0.08487085) *
##       5) age>=28.5 214  71 neg (0.66822430 0.33177570)  
##        10) mass< 26.35 41   2 neg (0.95121951 0.04878049) *
##        11) mass>=26.35 173  69 neg (0.60115607 0.39884393)  
##          22) glucose< 99.5 55  10 neg (0.81818182 0.18181818) *
##          23) glucose>=99.5 118  59 neg (0.50000000 0.50000000)  
##            46) pedigree< 0.561 84  34 neg (0.59523810 0.40476190)  
##              92) pedigree< 0.2 21   4 neg (0.80952381 0.19047619) *
##              93) pedigree>=0.2 63  30 neg (0.52380952 0.47619048)  
##               186) pregnant>=1.5 52  21 neg (0.59615385 0.40384615)  
##                 372) pressure>=67 40  12 neg (0.70000000 0.30000000) *
##                 373) pressure< 67 12   3 pos (0.25000000 0.75000000) *
##               187) pregnant< 1.5 11   2 pos (0.18181818 0.81818182) *
##            47) pedigree>=0.561 34   9 pos (0.26470588 0.73529412) *
##     3) glucose>=127.5 283 109 pos (0.38515901 0.61484099)  
##       6) mass< 29.95 76  24 neg (0.68421053 0.31578947)  
##        12) glucose< 145.5 41   6 neg (0.85365854 0.14634146) *
##        13) glucose>=145.5 35  17 pos (0.48571429 0.51428571)  
##          26) insulin< 14.5 21   8 neg (0.61904762 0.38095238) *
##          27) insulin>=14.5 14   4 pos (0.28571429 0.71428571) *
##       7) mass>=29.95 207  57 pos (0.27536232 0.72463768)  
##        14) glucose< 157.5 115  45 pos (0.39130435 0.60869565)  
##          28) age< 30.5 50  23 neg (0.54000000 0.46000000)  
##            56) pressure>=61 40  13 neg (0.67500000 0.32500000)  
##             112) mass< 41.8 31   7 neg (0.77419355 0.22580645) *
##             113) mass>=41.8 9   3 pos (0.33333333 0.66666667) *
##            57) pressure< 61 10   0 pos (0.00000000 1.00000000) *
##          29) age>=30.5 65  18 pos (0.27692308 0.72307692) *
##        15) glucose>=157.5 92  12 pos (0.13043478 0.86956522) *

# Predicted class labels from the tree for the rows it was fitted on, using
# only the first eight columns (the predictors).
predictions <- predict(
  fit,
  PimaIndiansDiabetes[, seq_len(8)],
  type = "class"
)
# Confusion matrix on the training data: rows are the predicted labels,
# columns are the observed diabetes status.
table(predictions, PimaIndiansDiabetes[["diabetes"]])

##            
## predictions neg pos
##         neg 449  72
##         pos  51 196

A6_PartII

Lila Ghemri

2022-12-22

R Markdown