Encoding the target feature as a factor
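
A minimal sketch of this step, assuming the data frame is named diabetes and the target column is Outcome (both appear in the splitting code below):

# Encode the 0/1 outcome as a factor so the classifiers treat it as a class label
diabetes$Outcome = factor(diabetes$Outcome, levels = c(0, 1))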

Splitting the dataset into the Training set and Test set

install.packages('caTools')

library(caTools)
set.seed(123)
split = sample.split(diabetes$Outcome, SplitRatio = 0.75)
training_set_diabetes = subset(diabetes, split == TRUE)
test_set_diabetes = subset(diabetes, split == FALSE)

Feature Scaling

training_set_diabetes[-9] = scale(training_set_diabetes[-9])
test_set_diabetes[-9] = scale(test_set_diabetes[-9])
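
Note that scale() here standardises the test set with its own means and standard deviations. A variant that reuses the training-set parameters instead, as a sketch that would replace the two lines above:

# Scale the test set with the centering and scaling values learned on the training set
train_scaled = scale(training_set_diabetes[-9])
training_set_diabetes[-9] = train_scaled
test_set_diabetes[-9] = scale(test_set_diabetes[-9],
                              center = attr(train_scaled, "scaled:center"),
                              scale = attr(train_scaled, "scaled:scale"))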

Fitting Logistic Regression to the Training set

library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
classifier = glm(formula = Outcome ~ .,
                 family = binomial,
                 data = training_set_diabetes)

prob_pred = predict(classifier, type = 'response', newdata = test_set_diabetes[-9])
y_pred = ifelse(prob_pred > 0.5, 1, 0)
# Making the Confusion Matrix
cm = table(test_set_diabetes[, 9], y_pred)
cm
##    y_pred
##       0   1
##   0 107  18
##   1  31  36
fourfoldplot(cm)
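
The confusion matrix can be summarised into accuracy, sensitivity and specificity with base R; a minimal sketch, assuming Outcome = 1 marks the diabetic (positive) class:

# Overall accuracy: correct predictions over all test cases, (107 + 36) / 192, about 0.74
sum(diag(cm)) / sum(cm)
# Sensitivity on the positive class: 36 / (31 + 36)
cm[2, 2] / sum(cm[2, ])
# Specificity on the negative class: 107 / (107 + 18)
cm[1, 1] / sum(cm[1, ])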

Fitting SVM to the Training set with a linear kernel

install.packages('e1071')

library(caret)
library(e1071)
classifier = svm(formula = Outcome ~ .,
                 data = training_set_diabetes,
                 type = 'C-classification',
                 kernel = 'linear')

# Predicting the Test set results
y_pred = predict(classifier, newdata = test_set_diabetes[-9])
# Making the Confusion Matrix
cm = table(test_set_diabetes[, 9], y_pred)
cm
##    y_pred
##       0   1
##   0 107  18
##   1  31  36
fourfoldplot(cm)

Fitting Kernel SVM to the Training set

install.packages('e1071')

library(e1071)
library(caret)
classifier = svm(formula = Outcome ~ .,
                 data = training_set_diabetes,
                 type = 'C-classification',
                 kernel = 'radial')

# Predicting the Test set results
y_pred = predict(classifier, newdata = test_set_diabetes[-9])

# Making the Confusion Matrix
cm = table(test_set_diabetes[, 9], y_pred)
cm
##    y_pred
##       0   1
##   0 106  19
##   1  29  38
fourfoldplot(cm)

Fitting Naive Bayes to the Training set

install.packages('e1071')

library(e1071)
library(caret)
classifier = naiveBayes(x = training_set_diabetes[-9],
                        y = training_set_diabetes$Outcome)

# Predicting the Test set results
y_pred = predict(classifier, newdata = test_set_diabetes[-9])

# Making the Confusion Matrix
cm = table(test_set_diabetes[, 9], y_pred)
cm
##    y_pred
##       0   1
##   0 105  20
##   1  23  44
fourfoldplot(cm)

Fitting Random Forest Classification to the Training set

install.packages('randomForest')

library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(caret)
set.seed(123)
classifier = randomForest(x = training_set_diabetes[-9],
                          y = training_set_diabetes$Outcome,
                          ntree = 300)

# Predicting the Test set results
y_pred = predict(classifier, newdata = test_set_diabetes[-9])

# Making the Confusion Matrix
cm = table(test_set_diabetes[, 9], y_pred)
cm
##    y_pred
##       0   1
##   0 106  19
##   1  29  38
fourfoldplot(cm)
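
Pulling the counts from the five confusion matrices together gives a quick comparison of test-set accuracy; a sketch, with the numbers copied from the tables above and the names used only as labels:

# Accuracy per model: correctly classified test cases over the 192 test observations
accuracies = c(logistic      = (107 + 36) / 192,
               svm_linear    = (107 + 36) / 192,
               svm_radial    = (106 + 38) / 192,
               naive_bayes   = (105 + 44) / 192,
               random_forest = (106 + 38) / 192)
round(accuracies, 3)

On this split, naive Bayes comes out highest at about 0.78, the radial-kernel SVM and the random forest reach 0.75, and logistic regression and the linear-kernel SVM land at about 0.74.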