Encoding the target feature as a factor
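
A minimal sketch of this step, assuming the data frame is named diabetes and the target column is Outcome (both appear in the splitting code below):

# Encode the 0/1 outcome as a factor so the classifiers treat it as a class label
diabetes$Outcome = factor(diabetes$Outcome, levels = c(0, 1))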

Splitting the dataset into the Training set and Test set

install.packages('caTools')

library(caTools)
set.seed(123)
split = sample.split(diabetes$Outcome, SplitRatio = 0.75)
training_set_diabetes = subset(diabetes, split == TRUE)
test_set_diabetes = subset(diabetes, split == FALSE)

Feature Scaling

training_set_diabetes[-9] = scale(training_set_diabetes[-9])
test_set_diabetes[-9] = scale(test_set_diabetes[-9])
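
Note that scale() here standardises the test set with its own means and standard deviations. A variant that reuses the training-set parameters instead, as a sketch that would replace the two lines above:

# Scale the test set with the centering and scaling values learned on the training set
train_scaled = scale(training_set_diabetes[-9])
training_set_diabetes[-9] = train_scaled
test_set_diabetes[-9] = scale(test_set_diabetes[-9],
                              center = attr(train_scaled, "scaled:center"),
                              scale = attr(train_scaled, "scaled:scale"))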

Fitting Logistic Regression to the Training set

library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
classifier = glm(formula = Outcome ~ .,
                 family = binomial,
                 data = training_set_diabetes)

prob_pred = predict(classifier, type = 'response', newdata = test_set_diabetes[-9])
y_pred = ifelse(prob_pred > 0.5, 1, 0)
# Making the Confusion Matrix
cm = table(test_set_diabetes[, 9], y_pred)
cm
##    y_pred
##       0   1
##   0 107  18
##   1  31  36
fourfoldplot(cm)
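
The confusion matrix can be summarised into accuracy, sensitivity and specificity with base R; a minimal sketch, assuming Outcome = 1 marks the diabetic (positive) class:

# Overall accuracy: correct predictions over all test cases, (107 + 36) / 192, about 0.74
sum(diag(cm)) / sum(cm)
# Sensitivity on the positive class: 36 / (31 + 36)
cm[2, 2] / sum(cm[2, ])
# Specificity on the negative class: 107 / (107 + 18)
cm[1, 1] / sum(cm[1, ])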

Fitting SVM to the Training set with a linear kernel

install.packages('e1071')

library(caret)
library(e1071)
classifier = svm(formula = Outcome ~ .,
                 data = training_set_diabetes,
                 type = 'C-classification',
                 kernel = 'linear')

# Predicting the Test set results
y_pred = predict(classifier, newdata = test_set_diabetes[-9])
# Making the Confusion Matrix
cm = table(test_set_diabetes[, 9], y_pred)
cm
##    y_pred
##       0   1
##   0 107  18
##   1  31  36
fourfoldplot(cm)

Fitting Kernel SVM to the Training set

install.packages('e1071')

library(e1071)
library(caret)
classifier = svm(formula = Outcome ~ .,
                 data = training_set_diabetes,
                 type = 'C-classification',
                 kernel = 'radial')

# Predicting the Test set results
y_pred = predict(classifier, newdata = test_set_diabetes[-9])

# Making the Confusion Matrix
cm = table(test_set_diabetes[, 9], y_pred)
cm
##    y_pred
##       0   1
##   0 106  19
##   1  29  38
fourfoldplot(cm)

Fitting Naive Bayes to the Training set

install.packages('e1071')

library(e1071)
library(caret)
classifier = naiveBayes(x = training_set_diabetes[-9],
                        y = training_set_diabetes$Outcome)

# Predicting the Test set results
y_pred = predict(classifier, newdata = test_set_diabetes[-9])

# Making the Confusion Matrix
cm = table(test_set_diabetes[, 9], y_pred)
cm
##    y_pred
##       0   1
##   0 105  20
##   1  23  44
fourfoldplot(cm)

Fitting Random Forest Classification to the Training set

install.packages('randomForest')

library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(caret)
set.seed(123)
classifier = randomForest(x = training_set_diabetes[-9],
                          y = training_set_diabetes$Outcome,
                          ntree = 300)

# Predicting the Test set results
y_pred = predict(classifier, newdata = test_set_diabetes[-9])

# Making the Confusion Matrix
cm = table(test_set_diabetes[, 9], y_pred)
cm
##    y_pred
##       0   1
##   0 106  19
##   1  29  38
fourfoldplot(cm)
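
Pulling the counts from the five confusion matrices together gives a quick comparison of test-set accuracy; a sketch, with the numbers copied from the tables above and the names used only as labels:

# Accuracy per model: correctly classified test cases over the 192 test observations
accuracies = c(logistic      = (107 + 36) / 192,
               svm_linear    = (107 + 36) / 192,
               svm_radial    = (106 + 38) / 192,
               naive_bayes   = (105 + 44) / 192,
               random_forest = (106 + 38) / 192)
round(accuracies, 3)

On this split, naive Bayes comes out highest at about 0.78, the radial-kernel SVM and the random forest reach 0.75, and logistic regression and the linear-kernel SVM land at about 0.74.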