This dataset is a subset of data from the Orinda Longitudinal Study
of Myopia (OLSM), a cohort study of ocular component development and
risk factors for the onset of myopia in children. Data collection began
in the 1989–1990 school year and continued annually through the
2000–2001 school year.
Data on the ocular components (the parts that make up the eye) were
collected during an examination conducted during the school day. Data on
family history and visual activities were collected yearly in a survey
completed by a parent or guardian. The dataset used in this analysis
comprises 618 subjects who had at least five years of follow-up and were
not myopic when they entered the study.
# Packages used in this analysis; startup messages are suppressed for readability.
# (The original session also attached several packages that are not used below.)
suppressPackageStartupMessages({
  library(ggplot2)   # plotting
  library(dplyr)     # select(), group_by(), summarise(), the %>% pipe
  library(corrplot)  # correlation matrix plots
  library(VIM)       # aggr() missing-data plot
  library(caTools)   # sample.split() train/test split
  library(ROSE)      # ovun.sample() over-sampling
  library(caret)     # train(), trainControl(), confusionMatrix()
  library(kernlab)   # SVM kernels used by caret
  library(ROCR)      # prediction() and performance() for the ROC curve
  library(rpart)     # decision tree
})
myopia.data.raw <- read.table('myopia.csv', sep = ',', header = TRUE)
head(myopia.data.raw)
## ID STUDYYEAR MYOPIC AGE GENDER SPHEQ AL ACD LT VCD SPORTHR READHR
## 1 1 1992 1 6 1 -0.052 21.89 3.690 3.498 14.70 45 8
## 2 2 1995 0 6 1 0.608 22.38 3.702 3.392 15.29 4 0
## 3 3 1991 0 6 1 1.179 22.49 3.462 3.514 15.52 14 0
## 4 4 1990 1 6 1 0.525 22.20 3.862 3.612 14.73 18 11
## 5 5 1995 0 5 0 0.697 23.29 3.676 3.454 16.16 14 0
## 6 6 1995 0 6 0 1.744 22.14 3.224 3.556 15.36 10 6
## COMPHR STUDYHR TVHR DIOPTERHR MOMMY DADMY
## 1 0 0 10 34 1 1
## 2 1 1 7 12 1 1
## 3 2 0 10 14 0 0
## 4 0 0 4 37 0 1
## 5 0 0 4 4 1 0
## 6 2 1 19 44 0 1
str(myopia.data.raw)
## 'data.frame': 618 obs. of 18 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ STUDYYEAR: int 1992 1995 1991 1990 1995 1995 1993 1991 1991 1991 ...
## $ MYOPIC : int 1 0 0 1 0 0 0 0 0 0 ...
## $ AGE : int 6 6 6 6 5 6 6 6 7 6 ...
## $ GENDER : int 1 1 1 1 0 0 1 1 0 1 ...
## $ SPHEQ : num -0.052 0.608 1.179 0.525 0.697 ...
## $ AL : num 21.9 22.4 22.5 22.2 23.3 ...
## $ ACD : num 3.69 3.7 3.46 3.86 3.68 ...
## $ LT : num 3.5 3.39 3.51 3.61 3.45 ...
## $ VCD : num 14.7 15.3 15.5 14.7 16.2 ...
## $ SPORTHR : int 45 4 14 18 14 10 12 12 4 30 ...
## $ READHR : int 8 0 0 11 0 6 7 0 0 5 ...
## $ COMPHR : int 0 1 2 0 0 2 2 0 3 1 ...
## $ STUDYHR : int 0 1 0 0 0 1 1 0 1 0 ...
## $ TVHR : int 10 7 10 4 4 19 8 8 3 10 ...
## $ DIOPTERHR: int 34 12 14 37 4 44 36 8 12 27 ...
## $ MOMMY : int 1 1 0 0 1 0 0 0 0 0 ...
## $ DADMY : int 1 1 0 1 0 1 1 0 0 0 ...
myopia.data.raw$GENDER <- factor(myopia.data.raw$GENDER)
myopia.data.raw$MOMMY <- factor(myopia.data.raw$MOMMY)
myopia.data.raw$DADMY <- factor(myopia.data.raw$DADMY)
myopia.data <- myopia.data.raw %>%
  select(AGE, GENDER, SPHEQ, AL, ACD, LT, VCD, SPORTHR, READHR, COMPHR,
         STUDYHR, TVHR, DIOPTERHR, MOMMY, DADMY, MYOPIC)
str(myopia.data)
## 'data.frame': 618 obs. of 16 variables:
## $ AGE : int 6 6 6 6 5 6 6 6 7 6 ...
## $ GENDER : Factor w/ 2 levels "0","1": 2 2 2 2 1 1 2 2 1 2 ...
## $ SPHEQ : num -0.052 0.608 1.179 0.525 0.697 ...
## $ AL : num 21.9 22.4 22.5 22.2 23.3 ...
## $ ACD : num 3.69 3.7 3.46 3.86 3.68 ...
## $ LT : num 3.5 3.39 3.51 3.61 3.45 ...
## $ VCD : num 14.7 15.3 15.5 14.7 16.2 ...
## $ SPORTHR : int 45 4 14 18 14 10 12 12 4 30 ...
## $ READHR : int 8 0 0 11 0 6 7 0 0 5 ...
## $ COMPHR : int 0 1 2 0 0 2 2 0 3 1 ...
## $ STUDYHR : int 0 1 0 0 0 1 1 0 1 0 ...
## $ TVHR : int 10 7 10 4 4 19 8 8 3 10 ...
## $ DIOPTERHR: int 34 12 14 37 4 44 36 8 12 27 ...
## $ MOMMY : Factor w/ 2 levels "0","1": 2 2 1 1 2 1 1 1 1 1 ...
## $ DADMY : Factor w/ 2 levels "0","1": 2 2 1 2 1 2 2 1 1 1 ...
## $ MYOPIC : int 1 0 0 1 0 0 0 0 0 0 ...
# Histograms of the numeric variables, drawn in a single 3 x 4 grid.
num.vars <- c("AGE", "SPHEQ", "AL", "ACD", "LT", "VCD",
              "SPORTHR", "READHR", "COMPHR", "STUDYHR", "TVHR", "DIOPTERHR")
par(mfrow = c(3, 4))
for (v in num.vars) hist(myopia.data[[v]], xlab = v, main = v, col = 'light blue')
myopia.data %>%
  group_by(AGE, MYOPIC) %>%
  summarise(myopic_sum = sum(MYOPIC),
            n_obs = n()) %>%
  ggplot(aes(x = AGE, y = myopic_sum)) +
  geom_col(aes(fill = factor(MYOPIC)))
## `summarise()` has grouped output by 'AGE'. You can override using the `.groups`
## argument.
myopia.data.numeric <- myopia.data[, sapply(myopia.data, is.numeric)]
corrplot::corrplot(cor(myopia.data.numeric), method = 'color')
corrplot::corrplot(cor(myopia.data.numeric), method = 'number')
myopia.data.corr <- cor(myopia.data.numeric)
round(myopia.data.corr, 2)
## AGE SPHEQ AL ACD LT VCD SPORTHR READHR COMPHR STUDYHR
## AGE 1.00 -0.12 0.22 0.19 -0.19 0.20 0.06 0.13 0.06 0.40
## SPHEQ -0.12 1.00 -0.31 -0.24 0.07 -0.25 -0.02 -0.10 -0.03 -0.05
## AL 0.22 -0.31 1.00 0.46 -0.33 0.94 0.11 0.02 0.09 0.10
## ACD 0.19 -0.24 0.46 1.00 -0.34 0.20 0.08 0.01 0.07 0.05
## LT -0.19 0.07 -0.33 -0.34 1.00 -0.45 -0.03 0.02 -0.03 -0.04
## VCD 0.20 -0.25 0.94 0.20 -0.45 1.00 0.10 0.01 0.07 0.09
## SPORTHR 0.06 -0.02 0.11 0.08 -0.03 0.10 1.00 0.13 0.01 0.12
## READHR 0.13 -0.10 0.02 0.01 0.02 0.01 0.13 1.00 0.04 0.26
## COMPHR 0.06 -0.03 0.09 0.07 -0.03 0.07 0.01 0.04 1.00 0.11
## STUDYHR 0.40 -0.05 0.10 0.05 -0.04 0.09 0.12 0.26 0.11 1.00
## TVHR 0.07 -0.08 0.08 -0.04 0.05 0.08 0.17 0.00 0.13 0.04
## DIOPTERHR 0.29 -0.12 0.11 0.04 0.00 0.10 0.19 0.70 0.50 0.62
## MYOPIC 0.02 -0.37 0.04 0.11 -0.05 0.01 -0.10 0.07 0.03 -0.03
## TVHR DIOPTERHR MYOPIC
## AGE 0.07 0.29 0.02
## SPHEQ -0.08 -0.12 -0.37
## AL 0.08 0.11 0.04
## ACD -0.04 0.04 0.11
## LT 0.05 0.00 -0.05
## VCD 0.08 0.10 0.01
## SPORTHR 0.17 0.19 -0.10
## READHR 0.00 0.70 0.07
## COMPHR 0.13 0.50 0.03
## STUDYHR 0.04 0.62 -0.03
## TVHR 1.00 0.43 0.00
## DIOPTERHR 0.43 1.00 0.04
## MYOPIC 0.00 0.04 1.00
aggr_plot <- aggr(myopia.data, col = c('light blue', 'red'), numbers = TRUE,
                  sortVars = TRUE, labels = names(myopia.data), cex.axis = 0.7,
                  gap = 3, ylab = c("Histogram of missing data", "Pattern"))
##
## Variables sorted by number of missings:
## Variable Count
## AGE 0
## GENDER 0
## SPHEQ 0
## AL 0
## ACD 0
## LT 0
## VCD 0
## SPORTHR 0
## READHR 0
## COMPHR 0
## STUDYHR 0
## TVHR 0
## DIOPTERHR 0
## MOMMY 0
## DADMY 0
## MYOPIC 0
sum(is.na(myopia.data))
## [1] 0
set.seed(10)
in.train <- sample.split(myopia.data$MYOPIC, SplitRatio = 0.8)
data.train <- subset(myopia.data, in.train == TRUE)
data.test <- subset(myopia.data, in.train == FALSE)
table(data.train$MYOPIC)
##
## 0 1
## 430 65
The data are imbalanced (430 non-myopic vs. 65 myopic subjects in the training set), so we will use over-sampling to balance the classes.
data.train <- ovun.sample(MYOPIC ~ ., data = data.train, method = "over")$data
table(data.train$MYOPIC)
##
## 0 1
## 430 400
After over-sampling, the two classes of the target variable are nearly equal in size.
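As a quick sanity check (an optional addition; it simply re-expresses the table above as proportions):
prop.table(table(data.train$MYOPIC))
##         0         1
## 0.5180723 0.4819277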
data.train$MYOPIC <- as.factor(data.train$MYOPIC)
data.test$MYOPIC <- as.factor(data.test$MYOPIC)
# Fit the logistic regression; the object is named glm.model rather than glm so it
# does not shadow the glm() fitting function.
glm.model <- glm(MYOPIC ~ ., data = data.train, family = binomial)
model_summ <- summary(glm.model)
model_summ
##
## Call:
## glm(formula = MYOPIC ~ ., family = binomial, data = data.train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.56608 -0.57088 -0.02537 0.61075 2.49050
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 9.343320 6.260413 1.492 0.135583
## AGE 0.016454 0.161607 0.102 0.918902
## GENDER1 0.245112 0.229784 1.067 0.286103
## SPHEQ -4.307788 0.331279 -13.003 < 2e-16 ***
## ACD 0.774389 0.522881 1.481 0.138606
## LT -1.190944 0.874791 -1.361 0.173386
## VCD -0.417411 0.199555 -2.092 0.036465 *
## SPORTHR -0.055338 0.014561 -3.800 0.000144 ***
## READHR 0.105147 0.035038 3.001 0.002691 **
## COMPHR -0.074066 0.036143 -2.049 0.040441 *
## STUDYHR -0.335083 0.084942 -3.945 7.98e-05 ***
## TVHR -0.006309 0.019630 -0.321 0.747899
## DIOPTERHR NA NA NA NA
## MOMMY1 1.331234 0.217107 6.132 8.69e-10 ***
## DADMY1 0.589477 0.207994 2.834 0.004595 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1149.54 on 829 degrees of freedom
## Residual deviance: 644.21 on 816 degrees of freedom
## AIC: 672.21
##
## Number of Fisher Scoring iterations: 6
data.test$predicted.glm <- predict(glm.model, newdata = data.test, type = "response")
Now convert the predicted probabilities into 0/1 class labels, using a cutoff of 0.5.
data.test$predicted.glm <- ifelse(data.test$predicted.glm >= 0.5, 1, 0)
table(data.test$predicted.glm)
##
## 0 1
## 96 27
cm1 <- confusionMatrix(as.factor(data.test$predicted.glm), data.test$MYOPIC)
cm1
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 91 5
## 1 16 11
##
## Accuracy : 0.8293
## 95% CI : (0.7509, 0.8911)
## No Information Rate : 0.8699
## P-Value [Acc > NIR] : 0.9254
##
## Kappa : 0.4163
##
## Mcnemar's Test P-Value : 0.0291
##
## Sensitivity : 0.8505
## Specificity : 0.6875
## Pos Pred Value : 0.9479
## Neg Pred Value : 0.4074
## Prevalence : 0.8699
## Detection Rate : 0.7398
## Detection Prevalence : 0.7805
## Balanced Accuracy : 0.7690
##
## 'Positive' Class : 0
##
With myopia (MYOPIC = 1) taken as the class of interest, the model produced 91 true negatives (actual 0 predicted 0) and 5 false negatives (actual 1 predicted 0), along with 16 false positives (actual 0 predicted 1) and 11 true positives (actual 1 predicted 1).
The accuracy of the model is about 83%.
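That accuracy can be reproduced directly from the confusion-matrix counts above (a small illustrative check, not part of the original code):
(91 + 11) / (91 + 11 + 16 + 5)
## [1] 0.8292683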
pred <- prediction(predict(glm.model, subset(data.test, select = -c(predicted.glm)), type = "response"),
                   data.test$MYOPIC)
auc <- round(as.numeric(performance(pred, measure = "auc")@y.values), 3)
perf <- performance(pred, "tpr", "fpr")
auc
## [1] 0.853
plot(perf,colorize = T, main = "ROC Curve")
text(0.5,0.5, paste("AUC:", auc))
A ROC curve is constructed by plotting the true positive rate (TPR)
against the false positive rate (FPR). The plot here indicates that our
model performs well, since the curve lies close to the top-left corner.
In general, an AUC of 0.5 suggests no discrimination (i.e., no ability
to distinguish patients with and without the disease or condition based
on the test), 0.7 to 0.8 is considered acceptable, 0.8 to 0.9 is
considered excellent, and more than 0.9 is considered outstanding. Here
we obtain 0.853, which falls in the excellent range.
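For reference, the TPR and FPR at the 0.5 cutoff can be computed by hand from the confusion matrix above, treating the myopic class (1) as positive (an illustrative sketch; the helper variables are ours, not part of the original code):
# TPR = TP / (TP + FN), FPR = FP / (FP + TN)
TP <- 11; FN <- 5; FP <- 16; TN <- 91
c(TPR = TP / (TP + FN), FPR = FP / (FP + TN))
##       TPR       FPR
## 0.6875000 0.1495327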
# Collect accuracy and sensitivity for each model in one table
# (rows: Accuracy, Sensitivity; one column per model).
Accuracy <- data.frame('Logistic Regression' = c(round(cm1$overall[1], 3),
                                                 round(cm1$byClass[1], 3)),
                       row.names = c('Accuracy', 'Sensitivity'),
                       check.names = FALSE)
# Note: C = 0 is not a meaningful cost value, which is why it produces NaN below.
grid <- expand.grid(C = c(0, 0.01, 0.05, 0.1, 0.5, 0.75, 1, 1.5))
trctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
lsvm <- train(MYOPIC ~ ., data = data.train, method = "svmLinear",
              trControl = trctrl, preProcess = c("center", "scale"),
              tuneGrid = grid)
lsvm
## Support Vector Machines with Linear Kernel
##
## 830 samples
## 14 predictor
## 2 classes: '0', '1'
##
## Pre-processing: centered (14), scaled (14)
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 747, 747, 747, 747, 747, 747, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.00 NaN NaN
## 0.01 0.8120482 0.6246674
## 0.05 0.8204819 0.6410483
## 0.10 0.8244980 0.6490510
## 0.50 0.8253012 0.6505768
## 0.75 0.8261044 0.6521895
## 1.00 0.8261044 0.6521768
## 1.50 0.8257028 0.6513579
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was C = 0.75.
plot(lsvm)
The plot above shows that the classifier attains its best
cross-validated accuracy at C = 0.75, the value caret selected for the
final model. Let's use this model to make predictions on our test set.
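The selected value can also be read programmatically from the fitted object; caret stores it in the bestTune element:
lsvm$bestTune   # a one-row data frame holding the chosen cost, C = 0.75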
data.test$predicted.lsvm <- predict(lsvm, newdata = subset(data.test, select = -c(predicted.glm)))
cm2 <- confusionMatrix(data.test$predicted.lsvm, data.test$MYOPIC)
cm2
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 93 5
## 1 14 11
##
## Accuracy : 0.8455
## 95% CI : (0.7693, 0.9044)
## No Information Rate : 0.8699
## P-Value [Acc > NIR] : 0.82724
##
## Kappa : 0.4492
##
## Mcnemar's Test P-Value : 0.06646
##
## Sensitivity : 0.8692
## Specificity : 0.6875
## Pos Pred Value : 0.9490
## Neg Pred Value : 0.4400
## Prevalence : 0.8699
## Detection Rate : 0.7561
## Detection Prevalence : 0.7967
## Balanced Accuracy : 0.7783
##
## 'Positive' Class : 0
##
Acc <- round(cm2$overall[1], 3)
Sen <- round(cm2$byClass[1], 3)
Accuracy$LSVM <- c(Acc, Sen)
rsvm <- train(MYOPIC ~ ., data = data.train, method = "svmRadial",
              trControl = trctrl, preProcess = c("center", "scale"),
              tuneLength = 10)
rsvm
## Support Vector Machines with Radial Basis Function Kernel
##
## 830 samples
## 14 predictor
## 2 classes: '0', '1'
##
## Pre-processing: centered (14), scaled (14)
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 747, 747, 747, 747, 747, 747, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.8429719 0.6869421
## 0.50 0.8570281 0.7151250
## 1.00 0.8819277 0.7647541
## 2.00 0.9024096 0.8054494
## 4.00 0.9140562 0.8288095
## 8.00 0.9196787 0.8400063
## 16.00 0.9353414 0.8711408
## 32.00 0.9409639 0.8823377
## 64.00 0.9417671 0.8839517
## 128.00 0.9409639 0.8823544
##
## Tuning parameter 'sigma' was held constant at a value of 0.05132851
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.05132851 and C = 64.
plot(rsvm)
data.test$predicted.rsvm <- predict(rsvm, newdata = subset(data.test, select = -c(predicted.glm, predicted.lsvm)))
cm3 <- confusionMatrix(data.test$predicted.rsvm, data.test$MYOPIC)
cm3
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 98 12
## 1 9 4
##
## Accuracy : 0.8293
## 95% CI : (0.7509, 0.8911)
## No Information Rate : 0.8699
## P-Value [Acc > NIR] : 0.9254
##
## Kappa : 0.1803
##
## Mcnemar's Test P-Value : 0.6625
##
## Sensitivity : 0.9159
## Specificity : 0.2500
## Pos Pred Value : 0.8909
## Neg Pred Value : 0.3077
## Prevalence : 0.8699
## Detection Rate : 0.7967
## Detection Prevalence : 0.8943
## Balanced Accuracy : 0.5829
##
## 'Positive' Class : 0
##
Despite its high cross-validated accuracy (about 0.94), the radial SVM generalizes poorly here: test specificity is only 0.25 and kappa is 0.18, which suggests that the large cost value (C = 64) overfits the duplicated minority-class rows created by over-sampling.
Acc <- round(cm3$overall[1], 3)
Sen <- round(cm3$byClass[1], 3)
Accuracy$RSVM <- c(Acc, Sen)
dtm <- rpart(MYOPIC ~ ., data = data.train)
dtm
## n= 830
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 830 400 0 (0.51807229 0.48192771)
## 2) SPHEQ>=0.563 379 61 0 (0.83905013 0.16094987)
## 4) SPHEQ>=0.7235 262 16 0 (0.93893130 0.06106870) *
## 5) SPHEQ< 0.7235 117 45 0 (0.61538462 0.38461538)
## 10) DIOPTERHR>=6 101 31 0 (0.69306931 0.30693069)
## 20) SPHEQ< 0.6265 26 0 0 (1.00000000 0.00000000) *
## 21) SPHEQ>=0.6265 75 31 0 (0.58666667 0.41333333)
## 42) READHR< 0.5 17 0 0 (1.00000000 0.00000000) *
## 43) READHR>=0.5 58 27 1 (0.46551724 0.53448276)
## 86) AGE>=6.5 10 0 0 (1.00000000 0.00000000) *
## 87) AGE< 6.5 48 17 1 (0.35416667 0.64583333)
## 174) SPHEQ>=0.679 13 4 0 (0.69230769 0.30769231) *
## 175) SPHEQ< 0.679 35 8 1 (0.22857143 0.77142857) *
## 11) DIOPTERHR< 6 16 2 1 (0.12500000 0.87500000) *
## 3) SPHEQ< 0.563 451 112 1 (0.24833703 0.75166297)
## 6) SPHEQ>=0.2845 166 75 1 (0.45180723 0.54819277)
## 12) VCD>=16.015 21 0 0 (1.00000000 0.00000000) *
## 13) VCD< 16.015 145 54 1 (0.37241379 0.62758621)
## 26) SPORTHR>=19 12 0 0 (1.00000000 0.00000000) *
## 27) SPORTHR< 19 133 42 1 (0.31578947 0.68421053)
## 54) SPHEQ< 0.4315 41 17 0 (0.58536585 0.41463415)
## 108) VCD>=15.065 18 2 0 (0.88888889 0.11111111) *
## 109) VCD< 15.065 23 8 1 (0.34782609 0.65217391)
## 218) VCD< 14.93 7 0 0 (1.00000000 0.00000000) *
## 219) VCD>=14.93 16 1 1 (0.06250000 0.93750000) *
## 55) SPHEQ>=0.4315 92 18 1 (0.19565217 0.80434783)
## 110) VCD< 15.28 27 13 1 (0.48148148 0.51851852)
## 220) ACD< 3.849 15 2 0 (0.86666667 0.13333333) *
## 221) ACD>=3.849 12 0 1 (0.00000000 1.00000000) *
## 111) VCD>=15.28 65 5 1 (0.07692308 0.92307692) *
## 7) SPHEQ< 0.2845 285 37 1 (0.12982456 0.87017544) *
pred <- predict(dtm, newdata = subset(data.test, select = -c(predicted.glm, predicted.lsvm, predicted.rsvm)))
# Caution: pred[, 1] is the predicted probability of class "0" (non-myopic), so the
# rule below assigns label 1 exactly when the tree favors class 0. The labels are
# therefore inverted, which explains the very low accuracy reported next;
# ifelse(pred[, 2] >= 0.5, 1, 0), or predict(dtm, ..., type = "class"), would give
# the intended labels.
data.test$predicted.dtm <- ifelse(pred[, 1] >= 0.5, 1, 0)
cm4 <- confusionMatrix(as.factor(data.test$predicted.dtm), data.test$MYOPIC)
cm4
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 21 12
## 1 86 4
##
## Accuracy : 0.2033
## 95% CI : (0.1361, 0.2852)
## No Information Rate : 0.8699
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.1867
##
## Mcnemar's Test P-Value : 1.654e-13
##
## Sensitivity : 0.19626
## Specificity : 0.25000
## Pos Pred Value : 0.63636
## Neg Pred Value : 0.04444
## Prevalence : 0.86992
## Detection Rate : 0.17073
## Detection Prevalence : 0.26829
## Balanced Accuracy : 0.22313
##
## 'Positive' Class : 0
##
Acc <- round(cm4$overall[1], 3)
Sen <- round(cm4$byClass[1], 3)
Accuracy$Decision_Tree <- c(Acc, Sen)
print(Accuracy)
##             Logistic Regression  LSVM  RSVM Decision_Tree
## Accuracy                  0.829 0.846 0.829         0.203
## Sensitivity               0.850 0.869 0.916         0.196
The linear SVM gives the best test accuracy (0.846). The decision tree's apparently poor score is an artifact of the inverted cutoff rule noted above, not of the tree itself.