setwd("C:\\Users\\user\\Desktop\\R_CODE_2023")
library(rattle)
## Loading required package: tibble
## Loading required package: bitops
## Rattle: A free graphical interface for data science with R.
## Version 5.5.1 Copyright (c) 2006-2021 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
library(ggplot2)
library(ISLR2)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:ISLR2':
##
## Boston
library(caret)
## Loading required package: lattice
library(splines)
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
## The following object is masked from 'package:rattle':
##
## importance
# Read the Heart data and open it in the data viewer
HeartData = read.csv("Heart.csv", header=TRUE)
#HeartData
View(HeartData)
nrow(HeartData)
## [1] 303
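# Optional check (not in the original run): count missing values per column
# before the incomplete rows are dropped below.
colSums(is.na(HeartData))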
# Drop the rows with missing values
HeartData = na.omit(HeartData)
nrow(HeartData)
## [1] 297
# Boxplots of the continuous predictors, split by heart-disease status (AHD)
par(mfrow=c(2,2))
boxplot(HeartData$Age ~ as.factor(HeartData$AHD))
boxplot(HeartData$MaxHR ~ as.factor(HeartData$AHD))
boxplot(HeartData$Chol ~ as.factor(HeartData$AHD))
boxplot(HeartData$RestBP ~ as.factor(HeartData$AHD))
par(mfrow=c(1,1))
# Pairwise scatterplots of the continuous predictors
pairs(cbind(HeartData$Chol, HeartData$MaxHR, HeartData$RestBP, HeartData$Age),
      labels = c("Chol", "MaxHR", "RestBP", "Age"),
      pch=19, lower.panel=NULL, cex=.5)
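# Optional sketch (not in the original run): ggplot2 is loaded above but never
# used; the same Age-by-AHD boxplot could be drawn with it, assuming the same
# HeartData columns.
ggplot(HeartData, aes(x = as.factor(AHD), y = Age)) +
  geom_boxplot() +
  labs(x = "AHD", y = "Age")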
# Recode the response as a factor so caret treats this as classification
HeartData$HD = as.factor(HeartData$AHD)
# 5-fold cross-validation, repeated 10 times (50 resamples in total)
train_model <- trainControl(method = "repeatedcv", number = 5, repeats = 10)
# Classification tree (CART, method = "rpart"), tuned over the complexity parameter cp
model.cart <- train(
  HD ~ Age + as.factor(Sex) + as.factor(Thal)
    + Chol + MaxHR + RestBP + Fbs + RestECG + ExAng,
  data = HeartData,
  method = "rpart",
  trControl = train_model)
model.cart
## CART
##
## 297 samples
## 9 predictor
## 2 classes: 'No', 'Yes'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold, repeated 10 times)
## Summary of sample sizes: 237, 237, 238, 238, 238, 238, ...
## Resampling results across tuning parameters:
##
## cp Accuracy Kappa
## 0.01094891 0.7307288 0.4574042
## 0.02189781 0.7377966 0.4722918
## 0.48905109 0.6101299 0.1717318
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.02189781.
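# Optional sketch (not in the original run): caret's default grid for rpart tried
# only the three cp values above; a finer, explicit grid can be supplied through
# tuneGrid. The object names here (cp_grid, model.cart.grid) are illustrative.
cp_grid <- expand.grid(cp = seq(0.005, 0.05, by = 0.005))
model.cart.grid <- train(
  HD ~ Age + as.factor(Sex) + as.factor(Thal)
    + Chol + MaxHR + RestBP + Fbs + RestECG + ExAng,
  data = HeartData,
  method = "rpart",
  trControl = train_model,
  tuneGrid = cp_grid)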
model.cart$finalModel
## n= 297
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 297 137 No (0.5387205 0.4612795)
## 2) as.factor(Thal)normal>=0.5 164 37 No (0.7743902 0.2256098) *
## 3) as.factor(Thal)normal< 0.5 133 33 Yes (0.2481203 0.7518797) *
# Confusion matrix for the selected tree on the full (training) data
confusionMatrix(predict(model.cart, HeartData),
                reference = HeartData$HD, positive = "Yes")
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 127 37
## Yes 33 100
##
## Accuracy : 0.7643
## 95% CI : (0.7118, 0.8114)
## No Information Rate : 0.5387
## P-Value [Acc > NIR] : 7.203e-16
##
## Kappa : 0.5248
##
## Mcnemar's Test P-Value : 0.7199
##
## Sensitivity : 0.7299
## Specificity : 0.7937
## Pos Pred Value : 0.7519
## Neg Pred Value : 0.7744
## Prevalence : 0.4613
## Detection Rate : 0.3367
## Detection Prevalence : 0.4478
## Balanced Accuracy : 0.7618
##
## 'Positive' Class : Yes
##
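# Optional sketch (not in the original run): the confusion matrix above is
# computed on the same data the tree was fit to; caret can also report the
# cross-validated confusion matrix averaged over the 5 x 10 resamples.
confusionMatrix(model.cart)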
# Plot the selected tree (rattle's fancyRpartPlot)
fancyRpartPlot(model.cart$finalModel)
# Random forest (method = "rf"), same predictors and resampling scheme
model.rf <- train(
  HD ~ Age + as.factor(Sex) + as.factor(Thal)
    + Chol + MaxHR + RestBP + Fbs + RestECG + ExAng,
  data = HeartData,
  method = "rf",
  trControl = train_model)
model.rf
## Random Forest
##
## 297 samples
## 9 predictor
## 2 classes: 'No', 'Yes'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold, repeated 10 times)
## Summary of sample sizes: 238, 238, 237, 238, 237, 237, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.7654124 0.5277834
## 6 0.7404972 0.4776255
## 10 0.7313785 0.4594746
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
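# Optional sketch (not in the original run): the default grid tried mtry = 2, 6,
# and 10; a finer grid can be supplied through tuneGrid. The object names here
# (mtry_grid, model.rf.grid) are illustrative.
mtry_grid <- expand.grid(mtry = 2:6)
model.rf.grid <- train(
  HD ~ Age + as.factor(Sex) + as.factor(Thal)
    + Chol + MaxHR + RestBP + Fbs + RestECG + ExAng,
  data = HeartData,
  method = "rf",
  trControl = train_model,
  tuneGrid = mtry_grid)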
# Alternative (unused) reduced formula: HD ~ Chol + MaxHR + RestBP + as.factor(Thal) + ExAng
# Components stored in the underlying randomForest object
summary(model.rf$finalModel)
## Length Class Mode
## call 4 -none- call
## type 1 -none- character
## predicted 297 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 594 matrix numeric
## oob.times 297 -none- numeric
## classes 2 -none- character
## importance 10 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 297 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 10 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# The final forest: OOB error rate and OOB confusion matrix
model.rf$finalModel
##
## Call:
## randomForest(x = x, y = y, mtry = param$mtry)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 2
##
## OOB estimate of error rate: 24.24%
## Confusion matrix:
## No Yes class.error
## No 126 34 0.2125000
## Yes 38 99 0.2773723
# OOB error as a function of the number of trees
plot(model.rf$finalModel)
# Raw (unscaled) variable importance from the randomForest object
varImp(model.rf$finalModel)
## Overall
## Age 17.712745
## as.factor(Sex)1 5.550317
## as.factor(Thal)normal 16.729774
## as.factor(Thal)reversable 13.550742
## Chol 15.367344
## MaxHR 23.847994
## RestBP 13.858147
## Fbs 2.341168
## RestECG 3.935611
## ExAng 9.823690
# caret's scaled variable importance
plot( varImp(model.rf) )
# Out-of-bag predicted probability of the first class ("No") for each observation
yhat = predict(model.rf$finalModel, type="prob")[,1]
plot(HeartData$MaxHR, yhat)
# Same relationship with a loess smooth
scatter.smooth(HeartData$MaxHR, yhat, span=.3)
# Confusion matrix on the training data; predict() here uses the full forest
# (not the out-of-bag votes), so the accuracy below is optimistic compared with
# the OOB error rate reported above
confusionMatrix(predict(model.rf, HeartData),
                reference = HeartData$HD, positive = "Yes")
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 157 6
## Yes 3 131
##
## Accuracy : 0.9697
## 95% CI : (0.9433, 0.9861)
## No Information Rate : 0.5387
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9389
##
## Mcnemar's Test P-Value : 0.505
##
## Sensitivity : 0.9562
## Specificity : 0.9812
## Pos Pred Value : 0.9776
## Neg Pred Value : 0.9632
## Prevalence : 0.4613
## Detection Rate : 0.4411
## Detection Prevalence : 0.4512
## Balanced Accuracy : 0.9687
##
## 'Positive' Class : Yes
##
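# Optional sketch (not in the original run): pROC is loaded above but never used.
# An ROC curve based on the out-of-bag class probabilities gives a less optimistic
# picture of the forest than the training-data confusion matrix above.
oob_prob_yes <- predict(model.rf$finalModel, type = "prob")[, "Yes"]
rf_roc <- roc(response = HeartData$HD, predictor = oob_prob_yes,
              levels = c("No", "Yes"))
plot(rf_roc)
auc(rf_roc)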