Can we accurately predict the hamstring-to-quadriceps ratio in cancer
patients, as a means of physical assessment, using dual-energy X-ray
absorptiometry (DXA) scans?
#Load Libraries
library(dplyr)
library(ggplot2)
library(lubridate)
library(caret)
library(gains)
library(pROC)
library(readxl)
library(stringr)
library(rpart)
library(rpart.plot)
library(forecast)
#DATA MANAGEMENT________________________________________________________________
MMF_biodex_raw <- read.csv("/Volumes/Kindl Portable /MMF/MMF Data/BIODEX Data Exports/MMF Data 9:6:22/MMF 9-6-22t.csv")
MMF_Baseline_DEXA <- read_excel("/Volumes/Kindl Portable /MMF/MMF Data/DEXA Data/MMF Baseline Data Pilot and Cohort1_tu.xlsx",
sheet = "DXA_WbodyComposition Baseline")
#CLEAN AND CALCULATE HAMSTRING-TO-QUADRICEPS RATIO FROM BIODEX DATA
MMF_Filtered <- MMF_biodex_raw %>% filter(!LAST %in% c("AWB Test MCW","E4V_05_T1","GZ","LO","WP","TU","Greving","LA","M"))
#Convert TEST.DATE from character to Date, dropping hours and minutes
MMF_Filtered$TEST.DATE <- format(as.POSIXct(MMF_Filtered$TEST.DATE, format = '%m/%d/%Y %H:%M'), format = '%m/%d/%Y')
MMF_Filtered$TEST.DATE <- mdy(MMF_Filtered$TEST.DATE)
#Correct mislabeled participants
MMF_Filtered <- MMF_Filtered %>%
  mutate(LAST = case_when(
    LAST == "3010" & TEST.DATE == "2020-10-09" ~ "3011",
    LAST == "3010A" ~ "3010",
    LAST == "MMF 1023" & TEST.DATE == "2019-12-14" ~ "MMF 1046",
    LAST == "MMF 1023A" ~ "MMF 1023",
    LAST == "30088888" ~ "3008",
    TRUE ~ LAST
  ))
#Remove rows where peak torque was not recorded in either direction
MMF_Filtered <- MMF_Filtered %>% filter(PEAKTQ.AWY != 0 | PEAKTQ.TWD != 0)
#Remove warm-up sets, keeping only sets 2, 4, and 6
MMF_Filtered <- MMF_Filtered %>% filter(SET.. %in% c(2, 4, 6))
#Remove unused columns
MMF_Filtered <- subset(MMF_Filtered, select = -c(2, 5, 6, 7, 8, 9, 10, 11, 12, 57))
#Make copy of filtered data
MMF <- MMF_Filtered
#New variable: hamstring-to-quadriceps ratio from body-weight-normalized peak torques (toward/away)
MMF$HM_QD_RATIO <- MMF$PEAKTQ.BW.TWD / MMF$PEAKTQ.BW.AWY
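#Optional sanity check: H:Q ratios are typically reported in roughly the 0.5-0.8 range, so extreme values may flag data-entry or labeling problems. A quick look at the distribution:
summary(MMF$HM_QD_RATIO)
ggplot(MMF, aes(x = HM_QD_RATIO)) +
  geom_histogram(bins = 30) +
  labs(x = "Hamstring:Quadriceps ratio", y = "Count")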
#ADD HAMSTRING-TO-QUADRICEPS RATIO TO THE DEXA DATA SET:
names(MMF_Baseline_DEXA)[2] <- "LAST"
#Remove white space in "LAST" so the data sets can be merged
MMF$LAST <- str_replace_all(MMF$LAST, " ", "")
#Merge data sets
combineddata <- merge(MMF, MMF_Baseline_DEXA, by = "LAST")
#Drop more columns that we don't need
combineddata <- subset(combineddata, select = -c(50:53, 90:95))
#THE DATA SETS ARE NOW COMBINED, BUT FOR KNN AND THE DECISION TREE WE ONLY WANT DEXA PREDICTORS, SINCE USING BIODEX DATA TO PREDICT A BIODEX-DERIVED HAMSTRING-TO-QUADRICEPS RATIO WOULD BE SELF-FULFILLING:
LAST_RATIO <- select(MMF, LAST, HM_QD_RATIO)
DXA_RATIO <- merge(LAST_RATIO, MMF_Baseline_DEXA, by = "LAST")
DXA_RATIO <- subset(DXA_RATIO, select = -c(3:8, 37:42))
#Dichotomize the ratio at 0.50 to create the classification target
DXA_RATIO$HM_QD_RATIO <- ifelse(DXA_RATIO$HM_QD_RATIO > .50, "1", "0")
DXADATA <- DXA_RATIO
DXADATA$HM_QD_RATIO <- as.numeric(DXADATA$HM_QD_RATIO)
DXADATA <- subset(DXADATA, select = -c(1, 31:36))
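#Before modeling, it is worth checking the class balance of the dichotomized target, since a strong imbalance would change how accuracy should be read. A quick check:
table(DXADATA$HM_QD_RATIO)
prop.table(table(DXADATA$HM_QD_RATIO))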
#CLEAN UP THE WORKSPACE SO ONLY THE DATASET WE'LL USE FOR KNN AND THE DECISION TREE REMAINS
rm(combineddata, DXA_RATIO, LAST_RATIO, MMF, MMF_Baseline_DEXA, MMF_biodex_raw, MMF_Filtered)
#KNN:___________________________________________________________________________
#Scale the predictors (columns 1:29)
Data1 <- scale(DXADATA[1:29])
#Convert the scaled matrix back to a data frame
Data1 <- as.data.frame(Data1)
Data1$HM_QD_RATIO <- as.factor(DXADATA$HM_QD_RATIO)
#Partition the data
#The hamstring-to-quadriceps ratio (as a factor) is the target variable
set.seed(1)
myIndex <- createDataPartition(Data1$HM_QD_RATIO, p=0.7, list=FALSE)
trainSet <- Data1[myIndex,]
validationSet <- Data1[-myIndex,]
#Train control: 10-fold cross-validation
myCtrl <- trainControl(method = "cv", number = 10)
#Tuning grid: try k from 1 to 10
myGrid <- expand.grid(.k = c(1:10))
set.seed(1)
KNN_fit <- train(HM_QD_RATIO ~ ., data=trainSet, method = "knn", trControl=myCtrl, tuneGrid = myGrid)
KNN_fit
## k-Nearest Neighbors
##
## 157 samples
## 28 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 142, 141, 141, 141, 142, 141, ...
## Resampling results across tuning parameters:
##
## k Accuracy Kappa
## 1 0.5725000 0.121666003
## 2 0.5329167 0.037759763
## 3 0.5275000 0.033315285
## 4 0.5083333 0.001008097
## 5 0.5075000 0.004868379
## 6 0.5200000 0.035461097
## 7 0.5316667 0.063493173
## 8 0.5079167 0.026788245
## 9 0.5079167 0.020373706
## 10 0.4766667 -0.045368350
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 1.
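#The best cross-validated accuracy (k = 1, ~0.57) is only modestly above chance, so the tuning curve is worth eyeballing. A minimal sketch using caret's plot method for train objects:
plot(KNN_fit)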
KNN_Class <- predict(KNN_fit, newdata = validationSet)
confusionMatrix(KNN_Class, validationSet$HM_QD_RATIO, positive = '1')
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 27 16
## 1 10 13
##
## Accuracy : 0.6061
## 95% CI : (0.4781, 0.7242)
## No Information Rate : 0.5606
## P-Value [Acc > NIR] : 0.2688
##
## Kappa : 0.1821
##
## Mcnemar's Test P-Value : 0.3268
##
## Sensitivity : 0.4483
## Specificity : 0.7297
## Pos Pred Value : 0.5652
## Neg Pred Value : 0.6279
## Prevalence : 0.4394
## Detection Rate : 0.1970
## Detection Prevalence : 0.3485
## Balanced Accuracy : 0.5890
##
## 'Positive' Class : 1
##
KNN_Class_prob <- predict(KNN_fit, newdata = validationSet, type='prob')
KNN_Class_prob
## 0 1
## 1 0.5000000 0.5000000
## 2 0.5000000 0.5000000
## 3 0.5000000 0.5000000
## 4 1.0000000 0.0000000
## 5 1.0000000 0.0000000
## 6 1.0000000 0.0000000
## 7 1.0000000 0.0000000
## 8 0.1666667 0.8333333
## 9 0.1666667 0.8333333
## 10 0.6666667 0.3333333
## 11 0.6666667 0.3333333
## 12 1.0000000 0.0000000
## 13 1.0000000 0.0000000
## 14 1.0000000 0.0000000
## 15 0.2500000 0.7500000
## 16 0.0000000 1.0000000
## 17 0.6000000 0.4000000
## 18 0.6000000 0.4000000
## 19 0.8333333 0.1666667
## 20 0.6666667 0.3333333
## 21 0.6666667 0.3333333
## 22 0.3333333 0.6666667
## 23 0.3333333 0.6666667
## 24 0.3333333 0.6666667
## 25 0.7500000 0.2500000
## 26 0.5555556 0.4444444
## 27 0.5555556 0.4444444
## 28 0.5555556 0.4444444
## 29 0.5555556 0.4444444
## 30 0.5555556 0.4444444
## 31 0.5555556 0.4444444
## 32 0.5555556 0.4444444
## 33 0.5555556 0.4444444
## 34 0.5555556 0.4444444
## 35 0.5000000 0.5000000
## 36 0.5000000 0.5000000
## 37 0.5000000 0.5000000
## 38 0.4000000 0.6000000
## 39 0.2500000 0.7500000
## 40 0.5000000 0.5000000
## 41 0.5000000 0.5000000
## 42 0.5000000 0.5000000
## 43 0.3750000 0.6250000
## 44 0.7142857 0.2857143
## 45 0.7142857 0.2857143
## 46 0.5000000 0.5000000
## 47 0.5000000 0.5000000
## 48 0.5000000 0.5000000
## 49 0.6666667 0.3333333
## 50 0.6666667 0.3333333
## 51 0.6666667 0.3333333
## 52 0.0000000 1.0000000
## 53 0.0000000 1.0000000
## 54 0.7142857 0.2857143
## 55 0.7142857 0.2857143
## 56 1.0000000 0.0000000
## 57 0.2857143 0.7142857
## 58 0.2857143 0.7142857
## 59 0.0000000 1.0000000
## 60 1.0000000 0.0000000
## 61 1.0000000 0.0000000
## 62 1.0000000 0.0000000
## 63 1.0000000 0.0000000
## 64 0.5000000 0.5000000
## 65 0.5000000 0.5000000
## 66 0.5000000 0.5000000
#Re-classify using an explicit 0.50 cutoff on the predicted probabilities (ties at 0.50 fall to class '0')
confusionMatrix(as.factor(ifelse(KNN_Class_prob[,2] > 0.50, '1', '0')),
                validationSet$HM_QD_RATIO, positive = '1')
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 30 21
## 1 7 8
##
## Accuracy : 0.5758
## 95% CI : (0.4479, 0.6966)
## No Information Rate : 0.5606
## P-Value [Acc > NIR] : 0.45269
##
## Kappa : 0.0914
##
## Mcnemar's Test P-Value : 0.01402
##
## Sensitivity : 0.2759
## Specificity : 0.8108
## Pos Pred Value : 0.5333
## Neg Pred Value : 0.5882
## Prevalence : 0.4394
## Detection Rate : 0.1212
## Detection Prevalence : 0.2273
## Balanced Accuracy : 0.5433
##
## 'Positive' Class : 1
##
validationSet$HM_QD_RATIO <- as.numeric(as.character(validationSet$HM_QD_RATIO))
gains_table <- gains(validationSet$HM_QD_RATIO, KNN_Class_prob[,2])
gains_table
## Depth Cume Cume Pct Mean
## of Cume Mean Mean of Total Lift Cume Model
## File N N Resp Resp Resp Index Lift Score
## -------------------------------------------------------------------------
## 9 6 6 0.50 0.50 10.3% 114 114 0.94
## 20 7 13 0.43 0.46 20.7% 98 105 0.70
## 45 17 30 0.53 0.50 51.7% 120 114 0.51
## 59 9 39 0.11 0.41 55.2% 25 93 0.44
## 73 9 48 0.33 0.40 65.5% 76 90 0.35
## 79 4 52 0.75 0.42 75.9% 171 96 0.29
## 100 14 66 0.50 0.44 100.0% 114 100 0.03
##Cumulative lift chart
plot(c(0, gains_table$cume.pct.of.total*sum(validationSet$HM_QD_RATIO))~c(0, gains_table$cume.obs),
xlab = "# of cases",
ylab = "Cumulative",
main="Cumulative Lift Chart",
type="l")
lines(c(0, sum(validationSet$HM_QD_RATIO))~c(0, dim(validationSet)[1]),
col="red",
lty=2)

##Decile-wise lift
barplot(gains_table$mean.resp/mean(validationSet$HM_QD_RATIO),
names.arg=gains_table$depth,
xlab="Percentile",
ylab="Lift",
ylim=c(0,3),
main="Decile-Wise Lift Chart")

##ROC Curve
roc_object <- roc(validationSet$HM_QD_RATIO, KNN_Class_prob[,2])
plot.roc(roc_object)

auc(roc_object)
## Area under the curve: 0.5149
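#The 0.50 cutoff above was a convenience choice. As an exploratory sketch, pROC::coords can suggest the Youden-optimal threshold on this validation set (computed in-sample, so treat it as descriptive rather than a tuned cutoff):
coords(roc_object, x = "best", best.method = "youden",
       ret = c("threshold", "sensitivity", "specificity"))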
#CLASSIFICATION TREE:___________________________________________________________
TreeData <- DXADATA
#Convert the outcome to a factor so rpart fits a classification tree
TreeData$HM_QD_RATIO <- as.factor(TreeData$HM_QD_RATIO)
data <- TreeData
set.seed(1)
myIndex <- createDataPartition(data$HM_QD_RATIO, p=0.8, list=FALSE)
trainSet <- data[myIndex,]
validationSet <- data[-myIndex,]
#Default Tree
set.seed(1)
default_tree <- rpart(HM_QD_RATIO ~.,
data = trainSet,
method = "class")
summary(default_tree)
## Call:
## rpart(formula = HM_QD_RATIO ~ ., data = trainSet, method = "class")
## n= 179
##
## CP nsplit rel error xerror xstd
## 1 0.09493671 0 1.0000000 1.0000000 0.08409302
## 2 0.03797468 2 0.8101266 0.9367089 0.08339824
## 3 0.02531646 3 0.7721519 0.9873418 0.08397584
## 4 0.01000000 5 0.7215190 0.9746835 0.08384783
##
## Variable importance
## TRUNK_PFAT L_LEG_PFAT R_LEG_PFAT TRUNK_FAT LARM_PFAT LARM_LEAN SUBTOT_FAT
## 12 10 10 9 8 8 7
## RARM_PFAT TRUNK_MASS LARM_FAT HEAD_FAT HEAD_LEAN HEAD_MASS LARM_MASS
## 6 5 4 3 3 3 3
## RARM_FAT RARM_MASS HEAD_PFAT L_LEG_LEAN R_LEG_MASS
## 2 2 2 2 1
##
## Node number 1: 179 observations, complexity param=0.09493671
## predicted class=0 expected loss=0.4413408 P(node) =1
## class counts: 100 79
## probabilities: 0.559 0.441
## left son=2 (28 obs) right son=3 (151 obs)
## Primary splits:
## LARM_PFAT < 32.95919 to the right, improve=3.422366, (0 missing)
## R_LEG_PFAT < 33.48621 to the right, improve=2.994498, (0 missing)
## HEAD_MASS < 5095.034 to the left, improve=2.483308, (0 missing)
## HEAD_LEAN < 3858.3 to the left, improve=2.322997, (0 missing)
## HEAD_FAT < 1261.939 to the left, improve=1.963809, (0 missing)
## Surrogate splits:
## R_LEG_PFAT < 33.48621 to the right, agree=0.950, adj=0.679, (0 split)
## TRUNK_PFAT < 37.54768 to the right, agree=0.944, adj=0.643, (0 split)
## RARM_PFAT < 29.87553 to the right, agree=0.927, adj=0.536, (0 split)
## L_LEG_PFAT < 34.85107 to the right, agree=0.922, adj=0.500, (0 split)
## LARM_LEAN < 3358.73 to the left, agree=0.911, adj=0.429, (0 split)
##
## Node number 2: 28 observations
## predicted class=0 expected loss=0.2142857 P(node) =0.1564246
## class counts: 22 6
## probabilities: 0.786 0.214
##
## Node number 3: 151 observations, complexity param=0.09493671
## predicted class=0 expected loss=0.4834437 P(node) =0.8435754
## class counts: 78 73
## probabilities: 0.517 0.483
## left son=6 (76 obs) right son=7 (75 obs)
## Primary splits:
## TRUNK_PFAT < 27.74399 to the left, improve=4.048797, (0 missing)
## HEAD_LEAN < 3833.429 to the left, improve=2.481704, (0 missing)
## HEAD_MASS < 5054.295 to the left, improve=2.481704, (0 missing)
## LARM_LEAN < 4330.712 to the right, improve=2.387011, (0 missing)
## HEAD_PFAT < 24.81534 to the left, improve=2.323573, (0 missing)
## Surrogate splits:
## TRUNK_FAT < 12411.22 to the left, agree=0.934, adj=0.867, (0 split)
## SUBTOT_FAT < 25799.2 to the left, agree=0.848, adj=0.693, (0 split)
## L_LEG_PFAT < 26.6037 to the left, agree=0.841, adj=0.680, (0 split)
## R_LEG_PFAT < 26.30059 to the left, agree=0.821, adj=0.640, (0 split)
## TRUNK_MASS < 45188.76 to the left, agree=0.815, adj=0.627, (0 split)
##
## Node number 6: 76 observations, complexity param=0.02531646
## predicted class=0 expected loss=0.3684211 P(node) =0.424581
## class counts: 48 28
## probabilities: 0.632 0.368
## left son=12 (42 obs) right son=13 (34 obs)
## Primary splits:
## TRUNK_FAT < 11216.93 to the right, improve=1.284387, (0 missing)
## TRUNK_MASS < 43326.17 to the right, improve=1.284387, (0 missing)
## LARM_LEAN < 4233.126 to the right, improve=1.214575, (0 missing)
## TRUNK_LEAN < 32041.47 to the right, improve=1.195171, (0 missing)
## LARM_MASS < 5725.131 to the right, improve=1.070613, (0 missing)
## Surrogate splits:
## LARM_FAT < 1473.992 to the right, agree=0.868, adj=0.706, (0 split)
## L_LEG_PFAT < 23.88228 to the right, agree=0.803, adj=0.559, (0 split)
## SUBTOT_FAT < 21460.09 to the right, agree=0.803, adj=0.559, (0 split)
## R_LEG_MASS < 16013.72 to the right, agree=0.789, adj=0.529, (0 split)
## LARM_PFAT < 20.72244 to the right, agree=0.763, adj=0.471, (0 split)
##
## Node number 7: 75 observations, complexity param=0.03797468
## predicted class=1 expected loss=0.4 P(node) =0.4189944
## class counts: 30 45
## probabilities: 0.400 0.600
## left son=14 (11 obs) right son=15 (64 obs)
## Primary splits:
## LARM_LEAN < 4829.499 to the right, improve=1.4403410, (0 missing)
## LARM_MASS < 7090.864 to the right, improve=1.4403410, (0 missing)
## HEAD_PFAT < 25.38827 to the left, improve=1.0210080, (0 missing)
## RARM_LEAN < 3912.856 to the right, improve=0.9230769, (0 missing)
## TRUNK_FAT < 12020.82 to the right, improve=0.9230769, (0 missing)
## Surrogate splits:
## LARM_MASS < 7090.864 to the right, agree=1.00, adj=1.000, (0 split)
## LARM_FAT < 2270.908 to the right, agree=0.96, adj=0.727, (0 split)
## RARM_FAT < 2332.712 to the right, agree=0.96, adj=0.727, (0 split)
## RARM_MASS < 7592.971 to the right, agree=0.96, adj=0.727, (0 split)
## RARM_PFAT < 31.79057 to the right, agree=0.96, adj=0.727, (0 split)
##
## Node number 12: 42 observations
## predicted class=0 expected loss=0.2857143 P(node) =0.2346369
## class counts: 30 12
## probabilities: 0.714 0.286
##
## Node number 13: 34 observations, complexity param=0.02531646
## predicted class=0 expected loss=0.4705882 P(node) =0.1899441
## class counts: 18 16
## probabilities: 0.529 0.471
## left son=26 (26 obs) right son=27 (8 obs)
## Primary splits:
## HEAD_FAT < 1348.528 to the left, improve=1.6334840, (0 missing)
## HEAD_LEAN < 4168.313 to the left, improve=1.6334840, (0 missing)
## HEAD_MASS < 5516.551 to the left, improve=1.6334840, (0 missing)
## L_LEG_MASS < 14814.73 to the right, improve=1.4126050, (0 missing)
## HEAD_PFAT < 24.36065 to the right, improve=0.9411765, (0 missing)
## Surrogate splits:
## HEAD_LEAN < 4168.313 to the left, agree=1.000, adj=1.000, (0 split)
## HEAD_MASS < 5516.551 to the left, agree=1.000, adj=1.000, (0 split)
## HEAD_PFAT < 24.56043 to the left, agree=0.912, adj=0.625, (0 split)
## LARM_LEAN < 3396.856 to the right, agree=0.912, adj=0.625, (0 split)
## L_LEG_LEAN < 9262.734 to the right, agree=0.912, adj=0.625, (0 split)
##
## Node number 14: 11 observations
## predicted class=0 expected loss=0.3636364 P(node) =0.06145251
## class counts: 7 4
## probabilities: 0.636 0.364
##
## Node number 15: 64 observations
## predicted class=1 expected loss=0.359375 P(node) =0.3575419
## class counts: 23 41
## probabilities: 0.359 0.641
##
## Node number 26: 26 observations
## predicted class=0 expected loss=0.3846154 P(node) =0.1452514
## class counts: 16 10
## probabilities: 0.615 0.385
##
## Node number 27: 8 observations
## predicted class=1 expected loss=0.25 P(node) =0.04469274
## class counts: 2 6
## probabilities: 0.250 0.750
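#rpart also stores an importance vector on the fitted object, matching the "Variable importance" section of summary() above; a one-liner if you only want the ranking:
default_tree$variable.importance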
prp(default_tree,
type = 1,
extra = 1,
under = TRUE)

#Full Tree:
set.seed(1)
full_tree <- rpart(HM_QD_RATIO ~ .,
data = trainSet,
method = "class",
cp = 0,
minsplit = 2,
minbucket = 1)
prp(full_tree,
type = 1,
extra = 1,
under = TRUE)

printcp(full_tree)
##
## Classification tree:
## rpart(formula = HM_QD_RATIO ~ ., data = trainSet, method = "class",
## cp = 0, minsplit = 2, minbucket = 1)
##
## Variables actually used in tree construction:
## [1] HEAD_FAT L_LEG_LEAN LARM_LEAN LARM_PFAT TRUNK_FAT TRUNK_PFAT
##
## Root node error: 79/179 = 0.44134
##
## n= 179
##
## CP nsplit rel error xerror xstd
## 1 0.094937 0 1.00000 1.00000 0.084093
## 2 0.037975 2 0.81013 0.93671 0.083398
## 3 0.025316 3 0.77215 1.00000 0.084093
## 4 0.000000 6 0.69620 0.97468 0.083848
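#The cp = 0.030 used below is a manual choice sitting between rows 2 and 3 of the table above. A common programmatic alternative (a sketch, not what was run here) is to prune at the cp with the smallest cross-validated error, which for this table is cp ≈ 0.038, giving 2 splits rather than 3:
best_cp <- full_tree$cptable[which.min(full_tree$cptable[, "xerror"]), "CP"]
pruned_tree_cv <- prune(full_tree, cp = best_cp)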
pruned_tree <- prune(full_tree, cp = 0.030)
prp(pruned_tree,
type = 1,
extra = 1,
under = TRUE)

predicted_class <- predict(pruned_tree, validationSet, type = "class")
confusionMatrix(predicted_class, validationSet$HM_QD_RATIO, positive = "1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 18 13
## 1 7 6
##
## Accuracy : 0.5455
## 95% CI : (0.3885, 0.6961)
## No Information Rate : 0.5682
## P-Value [Acc > NIR] : 0.6777
##
## Kappa : 0.0372
##
## Mcnemar's Test P-Value : 0.2636
##
## Sensitivity : 0.3158
## Specificity : 0.7200
## Pos Pred Value : 0.4615
## Neg Pred Value : 0.5806
## Prevalence : 0.4318
## Detection Rate : 0.1364
## Detection Prevalence : 0.2955
## Balanced Accuracy : 0.5179
##
## 'Positive' Class : 1
##
#Count positive cases (ratio > 0.50) in the full data set
length(which(data$HM_QD_RATIO == "1"))
## [1] 98
predicted_prob <- predict(pruned_tree, validationSet, type= 'prob')
head(predicted_prob)
## 0 1
## 3 0.3593750 0.6406250
## 7 0.3593750 0.6406250
## 8 0.6315789 0.3684211
## 9 0.6315789 0.3684211
## 10 0.6315789 0.3684211
## 15 0.3593750 0.6406250
#Lower the cutoff to 0.26, trading specificity for sensitivity
confusionMatrix(as.factor(ifelse(predicted_prob[,2] > 0.26, '1', '0')),
                validationSet$HM_QD_RATIO, positive = '1')
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 4 2
## 1 21 17
##
## Accuracy : 0.4773
## 95% CI : (0.3246, 0.6331)
## No Information Rate : 0.5682
## P-Value [Acc > NIR] : 0.9140112
##
## Kappa : 0.0489
##
## Mcnemar's Test P-Value : 0.0001746
##
## Sensitivity : 0.8947
## Specificity : 0.1600
## Pos Pred Value : 0.4474
## Neg Pred Value : 0.6667
## Prevalence : 0.4318
## Detection Rate : 0.3864
## Detection Prevalence : 0.8636
## Balanced Accuracy : 0.5274
##
## 'Positive' Class : 1
##
validationSet$HM_QD_RATIO <- as.numeric(as.character(validationSet$HM_QD_RATIO))
gains_table <- gains(validationSet$HM_QD_RATIO, predicted_prob[,2])
gains_table
## Depth Cume Cume Pct Mean
## of Cume Mean Mean of Total Lift Cume Model
## File N N Resp Resp Resp Index Lift Score
## -------------------------------------------------------------------------
## 30 13 13 0.46 0.46 31.6% 107 107 0.64
## 84 24 37 0.42 0.43 84.2% 96 100 0.37
## 86 1 38 1.00 0.45 89.5% 232 104 0.36
## 100 6 44 0.33 0.43 100.0% 77 100 0.21
##CUMULATIVE LIFT CHART
plot(c(0, gains_table$cume.pct.of.total*sum(validationSet$HM_QD_RATIO)) ~ c(0, gains_table$cume.obs),
xlab = '# of cases',
ylab = "Cumulative",
type = "l")
lines(c(0, sum(validationSet$HM_QD_RATIO))~c(0, dim(validationSet)[1]),
col="red",
lty=2)

##DECILE-WISE LIFT CHART
barplot(gains_table$mean.resp/mean(validationSet$HM_QD_RATIO),
names.arg=gains_table$depth,
xlab="Percentile",
ylab="Lift",
ylim=c(0, 3.0),
main="Decile-Wise Lift Chart")

##RECEIVER OPERATING CHARACTERISTIC (ROC) CURVE
roc_object <- roc(validationSet$HM_QD_RATIO, predicted_prob[,2])
plot.roc(roc_object)

auc(roc_object)
## Area under the curve: 0.52
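#Both models hover near chance (KNN AUC = 0.515, tree AUC = 0.52), so DXA body-composition measures alone do not appear to predict hamstring-to-quadriceps ratio status well in this sample. As a final sketch, pROC::ci.auc puts a confidence interval around the tree's AUC; with only 44 validation cases it will almost certainly span 0.5:
ci.auc(roc_object)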