library(AppliedPredictiveModeling)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(Hmisc)
## Loading required package: survival
##
## Attaching package: 'survival'
## The following object is masked from 'package:caret':
##
## cluster
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
# Load data
# Bring both example datasets into the workspace with a single data() call.
# The objects used further down are `concrete`, `diagnosis` and `predictors`.
data(AlzheimerDisease, concrete)
# Quantile-bin each predictor of `concrete` with Hmisc::cut2(): Cement into
# 10 groups, every other column into 5.
v1 <- cut2(concrete$Cement, g = 10)
v2 <- cut2(concrete$BlastFurnaceSlag, g = 5)
v3 <- cut2(concrete$FlyAsh, g = 5)
v4 <- cut2(concrete$Water, g = 5)
v5 <- cut2(concrete$Superplasticizer, g = 5)
# v6 repeated the Superplasticizer binning of v5. The name is kept so that
# any later reference still resolves, but it is no longer plotted twice.
v6 <- v5
v7 <- cut2(concrete$CoarseAggregate, g = 5)
v8 <- cut2(concrete$FineAggregate, g = 5)
v9 <- cut2(concrete$Age, g = 5)

# Plot CompressiveStrength against the row index once per predictor, colouring
# the points by that predictor's bin. Plot order matches the original script
# (Age first, Cement last).
row_index <- seq_len(nrow(concrete))
bins_by_predictor <- list(
  Age = v9,
  FineAggregate = v8,
  CoarseAggregate = v7,
  Superplasticizer = v5,
  Water = v4,
  FlyAsh = v3,
  BlastFurnaceSlag = v2,
  Cement = v1
)
for (predictor in names(bins_by_predictor)) {
  strength_plot <- ggplot(
    concrete,
    aes(
      x = row_index,
      y = CompressiveStrength,
      col = bins_by_predictor[[predictor]]
    )
  ) +
    geom_point() +
    labs(x = "Row index", colour = predictor)
  # ggplot objects are not auto-printed inside a loop (or under source()),
  # so draw each one explicitly.
  print(strength_plot)
}
# Build the modelling frame from `diagnosis` and `predictors`, then split it
# 75/25 into training and test rows. The seed is set immediately before the
# partition so the split is reproducible.
set.seed(3433)
adData <- data.frame(diagnosis, predictors)
inTrain <- createDataPartition(adData$diagnosis, p = 3 / 4)[[1]]
training <- adData[inTrain, ]
testing <- adData[-inTrain, ]

# Restrict both sets to the outcome plus the columns whose names start
# with "IL".
IL <- grep("^IL", names(adData), value = TRUE)
trainingIL <- training[, c("diagnosis", IL)]
testingIL <- testing[, c("diagnosis", IL)]
## Including all the predictors
# Fit a logistic regression (glm) on every IL predictor and score it on the
# held-out rows.
model1 <- train(diagnosis ~ ., method = "glm", data = trainingIL)
# confusionMatrix() takes the predictions first (`data`) and the true labels
# second (`reference`). Passing them the other way round transposes the table
# and mislabels sensitivity/specificity, although accuracy is unaffected.
confusionMatrix(
  data = predict(model1, testing),
  reference = testing$diagnosis
)
## Recorded result (seed 3433). Cell counts are those of the original run,
## shown in the corrected orientation (rows = predicted, columns = actual):
##
##            Reference
## Prediction Impaired Control
##   Impaired        2       9
##   Control        20      51
##
## Accuracy : 0.6463
## 95% CI : (0.533, 0.7488)
## Sensitivity : 2/22 = 0.0909
## Specificity : 51/60 = 0.8500
## 'Positive' Class : Impaired
##
## Re-run the call above to regenerate the full caret printout.
# Including PCA processed predictors
# Learn the PCA rotation on the training predictors only (outcome column
# dropped), keeping enough components to reach the 0.8 variance threshold.
preProc <- preProcess(trainingIL[, -1], method = "pca", thresh = 0.8)
trainPC <- predict(preProc, trainingIL[, -1])
modelfit <- train(x = trainPC, y = trainingIL$diagnosis, method = "glm")
# Project the test predictors with the same rotation. The outcome column is
# dropped here as well, so both sets go through identical preprocessing.
testPC <- predict(preProc, testingIL[, -1])
# confusionMatrix() takes the predictions first (`data`) and the true labels
# second (`reference`). Passing them the other way round transposes the table
# and mislabels sensitivity/specificity, although accuracy is unaffected.
confusionMatrix(
  data = predict(modelfit, testPC),
  reference = testing$diagnosis
)
## Recorded result (seed 3433). Cell counts are those of the original run,
## shown in the corrected orientation (rows = predicted, columns = actual):
##
##            Reference
## Prediction Impaired Control
##   Impaired        3       4
##   Control        19      56
##
## Accuracy : 0.7195
## 95% CI : (0.6094, 0.8132)
## Sensitivity : 3/22 = 0.1364
## Specificity : 56/60 = 0.9333
## 'Positive' Class : Impaired
##
## Re-run the call above to regenerate the full caret printout.
# Alternative way: let train() apply the PCA preprocessing itself, with the
# same 0.8 variance threshold passed through trainControl().
model2 <- train(
  diagnosis ~ .,
  method = "glm",
  data = trainingIL,
  preProcess = "pca",
  trControl = trainControl(preProcOptions = list(thresh = 0.8))
)
# confusionMatrix() takes the predictions first (`data`) and the true labels
# second (`reference`). Passing them the other way round transposes the table
# and mislabels sensitivity/specificity, although accuracy is unaffected.
confusionMatrix(
  data = predict(model2, testing),
  reference = testing$diagnosis
)
## Recorded result (seed 3433). Cell counts are those of the original run,
## shown in the corrected orientation (rows = predicted, columns = actual):
##
##            Reference
## Prediction Impaired Control
##   Impaired        3       4
##   Control        19      56
##
## Accuracy : 0.7195
## 95% CI : (0.6094, 0.8132)
## Sensitivity : 3/22 = 0.1364
## Specificity : 56/60 = 0.9333
## 'Positive' Class : Impaired
##
## Re-run the call above to regenerate the full caret printout.