library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(kernlab)
##
## Attaching package: 'kernlab'
## The following object is masked from 'package:ggplot2':
##
## alpha
library(AppliedPredictiveModeling)
data(AlzheimerDisease)
adData = data.frame(diagnosis,predictors)
trainIndex = createDataPartition(diagnosis, p = 0.5,list=FALSE)
training = adData[trainIndex,]
testing = adData[trainIndex,]
Answer: The following commands were marked as the correct answer:
adData = data.frame(diagnosis,predictors)
testIndex = createDataPartition(diagnosis, p = 0.50, list=FALSE)
training = adData[-testIndex,]
testing = adData[testIndex,]
adData = data.frame(predictors)
trainIndex = createDataPartition(diagnosis,p=0.5,list=FALSE)
training = adData[-trainIndex,]
testing = adData[-trainIndex,]
adData = data.frame(diagnosis,predictors)
train = createDataPartition(diagnosis, p = 0.50,list=FALSE)
test = createDataPartition(diagnosis, p = 0.50,list=FALSE)
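A quick way to see why the marked commands are the right split (a sketch, assuming the AlzheimerDisease data are loaded as above): the index built by createDataPartition selects about half of the rows for testing, and negating it for training guarantees the two sets are disjoint and roughly 50/50.
adData = data.frame(diagnosis, predictors)
testIndex = createDataPartition(diagnosis, p = 0.50, list = FALSE)
training = adData[-testIndex,]
testing = adData[testIndex,]
length(intersect(rownames(training), rownames(testing)))  # expected: 0 overlapping rows
c(nrow(training), nrow(testing))                          # roughly equal halves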
library(AppliedPredictiveModeling)
data(concrete)
library(caret)
set.seed(1000)
inTrain = createDataPartition(mixtures$CompressiveStrength, p = 3/4)[[1]]
training = mixtures[ inTrain,]
testing = mixtures[-inTrain,]
library(ISLR)
library(ggplot2)
library(Hmisc)
## Loading required package: survival
##
## Attaching package: 'survival'
## The following object is masked from 'package:caret':
##
## cluster
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
cols <- colnames(training)
subCols <- cols[-length(cols)]  # drop the CompressiveStrength (outcome) column
plotCols <- 4                   # lay the feature plots out in 4 columns
par(mfrow = c(ceiling(length(subCols)/plotCols), plotCols))
res <- sapply(subCols, function(colName){
  colCut <- cut2(training[, colName])          # bin the predictor into intervals
  lab <- paste0("index: col=", colName)
  plot(training$CompressiveStrength, pch = 19, col = colCut,
       xlab = lab, ylab = "CompressiveStrength")
})
There is a non-random pattern in the plot of the outcome versus index that is perfectly explained by the FlyAsh variable, so there may be a variable missing.
There is a non-random pattern in the plot of the outcome versus index.
There is a non-random pattern in the plot of the outcome versus index that is perfectly explained by the Age variable, so there may be a variable missing.
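One way to probe these options (a sketch, assuming the training set from above and Hmisc loaded): colour the outcome-versus-index plot by a cut-up candidate predictor, e.g. Age, and check whether the colour bands line up with the step pattern in CompressiveStrength.
cutAge <- cut2(training$Age, g = 4)   # bin Age into 4 quantile groups
plot(training$CompressiveStrength, pch = 19, col = cutAge,
     xlab = "index (coloured by Age group)", ylab = "CompressiveStrength")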
library(AppliedPredictiveModeling)
data(concrete)
library(caret)
set.seed(1000)
inTrain = createDataPartition(mixtures$CompressiveStrength, p = 3/4)[[1]]
training = mixtures[ inTrain,]
testing = mixtures[-inTrain,]
par(mfrow=c(1,2))
hist(training$Superplasticizer, breaks = 50, xlab = "Superplasticizer",
     ylab = "Occurrences", main = "Histogram of values of\nSuperplasticizer")
hist(log(training$Superplasticizer + 1), breaks = 50, xlab = "log(Superplasticizer+1)",
     ylab = "Occurrences", main = "Histogram of values of\nlog(Superplasticizer+1)")
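One thing worth checking here (a sketch, assuming the same training split): how many Superplasticizer values are exactly zero. Since log(0 + 1) = 0, any zeros stay stacked at zero after the transform, so the log histogram cannot become symmetric if zeros dominate.
sum(training$Superplasticizer == 0)    # count of exact zeros
mean(training$Superplasticizer == 0)   # proportion of exact zeros
summary(training$Superplasticizer)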
library(caret)
library(AppliedPredictiveModeling)
set.seed(3433)
data(AlzheimerDisease)
adData = data.frame(diagnosis,predictors)
inTrain = createDataPartition(adData$diagnosis, p = 3/4)[[1]]
training = adData[ inTrain,]
testing = adData[-inTrain,]
PredNames<-colnames(training)
OnlyPredIL<-PredNames[substr(PredNames,1,2)=="IL"]
PropPCA<-preProcess(training[,OnlyPredIL],method="pca", thresh=.90)
PropPCA
## Created from 251 samples and 12 variables
##
## Pre-processing:
## - centered (12)
## - ignored (0)
## - principal component signal extraction (12)
## - scaled (12)
##
## PCA needed 9 components to capture 90 percent of the variance
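The component count can be cross-checked by hand (a sketch, assuming the OnlyPredIL columns and training set from above): run prcomp on the centred and scaled IL predictors and find the first point where the cumulative proportion of variance reaches 90%. This should agree with the preProcess result.
pcaIL <- prcomp(training[, OnlyPredIL], center = TRUE, scale. = TRUE)
cumVar <- cumsum(pcaIL$sdev^2) / sum(pcaIL$sdev^2)
which(cumVar >= 0.90)[1]   # smallest number of components capturing 90% of the variance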
library(AppliedPredictiveModeling)
library(caret)
data(AlzheimerDisease)
set.seed(3433)
adData=data.frame(diagnosis,predictors)
inTrain<-createDataPartition(adData$diagnosis, p = 3/4)[[1]]
training<-adData[inTrain,]
testing<-adData[-inTrain,]
PredNames<-colnames(training)
NamesIL<-PredNames[substr(PredNames,1,2)=="IL"]
# Model using ALL the predictors beginning with IL
trainingIL<-training[,c(NamesIL, "diagnosis")]
testingIL<-testing[,c(NamesIL, "diagnosis")]
AllPredIL<-train(diagnosis ~ ., data=trainingIL, method="glm")
confusionMatrix(testingIL$diagnosis, predict(AllPredIL, testingIL))
## Confusion Matrix and Statistics
##
## Reference
## Prediction Impaired Control
## Impaired 2 20
## Control 9 51
##
## Accuracy : 0.6463
## 95% CI : (0.533, 0.7488)
## No Information Rate : 0.8659
## P-Value [Acc > NIR] : 1.00000
##
## Kappa : -0.0702
## Mcnemar's Test P-Value : 0.06332
##
## Sensitivity : 0.18182
## Specificity : 0.71831
## Pos Pred Value : 0.09091
## Neg Pred Value : 0.85000
## Prevalence : 0.13415
## Detection Rate : 0.02439
## Detection Prevalence : 0.26829
## Balanced Accuracy : 0.45006
##
## 'Positive' Class : Impaired
##
# Model using principal components with an 80% variance threshold
preProcPCA<-preProcess(trainingIL, method="pca", thresh=.8)
trainPC<-predict(preProcPCA, trainingIL)
ModelPCA<-train(diagnosis ~ ., method="glm", data=trainPC)
testPC<-predict(preProcPCA, testing[, NamesIL])
confusionMatrix(testingIL$diagnosis, predict(ModelPCA, testPC))
## Confusion Matrix and Statistics
##
## Reference
## Prediction Impaired Control
## Impaired 3 19
## Control 4 56
##
## Accuracy : 0.7195
## 95% CI : (0.6094, 0.8132)
## No Information Rate : 0.9146
## P-Value [Acc > NIR] : 1.000000
##
## Kappa : 0.0889
## Mcnemar's Test P-Value : 0.003509
##
## Sensitivity : 0.42857
## Specificity : 0.74667
## Pos Pred Value : 0.13636
## Neg Pred Value : 0.93333
## Prevalence : 0.08537
## Detection Rate : 0.03659
## Detection Prevalence : 0.26829
## Balanced Accuracy : 0.58762
##
## 'Positive' Class : Impaired
##
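For a direct comparison of the two models, the test-set accuracies can be pulled out of the confusion matrix objects (a sketch, assuming the objects above are still in the workspace):
accAll <- confusionMatrix(testingIL$diagnosis, predict(AllPredIL, testingIL))$overall["Accuracy"]
accPCA <- confusionMatrix(testingIL$diagnosis, predict(ModelPCA, testPC))$overall["Accuracy"]
c(NonPCA = unname(accAll), PCA = unname(accPCA))   # about 0.65 vs 0.72 on this split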