\[ X = 0.71 \times {\rm num415} + 0.71 \times {\rm num857}\]
\[ Y = 0.71 \times {\rm num415} - 0.71 \times {\rm num857}\]
# Weighted sum (X) and weighted difference (Y) of the num415 and num857
# columns of `training`, both with weight 0.71, then a scatter plot of the
# two derived variables against each other.
X <- with(training, 0.71 * num415 + 0.71 * num857)
Y <- with(training, 0.71 * num415 - 0.71 * num857)
plot(X, Y)
# Principal components of a two-column subset of `spam` (columns 34 and 32,
# in that order).
smallSpam <- spam[, c(34, 32)]
prComp <- prcomp(smallSpam)

# Scatter plot of the scores on the first two components.
plot(prComp$x[, 1], prComp$x[, 2])

# Print the loadings (how each original column contributes to each PC).
prComp$rotation
PC1 PC2
num415 0.7081 0.7061
num857 0.7061 -0.7081
# Colour index per row: 2 where type is "spam", 1 otherwise.
typeColor <- ifelse(spam$type == "spam", 2, 1)

# PCA on log10(x + 1) of every column except the 58th
# (presumably the `type` label -- confirm against the data set).
prComp <- prcomp(log10(spam[, -58] + 1))

# First two component scores, coloured by class.
plot(
  prComp$x[, 1], prComp$x[, 2],
  col = typeColor,
  xlab = "PC1",
  ylab = "PC2"
)
# Same idea via caret: build a PCA pre-processing object with two components
# from the log10(x + 1)-transformed columns of `spam` (58th column dropped).
preProc <- preProcess(
  log10(spam[, -58] + 1),
  method = "pca",
  pcaComp = 2
)

# Apply the pre-processing to the same transformed data to get the scores.
spamPC <- predict(preProc, log10(spam[, -58] + 1))

# Plot the two resulting columns; `typeColor` must already exist.
plot(spamPC[, 1], spamPC[, 2], col = typeColor)
# Fit the PCA pre-processing on the training set only (log10(x + 1) of all
# columns except the 58th), keeping two components.
preProc <- preProcess(log10(training[, -58] + 1), method = "pca", pcaComp = 2)
trainPC <- predict(preProc, log10(training[, -58] + 1))

# Fit a glm on the component scores. The x/y interface is used instead of
# `training$type ~ .` with `data = trainPC`: that formula takes the response
# from a different data frame than `data`, which leaves `training` in the
# model terms and can make predict() on new data fail.
modelFit <- train(x = trainPC, y = training$type, method = "glm")

# Project the test set with the pre-processing learned on the training set.
testPC <- predict(preProc, log10(testing[, -58] + 1))

# NOTE(review): caret documents confusionMatrix(data, reference) with `data`
# being the predicted classes. Here the true labels are passed first, so the
# printed "Prediction"/"Reference" labels are transposed. The call is left
# unchanged so it keeps matching the recorded output -- confirm intent.
confusionMatrix(testing$type, predict(modelFit, testPC))
Confusion Matrix and Statistics
Reference
Prediction nonspam spam
nonspam 646 51
spam 64 389
Accuracy : 0.9
95% CI : (0.881, 0.917)
No Information Rate : 0.617
P-Value [Acc > NIR] : <2e-16
Kappa : 0.79
Mcnemar's Test P-Value : 0.263
Sensitivity : 0.910
Specificity : 0.884
Pos Pred Value : 0.927
Neg Pred Value : 0.859
Prevalence : 0.617
Detection Rate : 0.562
Detection Prevalence : 0.606
'Positive' Class : nonspam
# Alternative: let train() run the PCA pre-processing itself.
# The response is written as `type` (a column of `data`) rather than
# `training$type`, so the formula refers only to columns of `data` and
# predict() on a new data frame does not depend on the global `training`.
modelFit <- train(type ~ ., method = "glm", preProcess = "pca", data = training)

# NOTE(review): caret documents confusionMatrix(data, reference) with `data`
# being the predicted classes. Here the true labels are passed first, so the
# printed "Prediction"/"Reference" labels are transposed. The call is left
# unchanged so it keeps matching the recorded output -- confirm intent.
confusionMatrix(testing$type, predict(modelFit, testing))
Confusion Matrix and Statistics
Reference
Prediction nonspam spam
nonspam 660 37
spam 54 399
Accuracy : 0.921
95% CI : (0.904, 0.936)
No Information Rate : 0.621
P-Value [Acc > NIR] : <2e-16
Kappa : 0.833
Mcnemar's Test P-Value : 0.0935
Sensitivity : 0.924
Specificity : 0.915
Pos Pred Value : 0.947
Neg Pred Value : 0.881
Prevalence : 0.621
Detection Rate : 0.574
Detection Prevalence : 0.606
'Positive' Class : nonspam