library(caret)          # nearZeroVar, createDataPartition, train, confusionMatrix
library(caretEnsemble)  # caretList, caretStack
library(ISLR)           # Khan data
library(neuralnet)      # neuralnet, compute
library(MASS)           # Boston data
iono <- read.csv(file = "/Users/alex/Dropbox/College/4-Senior/Machine Learning/Project5/ionospheredata.csv", header = FALSE, sep = ",")
cols <- nearZeroVar(iono)
iono <- iono[-cols]
iono$V1 <- as.numeric(as.character(iono$V1))
set.seed("12345")
dp <- createDataPartition(iono$V35, p=0.7, list=FALSE)
training <- iono[dp,]
testing <- iono[-dp,]
Read in the data, removed any columns with near-zero variance, converted the first variable to numeric, set the seed, and created 70/30 training and testing partitions.
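As an optional diagnostic (a sketch, not part of the original pipeline), nearZeroVar() can report its underlying metrics instead of column indices; run before the columns are dropped above, this shows why each column was flagged.
# Sketch: saveMetrics = TRUE returns freqRatio, percentUnique, zeroVar, and
# nzv for every column rather than the indices of the offending ones.
nearZeroVar(iono, saveMetrics = TRUE)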
models1 <- caretList(V35 ~ ., data = training,
                     trControl = trainControl(method = "cv", number = 10,
                                              savePredictions = TRUE, classProbs = TRUE),
                     methodList = c("knn", "lda", "rpart"))
results <- resamples(models1)
summary(results)
##
## Call:
## summary.resamples(object = results)
##
## Models: knn, lda, rpart
## Number of resamples: 10
##
## Accuracy
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## knn 0.7600000 0.8100000 0.8575000 0.8501667 0.87875 0.92 0
## lda 0.7600000 0.8350000 0.8750000 0.8703333 0.91000 1.00 0
## rpart 0.7083333 0.8891667 0.9183333 0.8941667 0.92000 0.96 0
##
## Kappa
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## knn 0.4230769 0.5467033 0.6525199 0.6455658 0.7283935 0.8175182 0
## lda 0.3902439 0.6111632 0.7019704 0.6890054 0.7929140 1.0000000 0
## rpart 0.3913043 0.7624470 0.8157359 0.7747473 0.8324250 0.9110320 0
The cross-validated accuracies of the three models in models1 are all fairly similar, with mean accuracies between roughly 0.85 and 0.89.
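For a visual version of this comparison, caret supplies lattice plot methods for resamples objects; a minimal sketch:
# Sketch: box-and-whisker plot of the cross-validated Accuracy and Kappa
# for the three models in models1.
bwplot(results)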
modelCor(results)
## knn lda rpart
## knn 1.0000000 0.7927249 0.5740615
## lda 0.7927249 1.0000000 0.2232201
## rpart 0.5740615 0.2232201 1.0000000
models1 <- caretList(V35 ~ ., data = training,
                     trControl = trainControl(method = "cv", number = 10,
                                              savePredictions = TRUE, classProbs = TRUE),
                     methodList = c("knn", "rpart"))
The model correlations show that the kNN and LDA models are highly correlated (0.79), so models1 was rerun without the LDA model.
stack1 <- caretStack(models1, method = "glm", metric = "Accuracy",
                     trControl = trainControl(method = "cv", number = 10,
                                              savePredictions = TRUE, classProbs = TRUE))
kNN1 <- train(V35 ~ ., data = training, method = "knn",
              trControl = trainControl(method = "cv", number = 10))
CART1 <- train(V35 ~ ., data = training, method = "rpart",
               trControl = trainControl(method = "cv", number = 10))
confusionMatrix(training$V35, predict(stack1, training))
## Confusion Matrix and Statistics
##
## Reference
## Prediction b g
## b 26 63
## g 157 1
##
## Accuracy : 0.1093
## 95% CI : (0.0733, 0.155)
## No Information Rate : 0.7409
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.5701
## Mcnemar's Test P-Value : 3.609e-10
##
## Sensitivity : 0.142077
## Specificity : 0.015625
## Pos Pred Value : 0.292135
## Neg Pred Value : 0.006329
## Prevalence : 0.740891
## Detection Rate : 0.105263
## Detection Prevalence : 0.360324
## Balanced Accuracy : 0.078851
##
## 'Positive' Class : b
##
confusionMatrix(training$V35, predict(kNN1, training))
## Confusion Matrix and Statistics
##
## Reference
## Prediction b g
## b 64 25
## g 3 155
##
## Accuracy : 0.8866
## 95% CI : (0.8403, 0.9233)
## No Information Rate : 0.7287
## P-Value [Acc > NIR] : 1.088e-09
##
## Kappa : 0.7401
## Mcnemar's Test P-Value : 7.229e-05
##
## Sensitivity : 0.9552
## Specificity : 0.8611
## Pos Pred Value : 0.7191
## Neg Pred Value : 0.9810
## Prevalence : 0.2713
## Detection Rate : 0.2591
## Detection Prevalence : 0.3603
## Balanced Accuracy : 0.9082
##
## 'Positive' Class : b
##
confusionMatrix(training$V35, predict(CART1, training))
## Confusion Matrix and Statistics
##
## Reference
## Prediction b g
## b 80 9
## g 11 147
##
## Accuracy : 0.919
## 95% CI : (0.8777, 0.9498)
## No Information Rate : 0.6316
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.8252
## Mcnemar's Test P-Value : 0.8231
##
## Sensitivity : 0.8791
## Specificity : 0.9423
## Pos Pred Value : 0.8989
## Neg Pred Value : 0.9304
## Prevalence : 0.3684
## Detection Rate : 0.3239
## Detection Prevalence : 0.3603
## Balanced Accuracy : 0.9107
##
## 'Positive' Class : b
##
The accuracy of the stacked model (stack1) on the training data is very low at 10.93%, with a negative kappa, suggesting its predictions are nearly the inverse of the true labels, while kNN1 and CART1 both score well at 88.66% and 91.9%, respectively.
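One thing to note about the training-set matrices above: caret's confusionMatrix() signature is confusionMatrix(data, reference), so these calls treat the true labels as the predictions and vice versa, the opposite of the test-set calls below. A minimal sketch with named arguments removes the ambiguity:
# Sketch: naming the arguments makes explicit which vector is the
# prediction and which is the truth (same objects as above).
confusionMatrix(data = predict(stack1, training), reference = training$V35)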
predicted.stack1 <- predict(stack1, newdata = testing)
confusionMatrix(predicted.stack1, testing$V35)
## Confusion Matrix and Statistics
##
## Reference
## Prediction b g
## b 14 67
## g 23 0
##
## Accuracy : 0.1346
## 95% CI : (0.0756, 0.2155)
## No Information Rate : 0.6442
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.4909
## Mcnemar's Test P-Value : 5.826e-06
##
## Sensitivity : 0.3784
## Specificity : 0.0000
## Pos Pred Value : 0.1728
## Neg Pred Value : 0.0000
## Prevalence : 0.3558
## Detection Rate : 0.1346
## Detection Prevalence : 0.7788
## Balanced Accuracy : 0.1892
##
## 'Positive' Class : b
##
predicted.kNN1 <- predict(kNN1, newdata = testing)
confusionMatrix(predicted.kNN1, testing$V35)
## Confusion Matrix and Statistics
##
## Reference
## Prediction b g
## b 20 2
## g 17 65
##
## Accuracy : 0.8173
## 95% CI : (0.7295, 0.8863)
## No Information Rate : 0.6442
## P-Value [Acc > NIR] : 8.509e-05
##
## Kappa : 0.5617
## Mcnemar's Test P-Value : 0.001319
##
## Sensitivity : 0.5405
## Specificity : 0.9701
## Pos Pred Value : 0.9091
## Neg Pred Value : 0.7927
## Prevalence : 0.3558
## Detection Rate : 0.1923
## Detection Prevalence : 0.2115
## Balanced Accuracy : 0.7553
##
## 'Positive' Class : b
##
predicted.CART1 <- predict(CART1, newdata = testing)
confusionMatrix(predicted.CART1, testing$V35)
## Confusion Matrix and Statistics
##
## Reference
## Prediction b g
## b 30 5
## g 7 62
##
## Accuracy : 0.8846
## 95% CI : (0.8071, 0.9389)
## No Information Rate : 0.6442
## P-Value [Acc > NIR] : 2.487e-08
##
## Kappa : 0.7452
## Mcnemar's Test P-Value : 0.7728
##
## Sensitivity : 0.8108
## Specificity : 0.9254
## Pos Pred Value : 0.8571
## Neg Pred Value : 0.8986
## Prevalence : 0.3558
## Detection Rate : 0.2885
## Detection Prevalence : 0.3365
## Balanced Accuracy : 0.8681
##
## 'Positive' Class : b
##
When stack1, kNN1, and CART1 are applied to the testing data, the pattern holds: stack1's accuracy is again the lowest at 13.46%, while kNN1 and CART1 drop slightly to 81.73% and 88.46%, respectively.
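To line the three test-set accuracies up in one place, a small sketch reusing the predicted.* vectors above:
# Sketch: direct test-set accuracy for each model; comparing as character
# sidesteps any factor-level mismatch.
sapply(list(stack1 = predicted.stack1, kNN1 = predicted.kNN1, CART1 = predicted.CART1),
       function(p) mean(as.character(p) == as.character(testing$V35)))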
data("Khan")
training <- data.frame(Khan$xtrain)
training$response <- as.factor(Khan$ytrain)
testing <- data.frame(Khan$xtest)
testing$response <- as.factor(Khan$ytest)
set.seed(12345)
Loaded the Khan data from the ISLR package, attached the response variable to the training and testing partitions as a factor, and set the seed.
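A quick structure check is worthwhile here; per the ISLR documentation, the Khan data has 2,308 gene-expression predictors with 63 training and 20 testing observations (a sketch):
# Sketch: confirm dimensions and the class balance of the response.
dim(training)             # expect 63 rows x 2309 columns (2308 genes + response)
table(training$response)  # counts of the four tumor classes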
CART2 <- train(response~., data=training, method="rpart", trControl=trainControl(method="cv", number=10))
RF2 <- train(response~., data=training, method="rf", trControl=trainControl(method="cv", number=10))
GBM2 <- train(response~., data=training, method="gbm", trControl=trainControl(method="cv", number=10))
SVM2 <- train(response~., data=training, method="svmLinear", trControl=trainControl(method="cv", number=10))
Built four different models: CART, random forest, a gradient boosting machine, and a linear-kernel SVM.
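Since all four calls share the same resampling setup, they could equally be fit in a loop; a minimal sketch (ctrl and models2 are names introduced here, not from the original):
# Sketch: fit the same four models with one shared control object.
ctrl <- trainControl(method = "cv", number = 10)
methods <- c("rpart", "rf", "gbm", "svmLinear")
models2 <- setNames(lapply(methods, function(m)
  train(response ~ ., data = training, method = m, trControl = ctrl)), methods)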
confusionMatrix(training$response, predict(CART2, training))
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 4
## 1 0 0 7 1
## 2 0 22 1 0
## 3 0 0 12 0
## 4 0 0 0 20
##
## Overall Statistics
##
## Accuracy : 0.8571
## 95% CI : (0.7461, 0.9325)
## No Information Rate : 0.3492
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7977
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4
## Sensitivity NA 1.0000 0.6000 0.9524
## Specificity 0.873 0.9756 1.0000 1.0000
## Pos Pred Value NA 0.9565 1.0000 1.0000
## Neg Pred Value NA 1.0000 0.8431 0.9767
## Prevalence 0.000 0.3492 0.3175 0.3333
## Detection Rate 0.000 0.3492 0.1905 0.3175
## Detection Prevalence 0.127 0.3651 0.1905 0.3175
## Balanced Accuracy NA 0.9878 0.8000 0.9762
confusionMatrix(training$response, predict(RF2, training))
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 4
## 1 8 0 0 0
## 2 0 23 0 0
## 3 0 0 12 0
## 4 0 0 0 20
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.9431, 1)
## No Information Rate : 0.3651
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 1
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4
## Sensitivity 1.000 1.0000 1.0000 1.0000
## Specificity 1.000 1.0000 1.0000 1.0000
## Pos Pred Value 1.000 1.0000 1.0000 1.0000
## Neg Pred Value 1.000 1.0000 1.0000 1.0000
## Prevalence 0.127 0.3651 0.1905 0.3175
## Detection Rate 0.127 0.3651 0.1905 0.3175
## Detection Prevalence 0.127 0.3651 0.1905 0.3175
## Balanced Accuracy 1.000 1.0000 1.0000 1.0000
confusionMatrix(training$response, predict(GBM2, training))
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 4
## 1 8 0 0 0
## 2 0 23 0 0
## 3 0 0 12 0
## 4 0 0 0 20
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.9431, 1)
## No Information Rate : 0.3651
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 1
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4
## Sensitivity 1.000 1.0000 1.0000 1.0000
## Specificity 1.000 1.0000 1.0000 1.0000
## Pos Pred Value 1.000 1.0000 1.0000 1.0000
## Neg Pred Value 1.000 1.0000 1.0000 1.0000
## Prevalence 0.127 0.3651 0.1905 0.3175
## Detection Rate 0.127 0.3651 0.1905 0.3175
## Detection Prevalence 0.127 0.3651 0.1905 0.3175
## Balanced Accuracy 1.000 1.0000 1.0000 1.0000
confusionMatrix(training$response, predict(SVM2, training))
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 4
## 1 8 0 0 0
## 2 0 23 0 0
## 3 0 0 12 0
## 4 0 0 0 20
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.9431, 1)
## No Information Rate : 0.3651
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 1
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4
## Sensitivity 1.000 1.0000 1.0000 1.0000
## Specificity 1.000 1.0000 1.0000 1.0000
## Pos Pred Value 1.000 1.0000 1.0000 1.0000
## Neg Pred Value 1.000 1.0000 1.0000 1.0000
## Prevalence 0.127 0.3651 0.1905 0.3175
## Detection Rate 0.127 0.3651 0.1905 0.3175
## Detection Prevalence 0.127 0.3651 0.1905 0.3175
## Balanced Accuracy 1.000 1.0000 1.0000 1.0000
The CART2 model is the only one of the four that did not reach 100% accuracy on the training data; it scored 85.71%. Perfect resubstitution accuracy is not surprising here, given that the data has far more predictors than observations.
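Because resubstitution accuracy saturates for three of the models, the cross-validated resampling results are a fairer basis for comparison; a sketch using caret's resamples() (results2 is a name introduced here):
# Sketch: compare the four models on 10-fold CV Accuracy and Kappa instead
# of training-set predictions. Note the folds are not identical across
# models here, since each train() call drew its own partitions.
results2 <- resamples(list(CART = CART2, RF = RF2, GBM = GBM2, SVM = SVM2))
summary(results2)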
predicted.CART2 <- predict(CART2, newdata = testing)
confusionMatrix(predicted.CART2, testing$response)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 4
## 1 0 0 0 0
## 2 0 4 0 1
## 3 3 1 5 1
## 4 0 1 1 3
##
## Overall Statistics
##
## Accuracy : 0.6
## 95% CI : (0.3605, 0.8088)
## No Information Rate : 0.3
## P-Value [Acc > NIR] : 0.005138
##
## Kappa : 0.4386
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4
## Sensitivity 0.00 0.6667 0.8333 0.6000
## Specificity 1.00 0.9286 0.6429 0.8667
## Pos Pred Value NaN 0.8000 0.5000 0.6000
## Neg Pred Value 0.85 0.8667 0.9000 0.8667
## Prevalence 0.15 0.3000 0.3000 0.2500
## Detection Rate 0.00 0.2000 0.2500 0.1500
## Detection Prevalence 0.00 0.2500 0.5000 0.2500
## Balanced Accuracy 0.50 0.7976 0.7381 0.7333
predicted.RF2 <- predict(RF2, newdata = testing)
confusionMatrix(predicted.RF2, testing$response)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 4
## 1 3 0 0 0
## 2 0 6 1 0
## 3 0 0 5 0
## 4 0 0 0 5
##
## Overall Statistics
##
## Accuracy : 0.95
## 95% CI : (0.7513, 0.9987)
## No Information Rate : 0.3
## P-Value [Acc > NIR] : 1.662e-09
##
## Kappa : 0.932
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4
## Sensitivity 1.00 1.0000 0.8333 1.00
## Specificity 1.00 0.9286 1.0000 1.00
## Pos Pred Value 1.00 0.8571 1.0000 1.00
## Neg Pred Value 1.00 1.0000 0.9333 1.00
## Prevalence 0.15 0.3000 0.3000 0.25
## Detection Rate 0.15 0.3000 0.2500 0.25
## Detection Prevalence 0.15 0.3500 0.2500 0.25
## Balanced Accuracy 1.00 0.9643 0.9167 1.00
predicted.GBM2 <- predict(GBM2, newdata = testing)
confusionMatrix(predicted.GBM2, testing$response)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 4
## 1 3 0 0 0
## 2 0 6 0 0
## 3 0 0 6 0
## 4 0 0 0 5
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8316, 1)
## No Information Rate : 0.3
## P-Value [Acc > NIR] : 3.487e-11
##
## Kappa : 1
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4
## Sensitivity 1.00 1.0 1.0 1.00
## Specificity 1.00 1.0 1.0 1.00
## Pos Pred Value 1.00 1.0 1.0 1.00
## Neg Pred Value 1.00 1.0 1.0 1.00
## Prevalence 0.15 0.3 0.3 0.25
## Detection Rate 0.15 0.3 0.3 0.25
## Detection Prevalence 0.15 0.3 0.3 0.25
## Balanced Accuracy 1.00 1.0 1.0 1.00
predicted.SVM2 <- predict(SVM2, newdata = testing)
confusionMatrix(predicted.SVM2, testing$response)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 4
## 1 3 0 0 0
## 2 0 6 2 0
## 3 0 0 4 0
## 4 0 0 0 5
##
## Overall Statistics
##
## Accuracy : 0.9
## 95% CI : (0.683, 0.9877)
## No Information Rate : 0.3
## P-Value [Acc > NIR] : 3.773e-08
##
## Kappa : 0.8639
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4
## Sensitivity 1.00 1.0000 0.6667 1.00
## Specificity 1.00 0.8571 1.0000 1.00
## Pos Pred Value 1.00 0.7500 1.0000 1.00
## Neg Pred Value 1.00 1.0000 0.8750 1.00
## Prevalence 0.15 0.3000 0.3000 0.25
## Detection Rate 0.15 0.3000 0.2000 0.25
## Detection Prevalence 0.15 0.4000 0.2000 0.25
## Balanced Accuracy 1.00 0.9286 0.8333 1.00
The CART2 model once again had the lowest accuracy of the four when applied to the testing data: 60%, compared with 95% for RF2, 100% for GBM2, and 90% for SVM2.
normalize <- function(x) { (x - min(x)) / (max(x) - min(x)) }
energy <- read.csv(file = "/Users/alex/Dropbox/College/4-Senior/Machine Learning/Project5/ENB2012_data.csv", header = TRUE, sep = ",")
energy <- as.data.frame(lapply(energy, normalize))
set.seed("12345")
dp <- createDataPartition(energy$Y1, p = 0.7, list = FALSE)
training <- energy[dp,]
testing <- energy[-dp,]
Created a min-max normalize function, read in the data, and normalized every column to [0, 1]. Set the seed and created 70/30 training and testing partitions.
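A one-line sanity check (not in the original workflow) confirms the min-max scaling; every column should now span exactly [0, 1]:
# Sketch: TRUE if every column of the normalized data runs from 0 to 1.
all(sapply(energy, function(x) min(x) == 0 && max(x) == 1))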
NN3b <- train(Y1 ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8, data = training, method = "nnet", trace = FALSE)
Created a neural network model using Y1 as the response and X1 through X8 as the predictors.
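caret tunes nnet's size and decay over a small default grid; the winning combination is stored on the fitted object (sketch):
# Sketch: inspect the hyperparameters caret selected for NN3b.
NN3b$bestTune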
cor(predict(NN3b, testing), testing$Y1)^2
## [,1]
## [1,] 0.9932561
Calculated the R2 for NN3b on the testing data; as shown above, it is 0.9932561.
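This R2 is the squared correlation between predictions and observations; as a cross-check, caret's postResample() reports RMSE, Rsquared, and MAE in one call (sketch):
# Sketch: Rsquared here should essentially match the squared correlation
# computed above.
postResample(predict(NN3b, testing), testing$Y1)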
NN3d <- neuralnet(Y1 + Y2 ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8, data = training, hidden = 1)
plot(NN3d, rep = "best")
cor(compute(NN3d, testing[, 1:8])$net.result[ , 1], testing$Y1)^2
## [1] 0.9076677614
cor(compute(NN3d, testing[, 1:8])$net.result[ , 2], testing$Y2)^2
## [1] 0.8716707806
The R2 for Y1 from NN3d on the testing data was 0.9076678; for Y2 it was 0.8716708.
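In recent versions of the neuralnet package (roughly 1.44 onward, an assumption worth verifying), predict() can stand in for compute(); the result's columns follow the order of the responses in the formula, Y1 then Y2. A sketch (preds is a name introduced here):
# Sketch: predict() alternative to compute() for the two-response network.
preds <- predict(NN3d, testing[, 1:8])
cor(preds[, 1], testing$Y1)^2  # Y1 fit
cor(preds[, 2], testing$Y2)^2  # Y2 fit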
NN3e <- neuralnet(Y1 + Y2 ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8, data = training, hidden = c(2, 1), stepmax = 225000)
plot(NN3e, rep = "best")
cor(compute(NN3e, testing[, 1:8])$net.result[ , 1], testing$Y1)^2
## [1] 0.9759201642
cor(compute(NN3e, testing[, 1:8])$net.result[ , 2], testing$Y2)^2
## [1] 0.9429601469
With the deeper c(2, 1) network, the R2 for Y1 on the testing data rose to 0.9759202 and the R2 for Y2 rose to 0.9429601, an improvement over NN3d on both responses.
data("Boston")
set.seed("12345")
Loaded the Boston data from the MASS package and set the seed.
NN4b <- neuralnet(medv ~ lstat, data = Boston)
xList <- seq(0, 40, 0.2)
predicted.NN4b <- compute(NN4b, xList)
plot(Boston$lstat, Boston$medv, pch = 20)
lines(xList, predicted.NN4b$net.result, col = "green")
The fit of this line is rather poor; it hardly follows the data at all, likely because the unscaled lstat values saturate the logistic hidden unit.
normalBoston <- data.frame(lapply(Boston, normalize))
Normalized the data.
NN4d <- neuralnet(medv ~ lstat, data = normalBoston)
xList <- seq(0, 1, 0.02)
predicted.NN4d <- compute(NN4d, xList)
plot(normalBoston$lstat, normalBoston$medv, pch = 20)
lines(xList, predicted.NN4d$net.result, col = "red")
This curve for NN4d fits the data much better, taking a shape similar to that of the data in the plot.
plot(NN4d, rep = "best")
\(y = 2.70587 - 2.53167\,S(0.99799 + 6.0366x)\)
where \(y\) is medv, \(x\) is lstat, and \(S\) is the logistic activation function (neuralnet's default).
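As a sanity check (a sketch, not part of the original analysis), the closed-form expression can be evaluated directly; S below is the logistic function:
# Sketch: evaluate the extracted equation; the result should match
# compute(NN4d, x)$net.result up to the rounding of the printed weights.
S <- function(z) 1 / (1 + exp(-z))
x <- seq(0, 1, 0.02)
y.formula <- 2.70587 - 2.53167 * S(0.99799 + 6.0366 * x)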
NN4f <- neuralnet(medv ~ lstat, data = normalBoston, hidden = c(2, 2))
xList <- seq(0, 1, 0.02)
predicted.NN4f <- compute(NN4f, xList)
plot(normalBoston$lstat, normalBoston$medv, pch = 20)
lines(xList, predicted.NN4f$net.result, col = "blue")
The fit of the line for NN4f appears rather similar to that for NN4d: not necessarily better or worse, just a bit different, most notably the curl to the left as medv approaches 1.
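To put a number on the visual comparison, one option (a sketch; sse is a helper introduced here) is the sum of squared errors of each fit over the full normalized data:
# Sketch: in-sample SSE for the hidden = 1 and hidden = c(2, 2) networks.
sse <- function(fit) {
  preds <- compute(fit, normalBoston["lstat"])$net.result
  sum((preds - normalBoston$medv)^2)
}
c(NN4d = sse(NN4d), NN4f = sse(NN4f))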