# AZHAR KUDAIBERGENOVA - DATAMINING PROJECT 3
library(e1071)
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:randomForest':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the admissions data, drop its fifth column (selected by position), and
# turn the `admit` outcome into a factor so the models below treat the task
# as classification.
admissions <- read.csv("binary.csv") %>%
  select(-5) %>%
  mutate(admit = as.factor(admit))

# Reproducible 70/30 split: sample 70% of the row indices (without
# replacement) for training; every remaining row goes to the test set.
set.seed(123)
n_rows <- nrow(admissions)
train_idx <- sample(n_rows, 0.7 * n_rows)
train <- admissions[train_idx, ]
test <- admissions[-train_idx, ]
# Share of observations on the diagonal of a square confusion matrix.
accuracy_from <- function(conf_matrix) {
  sum(diag(conf_matrix)) / sum(conf_matrix)
}

# Fit both classifiers on the training split, using every other column as a
# predictor of `admit`.
rf_model <- randomForest(
  admit ~ .,
  data = train,
  ntree = 500,
  importance = TRUE
)
svm_model <- svm(admit ~ ., data = train)

# Class predictions for the held-out rows.
rf_pred <- predict(rf_model, newdata = test)
svm_pred <- predict(svm_model, newdata = test)

# Confusion matrices: rows are predicted classes, columns are actual classes.
rf_conf_matrix <- table(rf_pred, test$admit)
svm_conf_matrix <- table(svm_pred, test$admit)

rf_accuracy <- accuracy_from(rf_conf_matrix)
svm_accuracy <- accuracy_from(svm_conf_matrix)
# Ensemble of the random forest and the SVM.
#
# Voting rule (the one the original `<= 1` / `> 1` thresholds on a 0/1 vote
# sum describe): predict "1" only when BOTH models predict "1"; if the models
# disagree, or both predict "0", the ensemble predicts "0".
#
# The votes are compared as factor labels on purpose. Calling as.numeric() on
# a factor returns its internal integer codes (1, 2), not the labels (0, 1),
# which would push every vote sum above the threshold and label every case
# as 1.
both_admit <- rf_pred == "1" & svm_pred == "1"

# Keep the full level set of the outcome so the confusion matrix is always
# square (2 x 2), even if the ensemble never predicts one of the classes.
# Otherwise diag() on a one-row table silently returns a single cell and the
# accuracy below is meaningless.
ensemble_pred <- factor(
  ifelse(both_admit, "1", "0"),
  levels = levels(test$admit)
)

# Rows are predicted classes, columns are actual classes.
ensemble_conf_matrix <- table(ensemble_pred, test$admit)
ensemble_accuracy <- sum(diag(ensemble_conf_matrix)) / sum(ensemble_conf_matrix)
# Print one model's confusion matrix followed by its accuracy.
#   label       - model name used as the prefix of both headings
#   conf_matrix - confusion matrix (table) to print
#   accuracy    - accuracy value appended to the "Accuracy: " line
report_model <- function(label, conf_matrix, accuracy) {
  print(paste0(label, " Confusion Matrix:"))
  print(conf_matrix)
  print(paste0(label, " Accuracy: ", accuracy))
  invisible(NULL)
}

# The `##` lines after each call are the output recorded from the original run.
report_model("Random Forest", rf_conf_matrix, rf_accuracy)
## [1] "Random Forest Confusion Matrix:"
##
## rf_pred 0 1
## 0 79 22
## 1 10 9
## [1] "Random Forest Accuracy: 0.733333333333333"
report_model("SVM", svm_conf_matrix, svm_accuracy)
## [1] "SVM Confusion Matrix:"
##
## svm_pred 0 1
## 0 80 25
## 1 9 6
## [1] "SVM Accuracy: 0.716666666666667"
report_model("Ensemble", ensemble_conf_matrix, ensemble_accuracy)
## [1] "Ensemble Confusion Matrix:"
##
## ensemble_pred 0 1
## 1 89 31
## [1] "Ensemble Accuracy: 0.741666666666667"
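# Based on the recorded results, the ensemble appears to improve on the two
# individual models: its reported accuracy is 0.7417, against 0.7333 for the
# random forest and 0.7167 for the SVM. That comparison is not valid, however.
# The recorded ensemble confusion matrix has a single row (every test case
# predicted as class 1), which is what happens when factor predictions are
# converted with as.numeric() (internal codes 1/2 instead of labels 0/1).
# diag() of that 1 x 2 table returns only its first cell (89), so 0.7417 is
# the share of actual 0s in the test set (89 / 120), all of which were
# misclassified, and not an accuracy. The ensemble must be re-evaluated with
# a square (2 x 2) confusion matrix before it is compared with the
# individual models.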