# AZHAR KUDAIBERGENOVA - DATAMINING PROJECT 3

library(e1071)
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:randomForest':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the admissions data, drop the fifth column (by position) and turn the
# target `admit` into a factor so the models below treat it as a class label.
admissions <- read.csv("binary.csv") %>%
  select(-5) %>%
  mutate(admit = as.factor(admit))

# Reproducible split: sample 70% of the row indices for training and keep the
# remaining rows for testing.
set.seed(123)
n_obs <- nrow(admissions)
train_idx <- sample(n_obs, 0.7 * n_obs)
train <- admissions[train_idx, ]
test <- admissions[-train_idx, ]

# Fit both classifiers on the training split, using every remaining column as
# a predictor of `admit`.
rf_model <- randomForest(
  admit ~ .,
  data = train,
  ntree = 500,
  importance = TRUE
)
svm_model <- svm(admit ~ ., data = train)

# Share of observations on the diagonal of a confusion matrix, i.e. the cases
# where the predicted class equals the actual class.
conf_matrix_accuracy <- function(conf_matrix) {
  sum(diag(conf_matrix)) / sum(conf_matrix)
}

# Random forest: predicted class vs. actual class on the held-out rows.
rf_pred <- predict(rf_model, newdata = test)
rf_conf_matrix <- table(rf_pred, test$admit)
rf_accuracy <- conf_matrix_accuracy(rf_conf_matrix)

# SVM: same evaluation on the same held-out rows.
svm_pred <- predict(svm_model, newdata = test)
svm_conf_matrix <- table(svm_pred, test$admit)
svm_accuracy <- conf_matrix_accuracy(svm_conf_matrix)


# Ensemble: combine the random forest and SVM class predictions for the test
# rows by unanimous vote. A row is predicted 1 only when both models predict
# 1; a single vote (or none) yields 0.
#
# The predictions are factors with labels "0" and "1". as.numeric() applied
# directly to a factor returns the internal level codes (1, 2), not the
# labels, so the summed votes would always be >= 2 and every row would be
# labelled 1. Converting through as.character() recovers the real 0/1 values.
rf_votes <- as.numeric(as.character(rf_pred))
svm_votes <- as.numeric(as.character(svm_pred))
ensemble_pred <- as.numeric(rf_votes + svm_votes > 1)

# Pin both class levels so the confusion matrix is always 2 x 2. If one class
# is never predicted the table would otherwise lose a row, and diag() would no
# longer return the counts of correct predictions.
ensemble_conf_matrix <- table(
  factor(ensemble_pred, levels = c(0, 1)),
  test$admit,
  dnn = c("ensemble_pred", "")
)
ensemble_accuracy <-
  sum(diag(ensemble_conf_matrix)) / sum(ensemble_conf_matrix)

# Report the confusion matrix and accuracy of each model, in the order
# random forest, SVM, ensemble.
model_reports <- list(
  list(
    heading = "Random Forest Confusion Matrix:",
    confusion = rf_conf_matrix,
    accuracy_label = "Random Forest Accuracy: ",
    accuracy = rf_accuracy
  ),
  list(
    heading = "SVM Confusion Matrix:",
    confusion = svm_conf_matrix,
    accuracy_label = "SVM Accuracy: ",
    accuracy = svm_accuracy
  ),
  list(
    heading = "Ensemble Confusion Matrix:",
    confusion = ensemble_conf_matrix,
    accuracy_label = "Ensemble Accuracy: ",
    accuracy = ensemble_accuracy
  )
)
for (report in model_reports) {
  print(report[["heading"]])
  print(report[["confusion"]])
  print(paste0(report[["accuracy_label"]], report[["accuracy"]]))
}
# The `##` lines below appear to be console output captured from an earlier
# run of this script; they are kept verbatim.
## [1] "Random Forest Confusion Matrix:"
##        
## rf_pred  0  1
##       0 79 22
##       1 10  9
## [1] "Random Forest Accuracy: 0.733333333333333"
## [1] "SVM Confusion Matrix:"
##         
## svm_pred  0  1
##        0 80 25
##        1  9  6
## [1] "SVM Accuracy: 0.716666666666667"
## [1] "Ensemble Confusion Matrix:"
##              
## ensemble_pred  0  1
##             1 89 31
## [1] "Ensemble Accuracy: 0.741666666666667"
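# NOTE: the ensemble figures recorded above do not support the conclusion that
# the ensemble improved over the two individual models. The recorded ensemble
# confusion matrix has a single row: all 120 test cases were predicted as 1.
# With only one row, diag() returns just the first cell (89, the actual-0
# cases), so the reported 0.7417 is not an accuracy. Predicting 1 for every
# case is correct only for the 31 actual-1 cases, i.e. 31/120 = 0.2583, which
# is lower than the accuracies of the random forest model (0.7333)
# and SVM model (0.7167).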