# Package bootstrap ----
# Use the CRAN cloud mirror so install.packages() has a repository to
# download from.
options(repos = c(CRAN = "https://cloud.r-project.org/"))

# Install each of these packages only when it is not already available.
# requireNamespace() checks availability without attaching the package.
invisible(lapply(c("mlbench", "corrplot"), function(pkg) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}))
# Load the data ----
# Location of the Haberman data file. It defaults to the original author's
# local path, but can be overridden by setting the HABERMAN_DATA environment
# variable so the script runs on other machines without editing the source.
# NOTE(review): the variable keeps its original name `url` in case later code
# refers to it, although by default it holds a local file path.
url <- Sys.getenv("HABERMAN_DATA", unset = "")
if (!nzchar(url)) {
  url <- "C:\\Users\\Naman\\Downloads\\haberman.data"
}

column_names <- c("age", "year", "nodes", "survival_status")

# The file is read without a header row, so the columns are named explicitly.
dsurvive <- read.csv(url, header = FALSE)

# Stop early on an unexpected file layout: assigning too few names would
# otherwise silently leave NA column names behind.
stopifnot(ncol(dsurvive) == length(column_names))
colnames(dsurvive) <- column_names
# Attach packages ----
library(mlbench)
library(corrplot)
library(caret) # attaching caret also loads ggplot2 and lattice

# Install neuralnet only when it is missing, mirroring the guarded installs
# used for the other packages. An unconditional install.packages() call would
# re-download the package on every run and fail without network access.
if (!requireNamespace("neuralnet", quietly = TRUE)) {
  install.packages("neuralnet")
}
library(neuralnet)
library(ggplot2)
library(reshape2)
# Exploratory data analysis ----

# 1. Per-column summary statistics.
# An earlier recorded run reported these ranges: age 30-83 (mean 52.46),
# year 58-69 (mean 62.85), nodes 0-52 (median 1, mean 4.026),
# survival_status 1-2 (mean 1.265).
print("Basic Data Summary:")
summary(dsurvive)

# 2. Column types and a preview of the leading values.
# The same recorded run showed 306 observations of 4 integer variables.
str(dsurvive)

# 3. Class balance of the outcome column.
# The recorded counts were 225 rows with status 1 and 81 rows with status 2.
table(dsurvive[["survival_status"]])
# 4. Correlation analysis ----
# Pairwise correlations between the three predictor columns, selected by name
# so the result does not depend on column order.
correlation_matrix <- cor(dsurvive[, c("age", "year", "nodes")])

# Upper-triangle heat map with the coefficients printed in each cell.
corrplot(
  correlation_matrix,
  method = "color",
  type = "upper",
  addCoef.col = "black",
  tl.col = "black",
  tl.srt = 45,
  title = "Feature Correlation Matrix",
  # corrplot() uses zero-width margins by default, which clips the title;
  # reserve one line of top margin so the title is drawn in full.
  mar = c(0, 0, 1, 0)
)
# Distribution plots ----
# Box plots and density plots of the three predictor columns, each set drawn
# on its own 2 x 2 grid.
feature_names <- column_names[1:3]

# par() returns the settings it replaces; keep them so the plotting layout can
# be restored afterwards. Without the restore, every later plot in the script
# would be squeezed into one cell of the 2 x 2 grid.
old_par <- par(mfrow = c(2, 2))

for (feature in feature_names) {
  boxplot(dsurvive[[feature]], main = feature, col = "lightblue")
}

# Reset the grid so the density plots start from the first cell again.
par(mfrow = c(2, 2))
for (feature in feature_names) {
  plot(
    density(dsurvive[[feature]]),
    main = paste("Density Plot of", feature),
    col = "blue"
  )
}

# Return to the layout that was active before this section.
par(old_par)
# Scatter-plot matrix ----
# Fill colour per observation: survival_status holds the codes 1 and 2, which
# index directly into the colour vector (1 -> "red", 2 -> "blue").
status_fill <- c("red", "blue")[dsurvive$survival_status]

# Pairwise scatter plots of the three predictor columns.
pairs(
  dsurvive[, 1:3],
  main = "Scatter Plot Matrix",
  pch = 21,
  bg = status_fill
)
# Preprocessing and train/test split ----

# Treat the outcome as a factor so createDataPartition() samples within each
# outcome class.
dsurvive$survival_status <- as.factor(dsurvive$survival_status)

# Split first: 70 % of the rows go to training. The seed makes the partition
# reproducible.
set.seed(123)
trainIndex <- createDataPartition(
  dsurvive$survival_status,
  p = 0.7,
  list = FALSE
)

# Estimate the centring/scaling parameters from the training rows only, then
# apply them to every row. Fitting the scaler on the full data set would leak
# test-set statistics into training. Metrics may therefore differ slightly
# from runs that scaled on all rows.
feature_cols <- c("age", "year", "nodes")
preProc <- preProcess(
  dsurvive[trainIndex, feature_cols],
  method = c("center", "scale")
)
dsurvive_scaled <- predict(preProc, dsurvive[, feature_cols])
dsurvive_scaled$survival_status <- dsurvive$survival_status

train_data <- dsurvive_scaled[trainIndex, ]
test_data <- dsurvive_scaled[-trainIndex, ]

# Recode the factor levels "1"/"2" as numeric 0/1 for use as the network's
# response variable.
train_data$survival_status <- as.numeric(train_data$survival_status) - 1
test_data$survival_status <- as.numeric(test_data$survival_status) - 1
# Train the neural network ----
# Seed the RNG immediately before fitting so the run is reproducible.
set.seed(123)

nn_model <- neuralnet(
  formula = survival_status ~ age + year + nodes,
  data = train_data,
  hidden = c(5, 3), # two hidden layers: 5 neurons, then 3
  threshold = 0.01,
  linear.output = FALSE
)

# Draw the fitted network; "best" picks the repetition with the lowest error.
plot(nn_model, rep = "best")
# 5. Predict on the held-out rows and score the classifier ----

# Namespace-qualified so the call cannot be masked by a compute() from another
# attached package. The result is a list; $net.result holds the network output
# as a one-column matrix.
predictions <- neuralnet::compute(
  nn_model,
  test_data[, c("age", "year", "nodes")]
)

# Threshold the network output at 0.5 to obtain hard 0/1 labels.
predicted_classes <- ifelse(predictions$net.result > 0.5, 1, 0)

# Build the confusion matrix from factors with fixed levels so it is always
# 2 x 2. With plain vectors, a class that is never predicted would drop out of
# the table and the [2, 2] lookups below would fail.
class_levels <- c(0, 1)
conf_matrix <- table(
  Actual = factor(test_data$survival_status, levels = class_levels),
  Predicted = factor(as.vector(predicted_classes), levels = class_levels)
)
print("Confusion Matrix:")
print(conf_matrix)

# Ratio helper: returns NA instead of NaN when the denominator is zero or
# missing (for example precision when no positive was predicted).
safe_divide <- function(numerator, denominator) {
  if (is.na(denominator) || denominator == 0) {
    NA_real_
  } else {
    numerator / denominator
  }
}

# Class "1" is treated as the positive class.
true_pos <- conf_matrix["1", "1"]
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
precision <- safe_divide(true_pos, sum(conf_matrix[, "1"]))
recall <- safe_divide(true_pos, sum(conf_matrix["1", ]))
f1_score <- safe_divide(2 * precision * recall, precision + recall)

# An earlier recorded run (results vary with preprocessing and package
# versions) gave the confusion matrix actual 0: 61 / 6, actual 1: 21 / 3,
# with accuracy 0.703, precision 0.333, recall 0.125 and F1 0.182.
print(paste("Accuracy:", round(accuracy, 3)))
print(paste("Precision:", round(precision, 3)))
print(paste("Recall:", round(recall, 3)))
print(paste("F1 Score:", round(f1_score, 3)))
# ROC curve and AUC ----
# Attaching pROC masks cov, smooth and var from package:stats.
library(pROC)

# The network output is a one-column matrix; pROC deprecates matrix predictors
# and warns that results may be unexpected, so pass a plain numeric vector.
# levels and direction are fixed to the values pROC previously chose
# automatically (control = 0, case = 1, controls < cases), which also removes
# the informational messages.
roc_obj <- roc(
  response = test_data$survival_status,
  predictor = as.numeric(predictions$net.result),
  levels = c(0, 1),
  direction = "<"
)
plot(roc_obj, main = "ROC Curve")

# The result keeps its original name `auc`; the function is called with its
# namespace so the call stays unambiguous once that variable exists.
# An earlier recorded run reported an AUC of 0.532.
auc <- pROC::auc(roc_obj)
print(paste("AUC:", round(auc, 3)))