# We will be using an artificial neural network (ANN) to predict breast cancer.

# Load the data.
# NOTE(review): `brca` is not defined anywhere in this file; data(brca)
# presumably relies on the package that ships the dataset already being
# attached -- confirm before running this script on its own.
data(brca)

# Access the features (X) and the target variable (y) stored in `brca`.
X <- brca[["x"]]
y <- brca[["y"]]
# Split the data into training and testing sets (80% training, 20% testing).
set.seed(123) # Fix the RNG state so the random split is reproducible
train_indices <- sample(nrow(X), floor(0.8 * nrow(X)))

# drop = FALSE keeps both feature sets two-dimensional even if one side of
# the split ends up with a single row; without it a one-row selection from a
# matrix collapses to a plain vector and later nrow()/ncol()/as.matrix()
# calls would see the wrong shape.
X_train <- X[train_indices, , drop = FALSE]
y_train <- y[train_indices]

# Negative indexing selects every row that was not drawn for training.
X_test <- X[-train_indices, , drop = FALSE]
y_test <- y[-train_indices]
# Label-encode the target: "M" becomes 1, every other value becomes 0.
y_train <- ifelse(test = y_train == "M", yes = 1, no = 0)
y_test <- ifelse(test = y_test == "M", yes = 1, no = 0)

# Centre and scale the features. The transformation is estimated on the
# training features only and then applied unchanged to both sets, so the
# test set does not influence the scaling parameters.
preprocess_params <- preProcess(X_train, method = c("center", "scale"))
X_train <- predict(preprocess_params, newdata = X_train)
X_test <- predict(preprocess_params, newdata = X_test)
# Network architecture: two hidden ReLU layers (64 then 32 units) feeding a
# single sigmoid output unit. The input width is taken from the number of
# feature columns in X_train.
model <- keras_model_sequential()
model %>%
  layer_dense(
    input_shape = ncol(X_train),
    units = 64,
    activation = "relu"
  ) %>%
  layer_dense(
    units = 32,
    activation = "relu"
  ) %>%
  layer_dense(
    units = 1,
    activation = "sigmoid"
  )
# Configure training: Adam optimizer, binary cross-entropy loss (matching the
# single sigmoid output), and accuracy tracked as the reported metric.
model %>%
  compile(
    optimizer = "adam",
    loss = "binary_crossentropy",
    metrics = "accuracy"
  )
# Train the network for 20 epochs in mini-batches of 32, with
# validation_split = 0.2 reserving part of the training data for validation.
# The object returned by fit() is kept in `history`.
history <- fit(
  model,
  x = as.matrix(X_train),
  y = y_train,
  epochs = 20,
  batch_size = 32,
  validation_split = 0.2
)
# Plot the training history returned by fit()
plot(history)
# Evaluate the trained model on the held-out test data (verbose = 0 silences
# the progress output).
scores <- evaluate(
  model,
  x = as.matrix(X_test),
  y = y_test,
  verbose = 0
)

# The first element of `scores` is reported as the loss and the second as the
# accuracy; the `##` lines are the output recorded from a previous run.
cat("Test loss:", scores[[1]], "\n")
## Test loss: 0.09814949
cat("Test accuracy:", scores[[2]], "\n")
## Test accuracy: 0.9736842
# Model prediction on the test set: predict() returns probabilities, which
# are turned into hard 0/1 labels with a 0.5 cut-off. The comparison and the
# integer conversion are done in plain R, so the values never have to be
# pushed through the Keras backend and pulled back out again.
y_pred <- as.integer(predict(model, as.matrix(X_test)) > 0.5)

# Create the confusion matrix. Both sides are converted to factors with the
# explicit levels 0 and 1 so the table always has both rows and both columns,
# even when one class is missing from the labels or from the predictions
# (a bare table() would silently drop that row/column).
y_test_vec <- as.vector(y_test)
y_pred_vec <- as.vector(y_pred)
confusion_matrix <- table(
  Actual = factor(y_test_vec, levels = c(0, 1)),
  Predicted = factor(y_pred_vec, levels = c(0, 1))
)

# Print the confusion matrix (the `##` lines are output from a previous run)
print(confusion_matrix)
## Predicted
## Actual 0 1
## 0 67 1
## 1 2 44
# Get the predicted probabilities from the model
y_pred_prob <- predict(model, as.matrix(X_test))

# Adjust the threshold: a probability at or above this value is labelled 1.
new_threshold <- 0.01 # Set your desired threshold value

# Adjust the predicted values based on the new threshold
y_pred_adjusted <- ifelse(y_pred_prob >= new_threshold, 1, 0)

# Create the confusion matrix with the adjusted predictions. The predictions
# are flattened with as.vector() and both sides are given the explicit factor
# levels 0 and 1, so the table keeps both rows and both columns even when
# every prediction lands in a single class -- which a very low threshold
# such as this one can easily cause.
confusion_matrix_adjusted <- table(
  Actual = factor(as.vector(y_test), levels = c(0, 1)),
  Predicted = factor(as.vector(y_pred_adjusted), levels = c(0, 1))
)

# Print the adjusted confusion matrix (the `##` lines are output from a
# previous run)
print(confusion_matrix_adjusted)
## Predicted
## Actual 0 1
## 0 54 14
## 1 0 46