We will use an artificial neural network (ANN) to predict breast cancer.

# Load the brca dataset into the workspace
data(brca)


# Pull the feature matrix (X) and the outcome labels (y) out of brca
X <- brca[["x"]]
y <- brca[["y"]]

# Randomly assign 80% of the rows to training; the remaining 20% are held out
# for testing
set.seed(123)  # fixed seed so the same split is drawn on every run
train_indices <- sample.int(nrow(X), size = floor(0.8 * nrow(X)))

# Training partition: the sampled rows
X_train <- X[train_indices, ]
y_train <- y[train_indices]

# Test partition: every row that was not sampled for training
X_test <- X[-train_indices, ]
y_test <- y[-train_indices]

Normalization

# Estimate centering and scaling parameters from the training features only,
# so that no information from the test set leaks into preprocessing
preprocess_params <- preProcess(X_train, method = c("center", "scale"))

# Apply the same training-derived transformation to both partitions
X_train <- predict(preprocess_params, newdata = X_train)
X_test <- predict(preprocess_params, newdata = X_test)

Label encoding

# Encode the outcome as a numeric 0/1 vector: 1 where the label is "M",
# 0 for every other label
y_train <- as.numeric(y_train == "M")
y_test <- as.numeric(y_test == "M")

Creating a sequential model

# Start from an empty sequential model
model <- keras_model_sequential()

# Stack the layers: two ReLU hidden layers (64 and 32 units) followed by a
# single sigmoid unit. The input width is the number of feature columns.
model %>%
  layer_dense(
    units = 64,
    activation = "relu",
    input_shape = ncol(X_train)
  ) %>%
  layer_dense(
    units = 32,
    activation = "relu"
  ) %>%
  layer_dense(
    units = 1,
    activation = "sigmoid"
  )

# Configure training: binary cross-entropy loss, Adam optimizer, and accuracy
# reported as the metric
model %>%
  compile(
    loss = "binary_crossentropy",
    optimizer = "adam",
    metrics = "accuracy"
  )

Fitting the model

# Train for 20 epochs in mini-batches of 32, with validation_split = 0.2
# reserving a fifth of the training data for validation; the returned
# training record is kept in `history`
history <- fit(
  model,
  x = as.matrix(X_train),
  y = y_train,
  epochs = 20,
  batch_size = 32,
  validation_split = 0.2
)

Plotting the training history

# Plot the training record returned by fit() and stored in `history`
plot(history)

## Evaluate the trained model on the held-out test data

# verbose = 0 silences the progress output of evaluate()
scores <- evaluate(
  model,
  x = as.matrix(X_test),
  y = y_test,
  verbose = 0
)

# The first element of `scores` is reported as the loss, the second as accuracy
cat("Test loss:", scores[[1]], "\n")
## Test loss: 0.09814949
cat("Test accuracy:", scores[[2]], "\n")
## Test accuracy: 0.9736842

Post-analysis

# Predict on the test set and turn the sigmoid outputs into class labels.
# The comparison and integer conversion are done in plain R, so `y_pred` is an
# ordinary integer vector (1 when the predicted probability exceeds 0.5,
# otherwise 0) rather than a backend tensor produced by k_cast().
y_pred <- as.integer(predict(model, as.matrix(X_test)) > 0.5)

# Flatten both label sets to plain vectors before cross-tabulating
y_test_vec <- as.vector(y_test)
y_pred_vec <- as.vector(y_pred)

# Cross-tabulate actual (rows) against predicted (columns). Fixing the factor
# levels to 0 and 1 keeps the matrix 2x2 even if one class is never predicted.
confusion_matrix <- table(
  Actual = factor(y_test_vec, levels = c(0, 1)),
  Predicted = factor(y_pred_vec, levels = c(0, 1))
)

# Print the confusion matrix
print(confusion_matrix)
##       Predicted
## Actual  0  1
##      0 67  1
##      1  2 44

Tuning the decision threshold to make the model more sensitive to malignant tumors

# Get the predicted probabilities from the model for the test set
y_pred_prob <- predict(model, as.matrix(X_test))

# Decision threshold: a probability at or above this value is labelled 1.
# A value below the default 0.5 labels more cases as 1, trading extra false
# positives for fewer false negatives.
new_threshold <- 0.01  # Set your desired threshold value

# Re-label the predictions using the new threshold
y_pred_adjusted <- ifelse(y_pred_prob >= new_threshold, 1, 0)

# Cross-tabulate actual (rows) against adjusted predictions (columns).
# With a very low threshold every case may be predicted as 1; fixing the
# factor levels keeps the "0" column in the table instead of dropping it.
confusion_matrix_adjusted <- table(
  Actual = factor(y_test, levels = c(0, 1)),
  Predicted = factor(y_pred_adjusted, levels = c(0, 1))
)

# Print the adjusted confusion matrix
print(confusion_matrix_adjusted)
##       Predicted
## Actual  0  1
##      0 54 14
##      1  0 46