Neural Network Diabetes

Author

Sergio Uribe

CREATED

October 8, 2024

UPDATED

October 8, 2024

Quarto code

Load necessary packages

# Install pacman only when it is missing, then let it load (and, if needed,
# install) the packages used in this document.
# requireNamespace() only checks availability; pacman does not need to be
# attached because it is always called through `pacman::`.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
pacman::p_load(neuralnet, mlbench, tidyverse)

Data Preparation

# Bring the PimaIndiansDiabetes2 data set from mlbench into the workspace
utils::data(list = "PimaIndiansDiabetes2", package = "mlbench")

Create a copy of the dataset

# Work on a copy named `temp`: every later step reassigns `temp`, so
# PimaIndiansDiabetes2 itself stays as loaded.
temp <- PimaIndiansDiabetes2

Remove the ‘insulin’ and ‘triceps’ columns, which have the most missing values

# Drop the insulin and triceps columns; every other column is kept
temp <- dplyr::select(temp, -c(insulin, triceps))

Remove rows with missing data

# Keep only complete cases: any row containing an NA is dropped
temp <- temp |>
  stats::na.omit()

Target Variable Transformation

# Turn the `diabetes` column into a numeric indicator:
# 1 where the value is "pos", 0 otherwise
temp <- temp |>
  mutate(diabetes = as.numeric(diabetes == "pos"))

Scale the Data

# Standardize the predictors only (mean 0, SD 1). The recoded `diabetes`
# target is excluded so it keeps its 0/1 coding; scaling it would replace the
# labels with two z-score values.
# as.numeric() drops the one-column matrix (and its scaling attributes) that
# scale() returns, so every column stays a plain numeric vector.
temp <- temp |>
  mutate(across(-diabetes, \(x) as.numeric(scale(x))))

Define the Formula

# Build the model formula: `diabetes` as the response and the six predictors
# as additive terms
f <- stats::reformulate(
  c("pregnant", "glucose", "pressure", "mass", "pedigree", "age"),
  response = "diabetes"
)

Train Neural Network Model

# Fix the RNG seed: neuralnet() initializes its starting weights at random, so
# without a seed every render produces a different fit (and a different plot).
set.seed(123)

# Fit a network with a single hidden layer of 4 nodes, trained with resilient
# backpropagation with weight backtracking ("rprop+").
# NOTE(review): linear.output is left at its default, so no activation
# function is applied to the output node -- confirm this is intended for a
# 0/1 target.
fit <- neuralnet::neuralnet(
  f,
  data = temp,
  hidden = 4,
  algorithm = "rprop+"
)

Visualize the Model

# Draw the fitted network, including the intercept nodes and the fitted
# weights on the connections
plot(
  fit,
  show.weights = TRUE,
  intercept = TRUE
)