---
title: "AIML_DNN_IRIS_Data"
format:
html:
theme:
light: cosmo
dark: darkly
toc: true
toc-depth: 3
number-sections: true
code-fold: true
code-tools: true
smooth-scroll: true
anchor-sections: true
fontsize: 1.05em
css: styles.css
execute:
echo: true
warning: false
message: false
cache: false
editor: visual
---
```{r}
# ============================================================
# TWO-LAYER NEURAL NETWORK FOR IRIS FLOWER CLASSIFICATION
# ============================================================
#
# Input variables:
# Sepal.Length, Sepal.Width, Petal.Length, Petal.Width
#
# Output:
# Species ∈ {setosa, versicolor, virginica}
#
# Architecture:
# Input layer (4 features)
# Hidden layer (sigmoid activation)
# Output layer (softmax activation, 3 classes)
#
# Loss:
# Multiclass cross-entropy
# ============================================================
# -----------------------------
# 1. Load the iris dataset
# -----------------------------
data(iris)
# X ∈ R^{n × 4}
X <- as.matrix(iris[, 1:4])
# Class labels
y_labels <- iris$Species
# -----------------------------
# 2. One-hot encode the response
# -----------------------------
# setosa = (1,0,0)
# versicolor = (0,1,0)
# virginica = (0,0,1)
classes <- unique(y_labels)
K <- length(classes)
y <- matrix(0, nrow = length(y_labels), ncol = K)
colnames(y) <- classes
for (i in 1:length(y_labels)) {
y[i, which(classes == y_labels[i])] <- 1
}
# -----------------------------
# 3. Standardize the predictors
# -----------------------------
# X_scaled = (X - mean)/sd
X <- scale(X)
# -----------------------------
# 4. Train-test split
# -----------------------------
set.seed(123)
n <- nrow(X)
idx <- sample(1:n, size = round(0.8 * n))
X_train <- X[idx, , drop = FALSE]
y_train <- y[idx, , drop = FALSE]
X_test <- X[-idx, , drop = FALSE]
y_test <- y[-idx, , drop = FALSE]
y_labels_train <- y_labels[idx]
y_labels_test <- y_labels[-idx]
# -----------------------------
# 5. Activation functions
# -----------------------------
# Hidden layer activation:
# σ(z) = 1 / (1 + e^{-z})
sigmoid <- function(z) {
1 / (1 + exp(-z))
}
# Output layer activation:
# softmax(z_j) = exp(z_j) / sum_k exp(z_k)
softmax <- function(Z) {
Z_shift <- Z - apply(Z, 1, max) # numerical stability
expZ <- exp(Z_shift)
expZ / rowSums(expZ)
}
# -----------------------------
# 6. Initialize parameters
# -----------------------------
# Dimensions:
# W1 ∈ R^{m × 4}, b1 ∈ R^{m × 1}
# W2 ∈ R^{3 × m}, b2 ∈ R^{3 × 1}
input_dim <- 4
hidden_dim <- 6
output_dim <- 3
set.seed(123)
W1 <- matrix(rnorm(hidden_dim * input_dim, mean = 0, sd = 0.1),
nrow = hidden_dim, ncol = input_dim)
b1 <- matrix(0, nrow = hidden_dim, ncol = 1)
W2 <- matrix(rnorm(output_dim * hidden_dim, mean = 0, sd = 0.1),
nrow = output_dim, ncol = hidden_dim)
b2 <- matrix(0, nrow = output_dim, ncol = 1)
# -----------------------------
# 7. Forward propagation
# -----------------------------
forward <- function(X, W1, b1, W2, b2) {
# Z1 = X W1^T + b1
Z1 <- X %*% t(W1) +
matrix(rep(as.vector(b1), each = nrow(X)),
nrow = nrow(X), byrow = FALSE)
# A1 = sigmoid(Z1)
A1 <- sigmoid(Z1)
# Z2 = A1 W2^T + b2
Z2 <- A1 %*% t(W2) +
matrix(rep(as.vector(b2), each = nrow(X)),
nrow = nrow(X), byrow = FALSE)
# A2 = softmax(Z2)
A2 <- softmax(Z2)
list(Z1 = Z1, A1 = A1, Z2 = Z2, A2 = A2)
}
# -----------------------------
# 8. Multiclass cross-entropy loss
# -----------------------------
# L = - (1/n) Σ_i Σ_k y_{ik} log(ŷ_{ik})
loss_fn <- function(y_true, y_hat) {
eps <- 1e-8
y_hat <- pmin(pmax(y_hat, eps), 1 - eps)
-mean(rowSums(y_true * log(y_hat)))
}
# -----------------------------
# 9. Training by backpropagation
# -----------------------------
lr <- 0.05
epochs <- 5000
loss_history <- numeric(epochs)
for (e in 1:epochs) {
# ===== Forward pass =====
fp <- forward(X_train, W1, b1, W2, b2)
Z1 <- fp$Z1
A1 <- fp$A1
Z2 <- fp$Z2
A2 <- fp$A2
# Compute loss
loss <- loss_fn(y_train, A2)
loss_history[e] <- loss
m <- nrow(X_train)
# ===== Backpropagation =====
# Output error:
# dZ2 = A2 - Y
dZ2 <- A2 - y_train # m × 3
# Gradients for output layer
# dW2 = (dZ2^T A1)/m
dW2 <- t(dZ2) %*% A1 / m # 3 × hidden_dim
# db2 = column means of dZ2
db2 <- matrix(colMeans(dZ2), nrow = output_dim, ncol = 1)
# Hidden layer error:
# dA1 = dZ2 W2
dA1 <- dZ2 %*% W2 # m × hidden_dim
# dZ1 = dA1 ⊙ A1 ⊙ (1-A1)
dZ1 <- dA1 * A1 * (1 - A1)
# Gradients for hidden layer
# dW1 = (dZ1^T X)/m
dW1 <- t(dZ1) %*% X_train / m # hidden_dim × 4
# db1 = column means of dZ1
db1 <- matrix(colMeans(dZ1), nrow = hidden_dim, ncol = 1)
# ===== Parameter updates =====
W2 <- W2 - lr * dW2
b2 <- b2 - lr * db2
W1 <- W1 - lr * dW1
b1 <- b1 - lr * db1
if (e %% 500 == 0) {
cat("Epoch:", e, " Loss:", round(loss, 6), "\n")
}
}
# -----------------------------
# 10. Prediction function
# -----------------------------
predict_nn <- function(X, W1, b1, W2, b2, classes) {
fp <- forward(X, W1, b1, W2, b2)
probs <- fp$A2
pred_index <- max.col(probs)
pred_class <- classes[pred_index]
list(probabilities = probs, predicted_class = pred_class)
}
# -----------------------------
# 11. Evaluate on training data
# -----------------------------
train_pred <- predict_nn(X_train, W1, b1, W2, b2, classes)
train_class <- train_pred$predicted_class
train_acc <- mean(train_class == y_labels_train)
cat("\nTraining Accuracy =", round(train_acc, 4), "\n")
# -----------------------------
# 12. Evaluate on test data
# -----------------------------
test_pred <- predict_nn(X_test, W1, b1, W2, b2, classes)
test_class <- test_pred$predicted_class
test_acc <- mean(test_class == y_labels_test)
cat("Test Accuracy =", round(test_acc, 4), "\n")
# -----------------------------
# 13. Confusion matrix
# -----------------------------
cat("\nConfusion Matrix (Test Data):\n")
print(table(Predicted = test_class, Actual = y_labels_test))
# -----------------------------
# 14. Plot training loss
# -----------------------------
plot(loss_history, type = "l", lwd = 2,
main = "Training Loss for IRIS Neural Network",
xlab = "Epoch", ylab = "Cross-Entropy Loss")
# -----------------------------
# 15. Show sample predicted probabilities
# -----------------------------
results <- data.frame(
Actual = y_labels_test,
Predicted = test_class,
Setosa_Prob = round(test_pred$probabilities[, "setosa"], 4),
Versicolor_Prob = round(test_pred$probabilities[, "versicolor"], 4),
Virginica_Prob = round(test_pred$probabilities[, "virginica"], 4)
)
cat("\nSample predictions:\n")
print(head(results, 10))
```