R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(tinytest)
library(DiagrammeR)
## Warning: package 'DiagrammeR' was built under R version 4.3.3

Sigmoid

sigmoid <- function(x) {
  return(1 / (1 + exp(-x)))
}

Derivative of sigmoid for backpropagation

sigmoid_derivative <- function(x) {
  return(x * (1 - x))
}

Initialize weights randomly

initialize_weights <- function(n_input, n_hidden1, n_hidden2, n_hidden3, n_output) {
  list(
    w1 = matrix(runif(n_input * n_hidden1, min = -1, max = 1), nrow = n_input, ncol = n_hidden1),
    w2 = matrix(runif(n_hidden1 * n_hidden2, min = -1, max = 1), nrow = n_hidden1, ncol = n_hidden2),
    w3 = matrix(runif(n_hidden2 * n_hidden3, min = -1, max = 1), nrow = n_hidden2, ncol = n_hidden3),
    w4 = matrix(runif(n_hidden3 * n_output, min = -1, max = 1), nrow = n_hidden3, ncol = n_output)
  )
}

Forward propagation

forward_propagation <- function(input, weights) {
  z1 <- sigmoid(input %*% weights$w1)
  z2 <- sigmoid(z1 %*% weights$w2)
  z3 <- sigmoid(z2 %*% weights$w3)
  output <- sigmoid(z3 %*% weights$w4)
  list(z1 = z1, z2 = z2, z3 = z3, output = output)
}

Backward propagation

backward_propagation <- function(input, output, actual, z1, z2, z3, weights, learning_rate) {
  error_output <- actual - output
  delta_output <- error_output * sigmoid_derivative(output)
  
  error_hidden3 <- delta_output %*% t(weights$w4)
  delta_hidden3 <- error_hidden3 * sigmoid_derivative(z3)
  
  error_hidden2 <- delta_hidden3 %*% t(weights$w3)
  delta_hidden2 <- error_hidden2 * sigmoid_derivative(z2)
  
  error_hidden1 <- delta_hidden2 %*% t(weights$w2)
  delta_hidden1 <- error_hidden1 * sigmoid_derivative(z1)
  
  weights$w4 <- weights$w4 + learning_rate * t(z3) %*% delta_output
  weights$w3 <- weights$w3 + learning_rate * t(z2) %*% delta_hidden3
  weights$w2 <- weights$w2 + learning_rate * t(z1) %*% delta_hidden2
  weights$w1 <- weights$w1 + learning_rate * t(input) %*% delta_hidden1
  
  return(weights)
}

Training function

train_perceptron <- function(input, actual, weights, epochs, learning_rate) {
  for (i in 1:epochs) {
    forward <- forward_propagation(input, weights)
    weights <- backward_propagation(input, forward$output, actual, forward$z1, forward$z2, forward$z3, weights, learning_rate)
    
    if (i %% 100 == 0) {
      cat("Epoch:", i, "Loss:", sum((actual - forward$output)^2) / length(actual), "\n")
    }
  }
  return(weights)
}

Visualization function

visualize_perceptron <- function() {
  grViz("
  digraph neural_network {
  
    # Input layer
    node [shape = circle, style = filled, color = lightblue, label = 'Input\nX1'] x1;
    node [shape = circle, style = filled, color = lightblue, label = 'Input\nX2'] x2;
    
    # Hidden layers
    node [shape = circle, style = filled, color = green, label = 'Hidden\nLayer 1\nNeuron 1'] h1_1;
    node [shape = circle, style = filled, color = green, label = 'Hidden\nLayer 1\nNeuron 2'] h1_2;
    node [shape = circle, style = filled, color = green, label = 'Hidden\nLayer 1\nNeuron 3'] h1_3;
    
    node [shape = circle, style = filled, color = green, label = 'Hidden\nLayer 2\nNeuron 1'] h2_1;
    node [shape = circle, style = filled, color = green, label = 'Hidden\nLayer 2\nNeuron 2'] h2_2;
    node [shape = circle, style = filled, color = green, label = 'Hidden\nLayer 2\nNeuron 3'] h2_3;
    
    node [shape = circle, style = filled, color = green, label = 'Hidden\nLayer 3\nNeuron 1'] h3_1;
    node [shape = circle, style = filled, color = green, label = 'Hidden\nLayer 3\nNeuron 2'] h3_2;
    node [shape = circle, style = filled, color = green, label = 'Hidden\nLayer 3\nNeuron 3'] h3_3;
    
    # Output layer
    node [shape = circle, style = filled, color = orange, label = 'Output\nY'] y;

    # Connections
    x1 -> h1_1; x1 -> h1_2; x1 -> h1_3;
    x2 -> h1_1; x2 -> h1_2; x2 -> h1_3;
    
    h1_1 -> h2_1; h1_1 -> h2_2; h1_1 -> h2_3;
    h1_2 -> h2_1; h1_2 -> h2_2; h1_2 -> h2_3;
    h1_3 -> h2_1; h1_3 -> h2_2; h1_3 -> h2_3;
    
    h2_1 -> h3_1; h2_1 -> h3_2; h2_1 -> h3_3;
    h2_2 -> h3_1; h2_2 -> h3_2; h2_2 -> h3_3;
    h2_3 -> h3_1; h2_3 -> h3_2; h2_3 -> h3_3;
    
    h3_1 -> y; h3_2 -> y; h3_3 -> y;
  }
  ")
}

Example

input <- matrix(c(0, 0, 0, 1, 1, 0, 1, 1), nrow = 4, byrow = TRUE)
actual <- matrix(c(0, 1, 1, 0), ncol = 1)

weights <- initialize_weights(n_input = 2, n_hidden1 = 3, n_hidden2 = 3, n_hidden3 = 3, n_output = 1)

Train

trained_weights <- train_perceptron(input, actual, weights, epochs = 1000, learning_rate = 0.1)
## Epoch: 100 Loss: 0.249962 
## Epoch: 200 Loss: 0.2499156 
## Epoch: 300 Loss: 0.2499117 
## Epoch: 400 Loss: 0.2499089 
## Epoch: 500 Loss: 0.2499061 
## Epoch: 600 Loss: 0.2499032 
## Epoch: 700 Loss: 0.2499001 
## Epoch: 800 Loss: 0.249897 
## Epoch: 900 Loss: 0.2498938 
## Epoch: 1000 Loss: 0.2498904

Perceptron structure

visualize_perceptron()
trained_weights
## $w1
##           [,1]      [,2]       [,3]
## [1,] 0.5447925 0.3448590 -0.8020596
## [2,] 0.2917682 0.1519058 -0.6783117
## 
## $w2
##             [,1]       [,2]       [,3]
## [1,] -0.41028002 -0.5864476 -0.4769052
## [2,]  0.60296513  0.8339715  0.3229327
## [3,]  0.06054248  0.6021078 -0.6962803
## 
## $w3
##            [,1]       [,2]        [,3]
## [1,] -0.6968365  0.9825567 -0.64101995
## [2,]  0.1701645 -0.2187261 -0.08460626
## [3,]  0.9092822 -0.9605916 -0.72314076
## 
## $w4
##            [,1]
## [1,]  0.7353750
## [2,] -0.4396460
## [3,] -0.5055448
test_input <- matrix(c(0, 1), nrow = 1)  # Test with input [0, 1]
output <- forward_propagation(test_input, trained_weights)$output
print(output)
##           [,1]
## [1,] 0.5000333

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.