data("iris")

one_hot <- model.matrix(~ . - 1, iris) # one-hot encode Species; columns 1:4 are the features, 5:7 the class indicators
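
# illustrative additions, not part of the original fit: a quick sanity check of
# the design matrix (4 feature columns followed by 3 species indicators), and an
# optional seed so the random initialization below is reproducible (seed value is arbitrary)
print(head(one_hot))
set.seed(1)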

w <- matrix(runif(12, 1e-3, 1e-2), nrow = 4, ncol = 3) # initialize weights: 4 inputs x 3 output neurons
w <- rbind(w, runif(3)) # append a bias row, one bias per output neuron
eta <- 0.1 #learning rate

accuracy <- numeric(1000) # per-epoch training accuracy
weights <- replicate(3, matrix(NA_real_, nrow = 1000, ncol = 4), simplify = FALSE) # per-epoch weight history, one 1000 x 4 matrix per neuron

for (epoch in 1:1000){
  a <- cbind(one_hot[, 1:4], 1) %*% w # activations: features plus a constant-1 column for the bias row
  y <- exp(a) / rowSums(exp(a)) # normalize with softmax
  e <- one_hot[, 5:7] - y # backpropagation error term (target minus prediction)
  w[5, ] <- w[5, ] + eta * colSums(e) / 150 # bias update, gradient averaged over the 150 samples
  for (i in 1:3){
    w[1:4, i] <- w[1:4, i] + eta * colSums(one_hot[, 1:4] * e[, i]) / 150 # weight update for neuron i
    weights[[i]][epoch, ] <- w[1:4, i] # record this epoch's weights
  }
  accuracy[epoch] <- mean(max.col(y) == max.col(one_hot[, 5:7])) # fraction classified correctly
}
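
# illustrative check after training: report the accuracy reached in the final epoch
cat(sprintf("final training accuracy: %.3f\n", tail(accuracy, 1)))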

plot(accuracy, type = "l", xlab = "Epoch", ylab = "Accuracy")
title("Accuracy")

plot(NULL, xlim = c(1, 1000), ylim = c(-3, 3), xlab = "Epoch", ylab = "Weight value")
title("Weight Variation over 1000 Epochs")
for (i in 1:3) { # color encodes the neuron, line type the input feature
  for (k in 1:4) {
    lines(x = 1:1000, y = weights[[i]][,k], col = i, lty=k)
  }
}
legend("topleft", legend = paste("neuron",1:3,sep = "_"), text.col = 1:3, horiz = T, cex = .7)