# ML lab 05: Logistic regression
# Source: https://youtu.be/2FeWGgnyLSw?list=PLlMkM4tgfjnLSOjrEJN31gZATbcj_MpUm

library(tensorflow)

# Given data
X.data <- cbind(1:6, c(2,3,1,3,3,2))
X.data <- cbind(1, X.data)
X.data
##      [,1] [,2] [,3]
## [1,]    1    1    2
## [2,]    1    2    3
## [3,]    1    3    1
## [4,]    1    4    3
## [5,]    1    5    3
## [6,]    1    6    2
y.data <- matrix(c(0, 0, 0, 1, 1, 1), ncol = 1)
y.data
##      [,1]
## [1,]    0
## [2,]    0
## [3,]    0
## [4,]    1
## [5,]    1
## [6,]    1
# Step1: Build graph
# Set the variables in tensorflow
X <- tf$placeholder(tf$float32, shape(6L, 3L))
y <- tf$placeholder(tf$float32, shape(6L, 1L))

# W vector is our parameter to be estimated.
# Here W include the parameter b in the lecture.
W <- tf$Variable(tf$constant(c(1.0, 1.0, 1.0),
                                shape = c(3L, 1L)),
                    name = 'parameter')

# Calculate y_hat
# sigmoid(x) = 1/(1 + exp(-x))
y_hat <- tf$sigmoid(tf$matmul(X, W))

# Cost function
cost <- -tf$reduce_mean(y * tf$log(y_hat) +
                          (1 - y) * tf$log(1 - y_hat))

# Gradient Descent
optimizer <- tf$train$GradientDescentOptimizer(learning_rate = 0.1)
train <- optimizer$minimize(cost)

sess <- tf$Session()
sess$run(tf$global_variables_initializer())

# Training
result <- matrix(0, nrow = 5000, ncol = 4)
for (i in 1:5000) {
  result[i,] <- unlist(sess$run(c(cost, W, train),
                                feed_dict = dict(X = X.data, y = y.data)))
}
result <- cbind(1:5000, result)
colnames(result) <- c("step", "cost", "b",
                      "W1", "W2")
# Check the result
head(result)
##      step     cost         b        W1        W2
## [1,]    1 2.504657 1.0000000 1.0000000 1.0000000
## [2,]    2 2.283788 0.9504622 0.9007618 0.9008617
## [3,]    3 2.065067 0.9011492 0.8019453 0.8021495
## [4,]    4 1.849687 0.8521757 0.7038056 0.7040907
## [5,]    5 1.639568 0.8037185 0.6067692 0.6070459
## [6,]    6 1.437782 0.7560513 0.5115595 0.5115931
tail(result)
##         step       cost         b       W1       W2
## [4995,] 4995 0.03860849 -11.50039 2.616829 1.160917
## [4996,] 4996 0.03860145 -11.50120 2.617001 1.161018
## [4997,] 4997 0.03859437 -11.50202 2.617172 1.161119
## [4998,] 4998 0.03858732 -11.50284 2.617343 1.161220
## [4999,] 4999 0.03858022 -11.50365 2.617514 1.161321
## [5000,] 5000 0.03857316 -11.50447 2.617685 1.161422
# Check the cost decreasing
plot(1:5000, result[1:5000,2])

# Check our result
W.est <- as.vector(result[5000,3:5])

# Calculate result
round(sess$run(tf$sigmoid(X.data %*% W.est)))
##      [,1]
## [1,]    0
## [2,]    0
## [3,]    0
## [4,]    1
## [5,]    1
## [6,]    1
y.data
##      [,1]
## [1,]    0
## [2,]    0
## [3,]    0
## [4,]    1
## [5,]    1
## [6,]    1
# Close session
sess$close()