# Loading some useful packages
library(tidyverse)
library(kableExtra)
library(class)     # knn() for k-nearest neighbors

Problem 1

Let \[ \begin{align} A_{+} = \{(0, 0),\ (1, 1),\ (-1, 1),\ (1, -1),\ (-1, -1)\} \end{align} \] and \[ \begin{align} A_{-} =\{(1,0),\ (-1,0),\ (0,1),\ (0,-1)\} \end{align} \] represent the positive and negative training instances, respectively.

# Creating a dataset
x1 <- c(0, 1, -1, 1, -1, 1, -1, 0, 0)
x2 <- c(0, 1, 1, -1, -1, 0, 0, 1, -1)
y <- c(rep("A+", 5), rep("A-", 4))
data.problem1 <- data.frame(x1, x2, y)
data.problem1 %>% as.matrix() %>% 
  kable(., "html", col.names = c("x1", "x2", "y")) %>%
  kable_styling(bootstrap_options = "striped", full_width = F)
x1  x2  y
 0   0  A+
 1   1  A+
-1   1  A+
 1  -1  A+
-1  -1  A+
 1   0  A-
-1   0  A-
 0   1  A-
 0  -1  A-
  1. Plot the decision boundary for the 1-nearest-neighbor (1-NN) algorithm.

    # 1. Creating a testing grid over [-1, 1] x [-1, 1]
    n.sample <- 30   # number of grid points per axis
    data.testing <- matrix(0, 
                           nrow = n.sample^2, ncol = 2) %>%
      `colnames<-`(., c("x1", "x2")) %>% as.data.frame() 
    data.testing[, 2] <- rep(seq(-1, 1, length.out = n.sample),
                             n.sample)
    data.testing[, 1] <- rep(seq(-1, 1, length.out = n.sample),
                             each = n.sample)
    # 2. Using KNN 
    KNN.model <- knn(train = data.problem1[, c(1, 2)], 
                     test = data.testing, 
                     cl = data.problem1$y,
                     k = 1)
    data.testing$predict <- KNN.model 
    # Convert the factor prediction to a 0/1 indicator for contouring
    data.testing$z <- as.numeric(data.testing$predict) - 1
    # 3. Visualization 
    ggplot(data = data.testing, aes(x = x1, y = x2)) + 
      geom_point(aes(colour = predict, 
                     shape = predict), size = 2) + 
      geom_contour(aes(z = z), colour = 'black', size = 0.1) +
      geom_point(data = data.problem1, 
                 aes(x = x1, y = x2, 
                     colour = y, shape = y), 
                 size = 5) + 
      labs(title = "1-Nearest Neighbour") 

    The figure above shows the decision boundary of 1-NN.

  2. What is the training set accuracy for the 3-nearest-neighbor (3-NN) algorithm?

    # Using KNN, k = 3
    prediction.knn <- knn(train = data.problem1[, c(1, 2)], 
                          test = data.problem1[, c(1, 2)], 
                          cl = data.problem1[, 3], 
                          k = 3)
    # Confusion matrix
    confusion.table <- table(x = data.problem1$y, 
                             y = prediction.knn, 
                             dnn = c("Actual", "Predict"))
    # Accuracy
    Accuracy <- sum(diag(confusion.table)) / 
      sum(confusion.table)
    Accuracy %>% matrix(., dimnames = list(c("Accuracy"), 
                                           c("3-NN"))) %>% 
      kable(., "html") %>% 
      kable_styling(bootstrap_options = "striped", full_width = F)
                3-NN
    Accuracy       0

    The training accuracy is 0. With k = 3, each training point is its own
    nearest neighbor, but its two remaining neighbors at distance 1 always
    belong to the opposite class, so every point is outvoted.
  3. What is the confusion matrix for the 3-nearest-neighbor (3-NN) algorithm on the training set?

    # 3-NN confusion matrix 
    confusion.table %>% 
      kable(., "html", caption = "Confusion matrix", 
            booktabs = TRUE) %>% 
      kable_styling(bootstrap_options = "striped", full_width = F) %>% 
      add_header_above(c(" " = 1, "Predict" = 2))
    Confusion matrix
              Predict
    Actual   A-   A+
    A-        0    4
    A+        5    0

    The table above is the confusion matrix of 3-NN: every A+ point is predicted as A- and every A- point as A+, consistent with the zero training accuracy.
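    To see why, here is a minimal added sketch (not part of the assignment)
    listing the three nearest training neighbors of the origin under
    Euclidean distance:

    # The 3 nearest training neighbors of (0, 0), including the point itself
    d <- sqrt(rowSums(data.problem1[, 1:2]^2))  # distances to the origin
    data.problem1[order(d)[1:3], ]              # itself (A+) plus two A- points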

Problem 2

Let \(A \in R^{n*n}\) be a symmetric positive definite matrix. Show that all eigenvalues of A are positive.
< Proof >
Let \(\lambda\) be an eigenvalue of A with eigenvector \(\vec{x} \neq \vec{0}\), so that \(A\vec{x}=\lambda \vec{x}\).
\[ \begin{align} &\text{Given that } A \text{ is positive definite,} \\ &\Rightarrow\ \vec{x}^{\,T}A\vec{x} = \vec{x}^{\,T}\lambda\vec{x} = \lambda\,||\vec{x}||^{2} > 0 \\ &\because\ ||\vec{x}||^{2} > 0 \\ &\therefore\ \lambda > 0 \end{align} \]
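A quick numerical illustration (an added sketch, not part of the proof): construct a symmetric positive definite matrix and check that its eigenvalues are all positive.

# crossprod(B) = t(B) %*% B is positive semi-definite;
# adding the identity makes it strictly positive definite
set.seed(2019)
B <- matrix(rnorm(9), nrow = 3)
M <- crossprod(B) + diag(3)
eigen(M)$values   # all values are positive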

Problem 3

Let \(Z=[X_{1},\ X_{2},\ X_{3}]^{T}\) be a random vector and \(\Sigma\) be a matrix of size \(3 \times 3\) where \(\Sigma_{ij} = Cov(X_{i},X_{j})\) and \(\Sigma_{ii} = Var(X_{i})\). Let the random variable \(W=a^{T}Z=a_{1}X_{1} + a_{2}X_{2} + a_{3}X_{3}\) where \(a = [a_{1},\ a_{2},\ a_{3}]^{T}\), i.e., the random variable W is the projection of random vector Z onto the vector a. Find the variance of W.
< Solution > \[ \begin{align} Var(W) &= Var(a^{T}Z) \\ &= a^{T}Var(Z)a \\ &= \begin{pmatrix} a_{1} & a_{2} & a_{3} \\ \end{pmatrix}_{1 * 3} \Sigma_{3*3} \begin{pmatrix} a_{1} \\ a_{2} \\ a_{3} \\ \end{pmatrix}_{3 * 1}\quad where\quad \Sigma= \begin{pmatrix} \Sigma_{11} & \Sigma_{12} & \Sigma_{13}\\ \Sigma_{21} & \Sigma_{22} & \Sigma_{23}\\ \Sigma_{31} & \Sigma_{32} & \Sigma_{33}\\ \end{pmatrix}_{3 * 3} \\ &= \sum_{i=1}^{3}a_{i}^{2} \Sigma_{ii} + 2\mathop{\sum\sum}_{1 \le i<j \le 3}a_{i}a_{j}\Sigma_{ij} \end{align} \]
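A Monte-Carlo sanity check of \(Var(W) = a^{T}\Sigma a\) (an added sketch; the choices of a and \(\Sigma\) below are illustrative assumptions, not part of the problem):

library(MASS)   # mvrnorm() for sampling multivariate normals
set.seed(2019)
a     <- c(1, -2, 0.5)
Sigma <- matrix(c(2.0, 0.5, 0.3,
                  0.5, 1.0, 0.2,
                  0.3, 0.2, 1.5), nrow = 3)
Z <- mvrnorm(n = 1e5, mu = rep(0, 3), Sigma = Sigma)
W <- Z %*% a    # each row gives one realization of W = a^T Z
c(empirical = var(as.vector(W)),
  theoretical = drop(t(a) %*% Sigma %*% a))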

Problem 4

Let S be a set of 10,000 random numbers generated by the uniform distribution U[0, 1]. You have to estimate the mean of this uniform distribution.
According to the above statement, \[ \begin{align} S \sim U[0,1] \\ \end{align} \] so we know \[ \begin{align} E(S) = \frac{1}{2},\ and\ Var(S) = \frac{1}{12} \approx 0.0833 \end{align} \] Now we generate 10,000 random variables from U[0, 1] and estimate the mean and variance.

# Generating 10000 r.v from U[0, 1]
set.seed(2019)
S <- runif(n = 10000, min = 0, max = 1)
# Calculating mean and variance
c(S %>% mean, S %>% var) %>% round(., 4) %>% 
  matrix(., nrow = 1) %>% `rownames<-`(., "Value") %>% 
  kable(., "html", col.names = c("Mean", "Variance")) %>%
  kable_styling(bootstrap_options = "striped", full_width = F)
         Mean  Variance
Value  0.4944    0.0836

The table above shows the sample mean and variance of the 10,000 uniform random variables.

  1. Randomly select 10 numbers from the set S and then use the average of these 10 numbers as the estimate. Repeat this experiment 20 times. What are the sample mean and sample standard deviation of these 20 random experiments?

    set.seed(2019)
    S_10.table <- matrix(NA, ncol = 20, nrow = 2) %>%
      `rownames<-`(., c("Mean", "Variance")) %>% 
      `colnames<-`(., c(1:20))
    for(i in 1:20){
      S_10 <- sample(x = S, size = 10, replace = TRUE)
      S_10.table[1, i] <- S_10 %>% mean()
      S_10.table[2, i] <- S_10 %>% var()
    }
    # Average the 20 per-experiment means and variances
    S_10.table %>% apply(., 1, mean) %>% round(., 4) %>% 
      t() %>% `rownames<-`(., "Values") %>% 
      kable(., "html") %>%
      kable_styling(bootstrap_options = "striped", full_width = F)
              Mean  Variance
    Values  0.4631    0.0797
  2. Randomly select 1,000 numbers from the set S and then use the average of these 1,000 numbers as the estimate. Repeat this experiment 50 times. What are the sample mean and sample standard deviation of these 50 random experiments?

    set.seed(2019)
    S_50.table <- matrix(NA, ncol = 50, nrow = 2) %>%
      `rownames<-`(., c("Mean", "Variance")) %>% 
      `colnames<-`(., c(1:50))
    for(i in 1:50){
      S_50 <- sample(x = S, size = 1000, replace = TRUE)
      S_50.table[1, i] <- S_50 %>% mean()
      S_50.table[2, i] <- S_50 %>% var()
    }
    S_50.table %>% apply(., 1, mean) %>% round(., 4) %>% 
      t() %>% `rownames<-`(., "Values") %>% 
      kable(., "html") %>%
      kable_styling(bootstrap_options = "striped", full_width = F)
              Mean  Variance
    Values  0.2843    0.0439
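    Both parts also ask for the sample standard deviation of the repeated
    estimates; a minimal added follow-up on the tables built above:

    # Sample SD of the stored per-experiment means
    sd(S_10.table["Mean", ])
    sd(S_50.table["Mean", ])

    By the central limit theorem, the spread of the size-1000 averages should
    be about \(\sqrt{100} = 10\) times smaller than that of the size-10 averages.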

Problem 5

Consider a linear system of equations as follows: \[ \begin{equation*} \begin{aligned} 2x_{1} &+ &&2x_{2} & &= 5 \\ x_{1} &+ &&&x_{3} &= 2 \\ & &&2x_{2}+ &2x_{3} &= 3 \\ x_{1} &+ &&&x_{3} &= 4 \end{aligned} \end{equation*} \] Find the least squares approximation solution for it.
According to the statement, we can rewrite the linear system in matrix form.
\[ \begin{align} \begin{pmatrix} 2 & 2 & 0 \\ 1 & 0 & 1 \\ 0 & 2 & 2 \\ 1 & 0 & 1 \end{pmatrix}_{4 * 3} \begin{pmatrix} x_{1} \\ x_{2} \\ x_{3} \end{pmatrix}_{3 * 1} &= \begin{pmatrix} 5 \\ 2 \\ 3 \\ 4 \end{pmatrix}_{4 * 1} \\ \Rightarrow AX &= b \end{align} \] So the least-squares solution is \[ \begin{align} X = (A^{T}A)^{-1}A^{T}b \in R^{3*1} \end{align} \]

# LS solution
A <- matrix(c(2, 2, 0,
              1, 0, 1,
              0, 2, 2,
              1, 0, 1), nrow = 4, byrow = TRUE)
    
b <- matrix(c(5, 2, 3, 4), ncol = 1)
    
solve(t(A) %*% A) %*% t(A) %*% b %>% 
  `rownames<-`(., paste0("x", 1:3)) %>% 
  kable(., "html", caption = "LS solution") %>%
  kable_styling(bootstrap_options = "striped", full_width = F)
LS solution
x1   2.0
x2   0.5
x3   1.0

The table above shows the least-squares solution of the linear system.
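As a cross-check (an added sketch), base R's QR-based solver returns the same least-squares solution for this overdetermined system:

# qr.solve() computes the least-squares solution when A has more rows than columns
qr.solve(A, b)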

Problem 6

Generate a training dataset with size 1000 by yourself.

  1. That is, \(S = \{(x^{i},y_{i})\,|\,x^{i} = (x^{i}_{1}, x^{i}_{2}) \in R^{2}\ and\ y_{i} \in R,\ i = 1, \ldots, 1000\}\), where \(x_{1}\) and \(x_{2}\) are generated by the uniform distribution U[-1, 1]. The observation value \(y = 2x^{2}_1 - 0.6x^{2}_{2} + 1.5x_{1}x_{2} + x_{1} + 2x_{2} + \epsilon\) where \(\epsilon\) is random noise generated by N(0, 1).

    # Generating a training dataset S
    set.seed(2019)
    x1 <- runif(n = 1000, min = -1, max = 1)
    x2 <- runif(n = 1000, min = -1, max = 1)
    y <- 2*(x1^2) - 0.6 *(x2^2) + 1.5 * x1*x2 + x1 + 2*x2 +
      rnorm(1000)
    S <- data.frame(x1, x2, y)
    S %>% head %>% 
      kable(., "html", caption = "A part of data") %>%
      kable_styling(bootstrap_options = "striped", full_width = F)
    A part of data
            x1          x2           y
     0.5398031  -0.3970481   1.5427073
     0.4256795  -0.8003378  -1.2369026
    -0.3932796   0.4100049  -0.3372891
     0.2364727  -0.6506493  -1.4938536
    -0.8990325   0.1781360   1.5329034
    -0.9135624   0.1022247   1.6144876

    The table above shows the first six rows of the simulated dataset.

  2. Find a quadratic function f(x) that fits the training dataset S.
    According to the statement, we fit a quadratic function without an intercept, matching the data-generating model.

    # Fit a quadratic regression without an intercept
    # (x1*x2 expands to x1 + x2 + x1:x2)
    lm.fit <- lm(y ~ -1 + I(x1^2) + I(x2^2) + x1*x2, data = S) 
    lm.fit %>% summary() %>% coef %>% round(., 3)%>% 
      kable(., "html") %>% 
      kable_styling(bootstrap_options = "striped", full_width = F)
              Estimate  Std. Error  t value  Pr(>|t|)
    I(x1^2)      2.230       0.087   25.572         0
    I(x2^2)     -0.819       0.086   -9.559         0
    x1           0.983       0.056   17.605         0
    x2           2.009       0.056   36.058         0
    x1:x2        1.449       0.095   15.220         0
    The table above lists the fitted coefficients, so the estimated quadratic function is \[ \begin{align} f(x) = 2.23x^{2}_1 - 0.819x^{2}_{2} + 1.449x_{1}x_{2} + 0.983x_{1} + 2.009x_{2} \end{align} \]
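    To use the fitted model, evaluate it at a new point with predict(); the
    point (0.5, -0.5) below is an arbitrary illustration:

    # Evaluate the fitted quadratic at a hypothetical new point
    predict(lm.fit, newdata = data.frame(x1 = 0.5, x2 = -0.5))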
  3. Compute the MAE (mean absolute error) and plot the function you get.

    # MAE
    lm.fit$residuals %>% abs %>% mean %>% round(., 4) %>% 
      matrix(., dimnames = list(c("Value"), c("MAE"))) %>% 
      kable(., "html") %>% 
      kable_styling(bootstrap_options = "striped", full_width = F)
              MAE
    Value  0.8015

    The table above shows the MAE (mean absolute error) of the fitted quadratic function.

    # Plot the fitted quadratic function
    obj_fun <- function (x1, x2) {
      coef <- lm.fit %>% coef
      # Index the coefficients by name to avoid depending on their order
      value <- coef["I(x1^2)"]*x1^2 + coef["I(x2^2)"]*x2^2 + 
        coef["x1"]*x1 + coef["x2"]*x2 + coef["x1:x2"]*x1*x2
      return(value)
    }
    
    x1 <- seq(-1, 1, length.out = 15)
    x2 <- seq(-1, 1, length.out = 15)
    z <- outer(x1, x2, obj_fun)
    persp(x = x1, y = x2, z, 
          theta = 20, phi = 20, expand = 0.7, 
          col = "blue", 
          main = "quadratic function", 
          sub = expression(f(x) == 2.23*x[1]^{2} - 0.819*x[2]^{2} + 1.449*x[1]*x[2] + 0.983*x[1] + 2.009*x[2]))