# Installing some useful packages
library(tidyverse)
library(kableExtra)
library(class) # provides the knn() function
Let \[ \begin{align} A_{+} = \{(0, 0),\ (1, 1),\ (-1, 1),\ (1, -1),\ (-1, -1)\} \end{align} \] and \[ \begin{align} A_{-} = \{(1,0),\ (-1,0),\ (0,1),\ (0,-1)\} \end{align} \] represent the positive and negative training instances, respectively.
# Creating a dataset
x1 <- c(0, 1, -1, 1, -1, 1, -1, 0, 0)
x2 <- c(0, 1, 1, -1, -1, 0, 0, 1, -1)
y <- c(rep("A+", 5), rep("A-", 4))
data.problem1 <- data.frame(x1, x2, y)
data.problem1 %>% as.matrix() %>%
kable(., "html", col.names = c("x1", "x2", "y")) %>%
kable_styling(bootstrap_options = "striped", full_width = F)
x1 | x2 | y |
---|---|---|
0 | 0 | A+ |
1 | 1 | A+ |
-1 | 1 | A+ |
1 | -1 | A+ |
-1 | -1 | A+ |
1 | 0 | A- |
-1 | 0 | A- |
0 | 1 | A- |
0 | -1 | A- |
Plot the decision boundary for the 1-nearest neighbor (1-NN) algorithm.
# 1. Creating testing set
n.sample <- 30 # number of grid points along each axis
data.testing <- matrix(0,
                       nrow = n.sample^2, ncol = 2) %>%
  `colnames<-`(., c("x1", "x2")) %>% as.data.frame()
# Fill a regular grid over [-1, 1]^2: x1 varies slowest, x2 fastest
data.testing[, 1] <- rep(seq(-1, 1, length.out = n.sample),
                         each = n.sample)
data.testing[, 2] <- rep(seq(-1, 1, length.out = n.sample),
                         n.sample)
# 2. Using KNN
KNN.model <- knn(train = data.problem1[, c(1, 2)],
test = data.testing,
cl = data.problem1$y,
k = 1)
data.testing$predict <- KNN.model
# Numeric 0/1 class indicator so geom_contour can draw the boundary
data.testing$z <- as.numeric(data.testing$predict) - 1
# 3. Visualization
ggplot(data = data.testing, aes(x = x1, y = x2)) +
geom_point(aes(colour = predict,
shape = predict), size = 2) +
geom_contour(aes(z = z), colour = 'black', size = 0.1) +
geom_point(data = data.problem1,
aes(x = x1, y = x2,
colour = as.factor(y), shape = y),
size = 5) +
labs(title = "1-Nearest Neighbour")
The figure above shows the decision boundary of 1-NN: each grid point is coloured by the label of its single nearest training point.
What is the training set accuracy for the 3-nearest neighbor (3-NN) algorithm?
# Using KNN, k = 3
prediction.knn <- knn(train = data.problem1[, c(1, 2)],
                      test = data.problem1[, c(1, 2)], # evaluate on the training set itself
                      cl = data.problem1[, 3],
                      k = 3)
# Confusion matrix
confusion.table <- table(x = data.problem1$y,
                         y = prediction.knn,
                         dnn = c("Actual", "Predict"))
# Accuracy
Accuracy <- sum(diag(confusion.table)) /
  sum(confusion.table)
Accuracy %>% matrix(., dimnames = list(c("Accuracy"),
                                       c("3-NN"))) %>%
  kable(., "html") %>%
  kable_styling(bootstrap_options = "striped", full_width = F)
| | 3-NN |
|---|---|
| Accuracy | 0 |
What is the confusion matrix for the 3-nearest neighbor (3-NN) algorithm on the training set?
# 3-NN confusion matrix
confusion.table %>%
kable(., "html", caption = "Confusion matrix",
booktabs = TRUE) %>%
kable_styling(bootstrap_options = "striped", full_width = F) %>%
add_header_above(c(" " = 1, "Predict" = 2))
| Actual | Predict A- | Predict A+ |
|---|---|---|
| A- | 0 | 4 |
| A+ | 5 | 0 |
The table above is the confusion matrix of 3-NN on the training set: every point is misclassified, which is why the accuracy is 0. Because the test set equals the training set, each point counts itself as one of its three nearest neighbours, but its two other nearest neighbours always belong to the opposite class in this XOR-like layout, so the majority vote flips every label.
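To see this concretely, here is a quick check (a minimal sketch reusing `data.problem1`) of the distances from the point (0, 0) to every training instance: after the point itself, its nearest neighbours are the four A- points at distance 1.
# Distances from (0, 0) to every training point
dists <- sqrt(data.problem1$x1^2 + data.problem1$x2^2)
cbind(data.problem1, dist = round(dists, 3))[order(dists), ]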
Let \(A \in R^{n \times n}\) be a symmetric positive definite matrix. Show that all eigenvalues of the matrix A are positive.
< Proof >
Let \(\lambda\) be an eigenvalue of A with corresponding eigenvector \(\vec{x} \neq \vec{0}\), so that \(A\vec{x}=\lambda \vec{x}\).
\[
\begin{align}
&\text{Since } A \text{ is a positive definite matrix:} \\
&\Rightarrow\
\vec{x}^{\,T}A\vec{x} = \vec{x}^{\,T}\lambda\vec{x} = \lambda\,\|\vec{x}\|^{2} > 0 \\
&\because\
\|\vec{x}\|^{2} > 0 \\
&\therefore\
\lambda > 0
\end{align}
\]
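As a quick numerical sanity check (a minimal sketch: `M` is an arbitrary random matrix used only to construct a symmetric positive definite `A.check`), the eigenvalues all come out positive.
# Numerical check: eigenvalues of a symmetric positive definite matrix
set.seed(2019)
M <- matrix(rnorm(16), nrow = 4)
A.check <- t(M) %*% M + diag(4) # symmetric positive definite by construction
eigen(A.check, symmetric = TRUE)$values # all strictly positive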
Let \(Z=[X_{1},\ X_{2},\ X_{3}]^{T}\) be a random vector and \(\Sigma\) be a \(3 \times 3\) matrix where \(\Sigma_{ij} = Cov(X_{i},X_{j})\) and \(\Sigma_{ii} = Var(X_{i})\). Let the random variable \(W=a^{T}Z=a_{1}X_{1} + a_{2}X_{2} + a_{3}X_{3}\) where \(a = [a_{1},\ a_{2},\ a_{3}]^{T}\), i.e., the random variable W is the projection of random vector Z onto the vector a. Find the variance of W.
< Solution > \[
\begin{align}
Var(W) &= Var(a^{T}Z) \\
&= a^{T}Var(Z)\,a \\
&= \begin{pmatrix}
a_{1} & a_{2} & a_{3} \\
\end{pmatrix}_{1 \times 3} \Sigma_{3 \times 3}
\begin{pmatrix}
a_{1} \\
a_{2} \\
a_{3} \\
\end{pmatrix}_{3 \times 1}\quad \text{where}\quad
\Sigma=
\begin{pmatrix}
\Sigma_{11} & \Sigma_{12} & \Sigma_{13}\\
\Sigma_{21} & \Sigma_{22} & \Sigma_{23}\\
\Sigma_{31} & \Sigma_{32} & \Sigma_{33}\\
\end{pmatrix}_{3 \times 3}
\\
&= \sum_{i=1}^{3}a_{i}^{2} \Sigma_{ii} +
2\sum_{1 \le i < j \le 3}a_{i}a_{j}\Sigma_{ij}
\end{align}
\]
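This identity is easy to verify numerically (a minimal sketch; the weights `a` and the simulated `Z` are illustrative, not part of the problem).
# Numerical check: Var(a^T Z) = a^T Sigma a
set.seed(2019)
a <- c(1, -2, 0.5)
Z <- matrix(rnorm(3e5), ncol = 3) # 100,000 draws of a 3-dimensional vector
Sigma <- cov(Z) # sample 3 x 3 covariance matrix
t(a) %*% Sigma %*% a # a^T Sigma a
var(Z %*% a) # identical: the identity also holds for sample covariances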
Let S be a set of 10,000 random numbers generated from the uniform distribution U[0,1]. You have to estimate the mean of this uniform distribution.
According to the above statement, each element of S is an independent draw from U[0, 1], so \[
\begin{align}
E(S) = \frac{1}{2}\quad \text{and}\quad Var(S) = \frac{1}{12} \approx 0.0833.
\end{align}
\] Now we generate 10,000 random variates from U[0, 1] and estimate the mean and variance.
# Generating 10000 r.v from U[0, 1]
set.seed(2019)
S <- runif(n = 10000, min = 0, max = 1)
# Calculating mean and variance
c(S %>% mean, S %>% var) %>% round(., 4) %>%
matrix(., nrow = 1) %>% `rownames<-`(., "Value") %>%
kable(., "html", col.names = c("Mean", "Variance")) %>%
kable_styling(bootstrap_options = "striped", full_width = F)
| | Mean | Variance |
|---|---|---|
| Value | 0.4944 | 0.0836 |
The table above reports the sample mean and variance of the 10,000 uniform variates; both are close to the theoretical values 0.5 and 0.0833.
Randomly select 10 numbers from the set S and then use the average of these 10 numbers as the estimate. Repeat this experiment 20 times. What are the sample mean and sample standard deviation of these 20 random experiments?
set.seed(2019)
S_10.table <- matrix(NA, ncol = 20, nrow = 2) %>%
  `rownames<-`(., c("Mean", "Variance")) %>%
  `colnames<-`(., c(1:20))
for(i in 1:20){
  S_10 <- sample(x = S, size = 10, replace = TRUE)
  S_10.table[1, i] <- S_10 %>% mean()
  S_10.table[2, i] <- S_10 %>% var()
}
S_10.table %>% apply(., 1, mean) %>% round(., 4) %>%
  t() %>% `rownames<-`(., "Values") %>%
  kable(., "html") %>%
  kable_styling(bootstrap_options = "striped", full_width = F)
| | Mean | Variance |
|---|---|---|
| Values | 0.4631 | 0.0797 |
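The sample standard deviation of the 20 averages, which the question also asks for, can be computed from the first row of the same table.
# Standard deviation of the 20 sample means
S_10.table[1, ] %>% sd %>% round(., 4)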
Randomly select 1,000 numbers from the set S and then use the average of these 1,000 numbers as the estimate. Repeat this experiment 50 times. What are the sample mean and sample standard deviation of these 50 random experiments?
set.seed(2019)
S_50.table <- matrix(NA, ncol = 50, nrow = 2) %>%
  `rownames<-`(., c("Mean", "Variance")) %>%
  `colnames<-`(., c(1:50))
for(i in 1:50){
  S_50 <- sample(x = S, size = 1000, replace = TRUE)
  S_50.table[1, i] <- S_50 %>% mean()
  S_50.table[2, i] <- S_50 %>% var()
}
S_50.table %>% apply(., 1, mean) %>% round(., 4) %>%
  t() %>% `rownames<-`(., "Values") %>%
  kable(., "html") %>%
  kable_styling(bootstrap_options = "striped", full_width = F)
| | Mean | Variance |
|---|---|---|
| Values | 0.2843 | 0.0439 |
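For reference, the theoretical standard error of an n-sample average of U[0, 1] draws (approximately valid here, since we resample from the finite set S rather than from U[0, 1] itself) is \[
\begin{align}
sd(\bar{S}_{n}) = \sqrt{\frac{1/12}{n}},\quad \text{so}\quad sd(\bar{S}_{10}) \approx 0.0913\quad \text{and}\quad sd(\bar{S}_{1000}) \approx 0.0091.
\end{align}
\] The averages of 1,000 draws therefore scatter about ten times less than the averages of 10 draws.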
Consider a linear system of equations as follows: \[
\begin{equation*}
\begin{aligned}
2x_{1} &+ &&2x_{2} & &= 5 \\
x_{1} &+ &&&x_{3} &= 2 \\
& &&2x_{2}+ &2x_{3} &= 3 \\
x_{1} &+ &&&x_{3} &= 4
\end{aligned}
\end{equation*}
\] Find the least squares approximation solution for it.
According to the statement, we can rewrite the linear system of equations in matrix form.
\[
\begin{align}
\begin{pmatrix}
2 & 2 & 0 \\
1 & 0 & 1 \\
0 & 2 & 2 \\
1 & 0 & 1
\end{pmatrix}_{4 \times 3}
\begin{pmatrix}
x_{1} \\
x_{2} \\
x_{3}
\end{pmatrix}_{3 \times 1} &=
\begin{pmatrix}
5 \\
2 \\
3 \\
4
\end{pmatrix}_{4 \times 1} \\
\Rightarrow AX &= b
\end{align}
\] So, the least squares solution is \[
\begin{align}
X = (A^{T}A)^{-1}A^{T}b \in R^{3 \times 1}
\end{align}
\]
# LS solution
A <- matrix(c(2, 2, 0,
1, 0, 1,
0, 2, 2,
1, 0, 1), nrow = 4, byrow = TRUE)
b <- matrix(c(5, 2, 3, 4), ncol = 1)
X <- solve(t(A) %*% A) %*% t(A) %*% b # normal equations: (A^T A) X = A^T b
X %>% `rownames<-`(., paste0("x", 1:3)) %>%
  kable(., "html", caption = "LS solution") %>%
  kable_styling(bootstrap_options = "striped", full_width = F)
| | LS solution |
|---|---|
| x1 | 2.0 |
| x2 | 0.5 |
| x3 | 1.0 |
The table above gives the least squares solution of the linear system.
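Forming \(A^{T}A\) explicitly is fine for this small system, but as an alternative sketch, R can solve the least squares problem directly through a QR decomposition, which is numerically more stable:
# Same LS solution via QR decomposition
qr.solve(A, b)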
Generate a training dataset of size 1,000 by yourself.
That is, \(S = \{(x^{i},y_{i})\ |\ x^{i} = (x^{i}_{1}, x^{i}_{2}) \in R^{2}\ \text{and}\ y_{i} \in R,\ i = 1, \ldots, 1000\}\), where \(x_{1}\) and \(x_{2}\) are generated by the uniform distribution U[-1, 1]. The observation value is \(y = 2x^{2}_{1} - 0.6x^{2}_{2} + 1.5x_{1}x_{2} + x_{1} + 2x_{2} + \epsilon\), where \(\epsilon\) is random noise generated from N(0, 1).
# Generating a training dataset S
set.seed(2019)
x1 <- runif(n = 1000, min = -1, max = 1)
x2 <- runif(n = 1000, min = -1, max = 1)
y <- 2*(x1^2) - 0.6 *(x2^2) + 1.5 * x1*x2 + x1 + 2*x2 +
rnorm(1000)
S <- data.frame(x1, x2, y)
S %>% head %>%
kable(., "html", caption = "A part of data") %>%
kable_styling(bootstrap_options = "striped", full_width = F)
x1 | x2 | y |
---|---|---|
0.5398031 | -0.3970481 | 1.5427073 |
0.4256795 | -0.8003378 | -1.2369026 |
-0.3932796 | 0.4100049 | -0.3372891 |
0.2364727 | -0.6506493 | -1.4938536 |
-0.8990325 | 0.1781360 | 1.5329034 |
-0.9135624 | 0.1022247 | 1.6144876 |
The table above shows the first six rows of the simulated dataset.
Find a quadratic function f(x) that fits the training dataset S.
According to the statement, we fit a quadratic function by least squares. Note that in the R formula below, `x1*x2` expands to `x1 + x2 + x1:x2`, so the model includes the terms \(x_{1}^{2},\ x_{2}^{2},\ x_{1},\ x_{2}\), and \(x_{1}x_{2}\), with no intercept (`-1`).
# Fit a regression
lm.fit <- lm(y ~ -1 + I(x1^2) + I(x2^2) + x1*x2, data = S)
lm.fit %>% summary() %>% coef %>% round(., 3)%>%
kable(., "html") %>%
kable_styling(bootstrap_options = "striped", full_width = F)
| | Estimate | Std. Error | t value | Pr(>\|t\|) |
|---|---|---|---|---|
| I(x1^2) | 2.230 | 0.087 | 25.572 | 0 |
| I(x2^2) | -0.819 | 0.086 | -9.559 | 0 |
| x1 | 0.983 | 0.056 | 17.605 | 0 |
| x2 | 2.009 | 0.056 | 36.058 | 0 |
| x1:x2 | 1.449 | 0.095 | 15.220 | 0 |
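Written out with the rounded estimates from the table, the fitted quadratic is \[
\begin{align}
\hat{f}(x) = 2.230x_{1}^{2} - 0.819x_{2}^{2} + 1.449x_{1}x_{2} + 0.983x_{1} + 2.009x_{2},
\end{align}
\] whose coefficients are reasonably close to those of the true surface \(2x_{1}^{2} - 0.6x_{2}^{2} + 1.5x_{1}x_{2} + x_{1} + 2x_{2}\).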
Compute the MAE (mean absolute error) and plot the fitted function.
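Here the MAE is the mean of the absolute residuals, \[
\begin{align}
MAE = \frac{1}{n}\sum_{i=1}^{n}\left|y_{i} - \hat{f}(x^{i})\right|,
\end{align}
\] computed below from the residuals of the fitted model.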
# MAE
lm.fit$residuals %>% abs %>% mean %>% round(., 4) %>%
matrix(., dimnames = list(c("Value"), c("MAE"))) %>%
kable(., "html") %>%
kable_styling(bootstrap_options = "striped", full_width = F)
| | MAE |
|---|---|
| Value | 0.8015 |
The table above reports the MAE (mean absolute error) of the fitted quadratic function.
# Plot the fitted quadratic function
obj_fun <- function (x1, x2) {
  beta <- coef(lm.fit) # estimates in model order: x1^2, x2^2, x1, x2, x1:x2
  beta[1]*x1^2 + beta[2]*x2^2 + beta[3]*x1 + beta[4]*x2 + beta[5]*x1*x2
}
x1 <- seq(-1,1, length.out = 15)
x2 <- seq(-1,1, length.out = 15)
z <- outer(x1, x2, obj_fun)
persp(x = x1, y = x2, z,
theta = 20, phi = 20, expand = 0.7,
col = "blue",
main = "quadratic function",
sub = expression(f(x) == 2.23*x[1]^{2} - 0.819*x[2]^{2} + 1.449*x[1]*x[2] + 0.983*x[1] + 2.009*x[2]))