library(latex2exp)

Experiment 1: convergence study of \(\hat{\theta}\) against \(n\).

Linear regression and Gaussian data generation process. The problem assumes the error is Gaussian. The purpose of this experiment is to show convergence rate gauss-markov solution \(\hat{\theta}\) to \(\theta_0\) as n increases.

theta_0 <- c(0,1)
lambda <- 0
p<-2

set.seed(123)


sizes <- c(1e3,5e3,1e4,2e4,3e4,5e4,1e5,2e5,5e5,1e6)
m <- length(sizes)
MSE <-rep(m,0)
results_0 <- rep(m,0)
results_1 <- rep(m,0)

for (i in 1:m)
{
  print(i)
  n <- sizes[i]
  x <- rnorm(n)
  eps <- rnorm(n)
  
  y<-theta_0[1] + theta_0[2]*x + eps
  
  X<-cbind(rep(1,n),x)
  
  A<-solve((t(X)%*%X + diag(rep(n*lambda,p))))
  b <- t(X)%*%y
  
  hattheta <- A%*%b
  print(hattheta)
  results_0[i]<-hattheta[1]
  results_1[i]<-hattheta[2]
  MSE[i]<- mean(eps^2)
  
}
## [1] 1
##         [,1]
##   0.04104524
## x 1.08804729
## [1] 2
##          [,1]
##   0.005664549
## x 0.995633155
## [1] 3
##           [,1]
##   -0.004978205
## x  0.996688098
## [1] 4
##          [,1]
##   0.005435431
## x 0.997131648
## [1] 5
##          [,1]
##   0.003955162
## x 1.002431392
## [1] 6
##          [,1]
##   0.005592122
## x 0.999867804
## [1] 7
##          [,1]
##   0.001078417
## x 0.998206561
## [1] 8
##            [,1]
##   -0.0006539746
## x  1.0043342820
## [1] 9
##           [,1]
##   -0.001514468
## x  1.001936290
## [1] 10
##            [,1]
##   -0.0003644211
## x  0.9997599168
par(mfrow=c(1,2))
plot(sizes, results_0-theta_0[1], xlab= 'n', ylab=TeX('$\\hat{\\theta_0} -\\theta_0$'))
plot(sizes, results_1-theta_0[2], xlab= 'n', ylab=TeX('$\\hat{\\theta_1} -\\theta_1$'))

## Experiment 2: Convergence of \(E[||\hat{\theta}-\theta||_2^2]\) \(n\) trial per experiment \(m\) experiments

m <- 10000
MSE <- rep(0,m)
n <- 10000
x <- rnorm(n)
# loop over experiments
for (i in 1:m){
  #print(i)
  eps <- rnorm(n)
  y<-theta_0[1] + theta_0[2]*x + eps
  X<-cbind(rep(1,n),x)

  A<-solve((t(X)%*%X + diag(rep(n*lambda,p))))
  b <- t(X)%*%y
  
  hattheta <- A%*%b
  
  MSE[i]<-norm(hattheta-theta_0, type='2')^2
}
# plotting of the expected MSE
check_points <- seq(1,10)*1000
EMSE <- rep(0, length(check_points))
for (i in 1:10)
{
  EMSE[i]<-mean(MSE[1:check_points[i]])
  
}
plot(check_points, EMSE)

Experiment 3: Show that Tik. regularization reduces EMSE