library(latex2exp)
Linear regression and Gaussian data generation process. The problem assumes the error is Gaussian. The purpose of this experiment is to show convergence rate gauss-markov solution \(\hat{\theta}\) to \(\theta_0\) as n increases.
theta_0 <- c(0,1)
lambda <- 0
p<-2
set.seed(123)
sizes <- c(1e3,5e3,1e4,2e4,3e4,5e4,1e5,2e5,5e5,1e6)
m <- length(sizes)
MSE <-rep(m,0)
results_0 <- rep(m,0)
results_1 <- rep(m,0)
for (i in 1:m)
{
print(i)
n <- sizes[i]
x <- rnorm(n)
eps <- rnorm(n)
y<-theta_0[1] + theta_0[2]*x + eps
X<-cbind(rep(1,n),x)
A<-solve((t(X)%*%X + diag(rep(n*lambda,p))))
b <- t(X)%*%y
hattheta <- A%*%b
print(hattheta)
results_0[i]<-hattheta[1]
results_1[i]<-hattheta[2]
MSE[i]<- mean(eps^2)
}
## [1] 1
## [,1]
## 0.04104524
## x 1.08804729
## [1] 2
## [,1]
## 0.005664549
## x 0.995633155
## [1] 3
## [,1]
## -0.004978205
## x 0.996688098
## [1] 4
## [,1]
## 0.005435431
## x 0.997131648
## [1] 5
## [,1]
## 0.003955162
## x 1.002431392
## [1] 6
## [,1]
## 0.005592122
## x 0.999867804
## [1] 7
## [,1]
## 0.001078417
## x 0.998206561
## [1] 8
## [,1]
## -0.0006539746
## x 1.0043342820
## [1] 9
## [,1]
## -0.001514468
## x 1.001936290
## [1] 10
## [,1]
## -0.0003644211
## x 0.9997599168
par(mfrow=c(1,2))
plot(sizes, results_0-theta_0[1], xlab= 'n', ylab=TeX('$\\hat{\\theta_0} -\\theta_0$'))
plot(sizes, results_1-theta_0[2], xlab= 'n', ylab=TeX('$\\hat{\\theta_1} -\\theta_1$'))
## Experiment 2: Convergence of \(E[||\hat{\theta}-\theta||_2^2]\) \(n\) trial per experiment \(m\) experiments
m <- 10000
MSE <- rep(0,m)
n <- 10000
x <- rnorm(n)
# loop over experiments
for (i in 1:m){
#print(i)
eps <- rnorm(n)
y<-theta_0[1] + theta_0[2]*x + eps
X<-cbind(rep(1,n),x)
A<-solve((t(X)%*%X + diag(rep(n*lambda,p))))
b <- t(X)%*%y
hattheta <- A%*%b
MSE[i]<-norm(hattheta-theta_0, type='2')^2
}
# plotting of the expected MSE
check_points <- seq(1,10)*1000
EMSE <- rep(0, length(check_points))
for (i in 1:10)
{
EMSE[i]<-mean(MSE[1:check_points[i]])
}
plot(check_points, EMSE)