#..............................................................
# This is a simulation for regression
#
#
#           By :Fahad S. Obaid
#..............................................................


#.................................................................................
# Xi = [1, X1,i, X2,i]'
# beta = [beta0, beta1, beta2]'
# correlation coefficient rho = 0.5
# suppose the conditional distribution Yi given [X1,i, X2,i]
# with mean Xi'beta with beta = [0,1,1]'
# the standard deviation is sd = 1
# carry out the simulation with sample size of n = 50
# and 1000 repeated samples and obtain
# the mean , variance , and histogram of the sampling distribution beta1
#.................................................................................

# set the librry of multivariate regression
library(MASS)

# set beta values


beta <- matrix(rep(0, 2*1), ncol = 1)

for (i in 1:2) {
  beta[i]<-1
  
}


#set the correlation coef
rho <- 0.5

# the standard deviation
sd <- 1

#number of the sample size
n <- 50 

# number of repititions
reps <- 1000


# mean of x is vector of zeros

muX<- matrix(rep(0, 2*1), ncol = 1)
# set up a matrix
# note the values are the covariance and the ones are the variance


Sigma <- matrix(rep(0, 2*2), ncol = 2)

for(i in 1:2){
  for(j in 1:2){
    Sigma[i,j] <- rho^(abs(i - j))
  }
}

Sigma
##      [,1] [,2]
## [1,]  1.0  0.5
## [2,]  0.5  1.0
# the for loop for the simulation of repetitions
# draw a sample from a multivariate normal distribution

# storage to save the values of repetitions
beta1.hat <- matrix(nrow=reps, ncol = 1)

for (j in 1:reps){
  
  
  X <- mvrnorm(n,mu=muX,Sigma = Sigma)
  # note that the multivariate mvrnorm will give 2 random variables for n times
  # n times mean that you will have 2 random varialbes running 50 times
  
  
  
  # get the Y values
  

  # #set the error values
   epsilon <- rt(50, df = 3)
  #
  #
  # # the regression equation for generating epsilon
   Y <-X%*%beta + epsilon
  #
  Y <- rnorm(n,mean = X%*%beta, sd = sd)
  
  # now run the regression estimation
  eq <- lm(Y~0 +X)
  # each time the for loop gives a value during the repition 
  # it will save the vale in the matrix storage beta1.hat
  beta1.hat[j] <- eq$coefficients[2]
  #eq$coefficients[2] this means the storage of the values of X
  # which is number 2 as a coefficient
  # note [1] denoted to Y, [2] to X
  
}


# find the mean of the simulation in beta.hat
print(mean(beta1.hat))
## [1] 1.004395
# find the variance of beta1.hat
print(var(beta1.hat))
##            [,1]
## [1,] 0.02815687
# plot the histogram of beta1.hat
hist(beta1.hat, freq = F)
# show the density of the histogram
lines(density((beta1.hat)), col=2, lwd=3)

# The confidence interval 95%

confint(eq)
##        2.5 %   97.5 %
## X1 0.6450912 1.307053
## X2 0.6772527 1.336773
# generating the errors
epsilon
##  [1]  0.16479046 -0.50129516 -1.45899250 -1.39581180  1.16557736
##  [6] -0.21083576 -0.78504609 -0.70606435 -0.29999883  0.59438912
## [11] -3.41919795 -1.93457467 -1.03792527  1.41165557 -1.07805967
## [16] -0.10493970 -0.72173213 -1.87194124 -0.31145508 -0.54103736
## [21] -1.26593129  2.22924698 -2.16826026  0.93994223 -0.89792085
## [26] -1.93247992  0.95405361 -0.17279047  0.57475659  1.29615745
## [31]  0.22216773 -0.76618237 -0.22775682  0.54520150 -0.06234681
## [36]  1.11390652 -0.98265326 -1.12953982 -0.63971893  1.61472536
## [41] -0.14754833 -0.62148557  0.23143613  2.43276638 -5.48492340
## [46]  2.47877943 -0.51389960  0.57220606  0.70962815 -0.73967551