Riner_ARDE_Homework1

Question 3

3.1

n = 30; set.seed(0); p = 3;
X = matrix(runif(n*p),nrow=n)*2-1;
b = seq(1,p,by=1);
Y = X%*%b + rnorm(n);

myOLS <- function(Y, X, is1 = TRUE) {

X = if(is1) cbind(1, X) else X

n = nrow(X)
p = ncol(X)

N1 = t(X)%*%X
N2 = t(X)%*%Y

B = solve(N1, N2)

E     <- Y - X %*% B

  sigma2 <- as.numeric(t(E)%*%E / (n - p))

  stdv <- sqrt(diag(sigma2 * solve(N1)))  

  list(b = B, stdv = stdv)

}

L1 = myOLS(Y, X, TRUE)

L2 = myOLS(Y, X, FALSE)

print(L1)
$b
           [,1]
[1,] -0.1291359
[2,]  0.9214173
[3,]  2.4020865
[4,]  2.7482181

$stdv
[1] 0.1708274 0.2986600 0.3468138 0.3452394
print(L2)
$b
         [,1]
[1,] 0.892872
[2,] 2.386783
[3,] 2.716730

$stdv
[1] 0.2939026 0.3434637 0.3399867
fit1 = lm(Y ~ X); summary(fit1); # regression with an intercept

Call:
lm(formula = Y ~ X)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.3701 -0.3304  0.1082  0.4938  2.3930 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -0.1291     0.1708  -0.756  0.45648    
X1            0.9214     0.2987   3.085  0.00478 ** 
X2            2.4021     0.3468   6.926 2.36e-07 ***
X3            2.7482     0.3452   7.960 1.94e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9235 on 26 degrees of freedom
Multiple R-squared:  0.802, Adjusted R-squared:  0.7792 
F-statistic: 35.11 on 3 and 26 DF,  p-value: 2.711e-09
fit0 = lm(Y ~ -1 + X); summary(fit0) # regression without an intercept

Call:
lm(formula = Y ~ -1 + X)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.48956 -0.47760 -0.00264  0.37870  2.29687 

Coefficients:
   Estimate Std. Error t value Pr(>|t|)    
X1   0.8929     0.2939   3.038  0.00523 ** 
X2   2.3868     0.3435   6.949 1.81e-07 ***
X3   2.7167     0.3400   7.991 1.38e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9161 on 27 degrees of freedom
Multiple R-squared:  0.7982,    Adjusted R-squared:  0.7758 
F-statistic: 35.61 on 3 and 27 DF,  p-value: 1.583e-09
print("The estimates for my OLS function and the lm() function were the same.") 
[1] "The estimates for my OLS function and the lm() function were the same."

3.2

myPolyReg1 <- function(Y, X1, deg=1) {

n <- length(X1)
exps <- 0:deg

X <- matrix(NA_real_, n, length(exps))
  for (e in seq_along(exps)) {
    X[, e] <- X1 ^ exps[e]
  }

p <- ncol(X)

N1 = t(X)%*%X
N2 = t(X)%*%Y

B = solve(N1, N2)

E     <- Y - X %*% B 

  sigma2 <- as.numeric(t(E)%*%E / (n - p))

  stdv <- sqrt(diag(sigma2 * solve(N1)))  

  list(b = B, stdv = stdv)

}


n = 30; set.seed(0);
X = runif(n)*4-2; # X is uniformly distributed on [-2,2]
Y = 1 + 3*X -2*X^2 + 1*X^3 + rnorm(n);

L1 = myPolyReg1(Y, X, 3)

print(L1)
$b
          [,1]
[1,]  1.141535
[2,]  2.912750
[3,] -2.091594
[4,]  1.015042

$stdv
[1] 0.2384849 0.3166353 0.1318304 0.1221091
fit0 = lm(Y ~ X + I(X^2) + I(X^3)); summary(fit0)

Call:
lm(formula = Y ~ X + I(X^2) + I(X^3))

Residuals:
    Min      1Q  Median      3Q     Max 
-1.3159 -0.5052 -0.1633  0.5612  1.6267 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   1.1415     0.2385   4.787 5.90e-05 ***
X             2.9127     0.3166   9.199 1.17e-09 ***
I(X^2)       -2.0916     0.1318 -15.866 6.88e-15 ***
I(X^3)        1.0150     0.1221   8.313 8.56e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.8295 on 26 degrees of freedom
Multiple R-squared:  0.9856,    Adjusted R-squared:  0.984 
F-statistic: 594.4 on 3 and 26 DF,  p-value: < 2.2e-16
print("The results produced by my regression function and lm() are the same.")
[1] "The results produced by my regression function and lm() are the same."

3.3

myAnova1 <- function(Y, XF, is1 = TRUE) {

 XF <- as.factor(XF)
  lev <- levels(XF)
  L   <- length(lev)
  ny   <- length(Y)

  Z <- sapply(lev, function(lv) as.numeric(XF == lv))
  colnames(Z) <- paste0("XF", lev)


  if (is1) {
    if (L == 1) {
      X <- matrix(1, ny, 1)
      colnames(X) <- "(Intercept)"
    } else {
      X <- cbind(`(Intercept)` = 1, Z[, -1, drop = FALSE])
    }
  } else {
    X <- Z
  }

n = nrow(X)
p = ncol(X)

N1 = t(X)%*%X
N2 = t(X)%*%Y

B = solve(N1, N2)

E     <- Y - X %*% B

  sigma2 <- as.numeric(t(E)%*%E / (n - p))

  stdv <- sqrt(diag(sigma2 * solve(N1)))  

  names(B) <- colnames(X); names(stdv) <- colnames(X)
  
  list(b = B, stdv = stdv)

}

n = 30; set.seed(0);
XF = rep(c("A","B","C"),each=10)
Y = rnorm(n) + rep(c(1,2,3),each=10)
fit1 = lm(Y ~ XF); summary(fit1) # with an intercept

Call:
lm(formula = Y ~ XF)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.89887 -0.62259  0.01652  0.70476  2.04573 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   1.3589     0.2828   4.805 5.15e-05 ***
XFB           0.2786     0.4000   0.697 0.492057    
XFC           1.7105     0.4000   4.276 0.000212 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.8944 on 27 degrees of freedom
Multiple R-squared:  0.4382,    Adjusted R-squared:  0.3966 
F-statistic: 10.53 on 2 and 27 DF,  p-value: 0.0004163
fit0 = lm(Y ~ -1 + XF); summary(fit0) # without an intercept

Call:
lm(formula = Y ~ -1 + XF)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.89887 -0.62259  0.01652  0.70476  2.04573 

Coefficients:
    Estimate Std. Error t value Pr(>|t|)    
XFA   1.3589     0.2828   4.805 5.15e-05 ***
XFB   1.6375     0.2828   5.790 3.69e-06 ***
XFC   3.0694     0.2828  10.853 2.39e-11 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.8944 on 27 degrees of freedom
Multiple R-squared:  0.8659,    Adjusted R-squared:  0.851 
F-statistic: 58.13 on 3 and 27 DF,  p-value: 6.599e-12
L1 = myAnova1(Y, XF, TRUE)

L2 = myAnova1(Y, XF, FALSE)

print(L1)
$b
                 [,1]
(Intercept) 1.3589240
XFB         0.2785947
XFC         1.7104858
attr(,"names")
[1] "(Intercept)" "XFB"         "XFC"        

$stdv
(Intercept)         XFB         XFC 
  0.2828292   0.3999809   0.3999809 
print(L2)
$b
        [,1]
XFA 1.358924
XFB 1.637519
XFC 3.069410
attr(,"names")
[1] "XFA" "XFB" "XFC"

$stdv
      XFA       XFB       XFC 
0.2828292 0.2828292 0.2828292 
print("The values for my anova function and the lm() are the same")
[1] "The values for my anova function and the lm() are the same"

4.1

n = 30; set.seed(0);
X = runif(n)*4-2;
Y = 1 + 3*X + rnorm(n);
fit1 = lm(Y ~ X); summary(fit1) # beta0_hat = 1.0161; beta1_hat = 2.9304, o

Call:
lm(formula = Y ~ X)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.2486 -0.5518 -0.1069  0.5028  1.6770 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   1.0161     0.1480   6.867 1.84e-07 ***
X             2.9304     0.1241  23.605  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.8068 on 28 degrees of freedom
Multiple R-squared:  0.9522,    Adjusted R-squared:  0.9504 
F-statistic: 557.2 on 1 and 28 DF,  p-value: < 2.2e-16
myFullObj <- function(b,sig) {
 
  Xmat <-  cbind(1, X)

  e <- Y - drop(Xmat %*% b)
  n <- length(Y)


 loglik <- n * 0.5 * log(2 * pi * sig^2) + sum(e^2) / (2 * sig^2)
 return(loglik)
}

4.2

sig <- 2
myObj1 <- function(b) myFullObj(b, sig)
B_BFGS <- optim(par=c(0,0), fn=myObj1, method="BFGS")
B_BFGS$par 
[1] 1.016057 2.930393
fit1 = lm(Y ~ X)
sum <- summary(fit1)

fit1$coefficients
(Intercept)           X 
   1.016062    2.930397 
print("My coefficients are the same as those produced by the lm() function up to the 5th decimal place.")
[1] "My coefficients are the same as those produced by the lm() function up to the 5th decimal place."

4.3

myObj2 <- function(params) {
  b <- params[1:2]
  sig <- params[3]
  myFullObj(b, sig)
}

B_LBFGSB <- optim(par=c(0,0,1), fn=myObj2,
              method="L-BFGS-B",
              lower=c(-Inf,-Inf,1e-5))
B_LBFGSB$par   
[1] 1.0160618 2.9303970 0.7794035
fit1$coefficients
(Intercept)           X 
   1.016062    2.930397 
print("My coefficients are the same as those produced by the lm() function up to the 6th decimal place.")
[1] "My coefficients are the same as those produced by the lm() function up to the 6th decimal place."

Would you expect the MLE for sig to be equal to the estimate produced by lm (“Residualstandard error”)? 

I would expect that the maximum likelihood estimation for sigma would be similiar, but smaller than the one used by the lm (ordinary least squares). This is because ordinary least squares takes into account the number of parameters in the model into its variance estimate, but MLE does not.