This is a Tutorial to estimate SE and CI of Reg. Coef by hand

To know how to estimate the OLS Coef by hand please visit my Tutorial , and it is a pleasure to know your feed back at vet.m.mohamed@gmail.com

To Calculating standard error of b by hand we will use this formula : var(b)=s^2(x'x)-1

library(tidyverse);library(readxl) 
## ── Attaching packages ─────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.1     ✓ purrr   0.3.4
## ✓ tibble  3.0.1     ✓ dplyr   1.0.0
## ✓ tidyr   1.1.0     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

importing and prepairing the data

dir<-"~/Statistical-practice/data sets/Ch3 application econometric analysis.xlsx"

dat<-read_xlsx(dir)
## New names:
## * education -> education...6
## * education -> education...7
str(dat)
## tibble [15 × 8] (S3: tbl_df/tbl/data.frame)
##  $ Person       : num [1:15] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Education    : num [1:15] 13 15 10 12 15 15 15 13 13 11 ...
##  $ Wage         : num [1:15] 1.82 2.14 1.56 1.85 2.41 1.83 1.78 2.12 1.95 2.19 ...
##  $ Experience   : num [1:15] 1 4 1 1 2 2 3 4 2 5 ...
##  $ Ability      : chr [1:15] "1" "1.5" "−0.36" "0.26" ...
##  $ education...6: num [1:15] 12 12 12 12 12 12 12 12 12 12 ...
##  $ education...7: num [1:15] 12 12 12 10 12 16 12 15 12 12 ...
##  $ Siblings     : num [1:15] 1 1 1 4 1 2 1 2 2 2 ...
minus<-which(str_detect(dat$Ability,pattern = "−"))

dat$Ability<-str_remove_all(dat$Ability,"−")

dat$Ability<-as.numeric(dat$Ability)

dat$Ability[minus]<-dat$Ability[minus]*-1

names(dat)<-c("Person","Education","Wage","Experience","Ability","M_education","F_education","Siblings")

Look at it using regular function lm

fit<-dat%>%lm(formula = Wage~Education+Ability)

summary(fit)
## 
## Call:
## lm(formula = Wage ~ Education + Ability, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.44495 -0.20835  0.05521  0.15983  0.43944 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  1.74207    0.64603   2.697   0.0194 *
## Education    0.02538    0.05081   0.499   0.6265  
## Ability     -0.02530    0.09717  -0.260   0.7990  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2761 on 12 degrees of freedom
## Multiple R-squared:  0.02101,    Adjusted R-squared:  -0.1422 
## F-statistic: 0.1288 on 2 and 12 DF,  p-value: 0.8804

now get the (x’x)-1 matrix and S^2

x<-cbind(C=rep(1,nrow(dat)),dat[,c("Education","Ability")])%>%as.matrix()

A<-solve(t(x)%*%(x))

S2<-(t(fit$residuals)%*%fit$residuals)/(nrow(dat)-length(coef(fit)))%>%as.vector()

varb<-S2[1,1]*diag(A) #The variance 

sdb<-sqrt(varb)%>%as.matrix() #standard error  

print(sdb)
##                 [,1]
## C         0.64602832
## Education 0.05081059
## Ability   0.09716736

Both Results seem to be identical

Now calculate CI

tk<-c(qt(p = c(.025,.975),df = (nrow(dat)-3)))%>%as.matrix()


cI<-coef(fit)+(sdb%*%t(tk))

print(cI)
##                  [,1]      [,2]
## C          0.33449591 3.1496455
## Education -0.08532947 0.1360840
## Ability   -0.23700584 0.1864132
confint(fit)
##                   2.5 %    97.5 %
## (Intercept)  0.33449591 3.1496455
## Education   -0.08532947 0.1360840
## Ability     -0.23700584 0.1864132

Again , Both are identical

H testing

tvalue<-coef(fit)/sdb

print(tvalue)
##                 [,1]
## C          2.6965857
## Education  0.4994488
## Ability   -0.2603378
#prop of t value

pt(q = tvalue,df = 12,lower.tail = F)
##                  [,1]
## C         0.009715856
## Education 0.313247114
## Ability   0.600491685

Regards