To Calculating standard error of b by hand we will use this formula : var(b)=s^2(x'x)-1
library(tidyverse);library(readxl)
## ── Attaching packages ─────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.1 ✓ purrr 0.3.4
## ✓ tibble 3.0.1 ✓ dplyr 1.0.0
## ✓ tidyr 1.1.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
importing and prepairing the data
dir<-"~/Statistical-practice/data sets/Ch3 application econometric analysis.xlsx"
dat<-read_xlsx(dir)
## New names:
## * education -> education...6
## * education -> education...7
str(dat)
## tibble [15 × 8] (S3: tbl_df/tbl/data.frame)
## $ Person : num [1:15] 1 2 3 4 5 6 7 8 9 10 ...
## $ Education : num [1:15] 13 15 10 12 15 15 15 13 13 11 ...
## $ Wage : num [1:15] 1.82 2.14 1.56 1.85 2.41 1.83 1.78 2.12 1.95 2.19 ...
## $ Experience : num [1:15] 1 4 1 1 2 2 3 4 2 5 ...
## $ Ability : chr [1:15] "1" "1.5" "−0.36" "0.26" ...
## $ education...6: num [1:15] 12 12 12 12 12 12 12 12 12 12 ...
## $ education...7: num [1:15] 12 12 12 10 12 16 12 15 12 12 ...
## $ Siblings : num [1:15] 1 1 1 4 1 2 1 2 2 2 ...
minus<-which(str_detect(dat$Ability,pattern = "−"))
dat$Ability<-str_remove_all(dat$Ability,"−")
dat$Ability<-as.numeric(dat$Ability)
dat$Ability[minus]<-dat$Ability[minus]*-1
names(dat)<-c("Person","Education","Wage","Experience","Ability","M_education","F_education","Siblings")
Look at it using regular function lm
fit<-dat%>%lm(formula = Wage~Education+Ability)
summary(fit)
##
## Call:
## lm(formula = Wage ~ Education + Ability, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.44495 -0.20835 0.05521 0.15983 0.43944
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.74207 0.64603 2.697 0.0194 *
## Education 0.02538 0.05081 0.499 0.6265
## Ability -0.02530 0.09717 -0.260 0.7990
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2761 on 12 degrees of freedom
## Multiple R-squared: 0.02101, Adjusted R-squared: -0.1422
## F-statistic: 0.1288 on 2 and 12 DF, p-value: 0.8804
now get the (x’x)-1 matrix and S^2
x<-cbind(C=rep(1,nrow(dat)),dat[,c("Education","Ability")])%>%as.matrix()
A<-solve(t(x)%*%(x))
S2<-(t(fit$residuals)%*%fit$residuals)/(nrow(dat)-length(coef(fit)))%>%as.vector()
varb<-S2[1,1]*diag(A) #The variance
sdb<-sqrt(varb)%>%as.matrix() #standard error
print(sdb)
## [,1]
## C 0.64602832
## Education 0.05081059
## Ability 0.09716736
Both Results seem to be identical
Now calculate CI
tk<-c(qt(p = c(.025,.975),df = (nrow(dat)-3)))%>%as.matrix()
cI<-coef(fit)+(sdb%*%t(tk))
print(cI)
## [,1] [,2]
## C 0.33449591 3.1496455
## Education -0.08532947 0.1360840
## Ability -0.23700584 0.1864132
confint(fit)
## 2.5 % 97.5 %
## (Intercept) 0.33449591 3.1496455
## Education -0.08532947 0.1360840
## Ability -0.23700584 0.1864132
Again , Both are identical
H testing
tvalue<-coef(fit)/sdb
print(tvalue)
## [,1]
## C 2.6965857
## Education 0.4994488
## Ability -0.2603378
#prop of t value
pt(q = tvalue,df = 12,lower.tail = F)
## [,1]
## C 0.009715856
## Education 0.313247114
## Ability 0.600491685