I Want to Show you the power of matrix algebra in estimating regression coefficients in R and coefficient of determination R2
The data taken from William H. Greene - Econometric Analysis, 7th Edition (2011, Prentice Hall) , Ch3 application
library(tidyverse);library(readxl);library(knitr)
importing and prepairing the data
dir<-"~/Statistical-practice/data sets/Ch3 application econometric analysis.xlsx"
dat<-read_xlsx(dir)
## New names:
## * education -> education...6
## * education -> education...7
str(dat)
## tibble [15 × 8] (S3: tbl_df/tbl/data.frame)
## $ Person : num [1:15] 1 2 3 4 5 6 7 8 9 10 ...
## $ Education : num [1:15] 13 15 10 12 15 15 15 13 13 11 ...
## $ Wage : num [1:15] 1.82 2.14 1.56 1.85 2.41 1.83 1.78 2.12 1.95 2.19 ...
## $ Experience : num [1:15] 1 4 1 1 2 2 3 4 2 5 ...
## $ Ability : chr [1:15] "1" "1.5" "−0.36" "0.26" ...
## $ education...6: num [1:15] 12 12 12 12 12 12 12 12 12 12 ...
## $ education...7: num [1:15] 12 12 12 10 12 16 12 15 12 12 ...
## $ Siblings : num [1:15] 1 1 1 4 1 2 1 2 2 2 ...
minus<-which(str_detect(dat$Ability,pattern = "−"))
dat$Ability<-str_remove_all(dat$Ability,"−")
dat$Ability<-as.numeric(dat$Ability)
dat$Ability[minus]<-dat$Ability[minus]*-1
names(dat)<-c("Person","Education","Wage","Experience","Ability","M_education","F_education","Siblings")
dat%>%kable()
| Person | Education | Wage | Experience | Ability | M_education | F_education | Siblings |
|---|---|---|---|---|---|---|---|
| 1 | 13 | 1.82 | 1 | 1.00 | 12 | 12 | 1 |
| 2 | 15 | 2.14 | 4 | 1.50 | 12 | 12 | 1 |
| 3 | 10 | 1.56 | 1 | -0.36 | 12 | 12 | 1 |
| 4 | 12 | 1.85 | 1 | 0.26 | 12 | 10 | 4 |
| 5 | 15 | 2.41 | 2 | 0.30 | 12 | 12 | 1 |
| 6 | 15 | 1.83 | 2 | 0.44 | 12 | 16 | 2 |
| 7 | 15 | 1.78 | 3 | 0.91 | 12 | 12 | 1 |
| 8 | 13 | 2.12 | 4 | 0.51 | 12 | 15 | 2 |
| 9 | 13 | 1.95 | 2 | 0.86 | 12 | 12 | 2 |
| 10 | 11 | 2.19 | 5 | 0.26 | 12 | 12 | 2 |
| 11 | 12 | 2.44 | 1 | 1.82 | 16 | 17 | 2 |
| 12 | 13 | 2.41 | 4 | -1.30 | 13 | 12 | 5 |
| 13 | 12 | 2.07 | 3 | -0.63 | 12 | 12 | 4 |
| 14 | 12 | 2.20 | 6 | -0.36 | 10 | 12 | 2 |
| 15 | 12 | 2.12 | 3 | 0.28 | 10 | 12 | 3 |
splitting the data
X1<-cbind(C=rep(1,nrow(dat)),dat[,c("Education","Experience","Ability")])%>%as.matrix()
X2<-dat[,c("M_education","F_education","Siblings")]%>%as.matrix()
y<-dat$Wage%>%as.matrix()
X<-cbind(X1,X2)%>%as.matrix()
regressing y on X1 and estimating coef. using OLS
coefX1<-solve(t(X1)%*%X1)%*%t(X1)%*%y
coefX1%>%kable(col.names = "coef")
| coef | |
|---|---|
| C | 1.6636400 |
| Education | 0.0145390 |
| Experience | 0.0710300 |
| Ability | 0.0266154 |
regressing y on X and estimating coef
coefX<-solve(t(X)%*%X)%*%t(X)%*%y
coefX%>%kable(col.names = "Coef")
| Coef | |
|---|---|
| C | 0.0489963 |
| Education | 0.0258221 |
| Experience | 0.1033912 |
| Ability | 0.0307435 |
| M_education | 0.1016307 |
| F_education | 0.0016444 |
| Siblings | 0.0591692 |
Regressing each of X2 on X1 and get the fitted values and Coef
#make identity matrix
I<-diag(1,nrow = nrow(dat),ncol = nrow(dat))
#make a projection matrix of X1
PX1<-(X1%*%solve(t(X1)%*%X1)%*%t(X1))
#Make a residual matrix of X1
MX1<-I-(X1%*%solve(t(X1)%*%X1)%*%t(X1))
#calculate Fitted Value of each Value of X2
X2starfitted<-PX1%*%X2
X2starfitted%>%kable()
| M_education | F_education | Siblings |
|---|---|---|
| 12.88906 | 13.22151 | 1.6868244 |
| 12.11518 | 13.71475 | 0.9453077 |
| 12.45630 | 11.95435 | 3.1323313 |
| 12.60065 | 12.54894 | 2.4432204 |
| 12.08366 | 12.66525 | 2.2061892 |
| 12.15406 | 12.78743 | 2.0721015 |
| 12.10445 | 13.19873 | 1.5661708 |
| 11.78476 | 12.79725 | 1.9887931 |
| 12.53269 | 13.10045 | 1.7651327 |
| 11.54051 | 12.52668 | 2.2677501 |
| 13.38513 | 13.91037 | 0.9491009 |
| 10.87456 | 11.21765 | 3.7223549 |
| 11.58117 | 11.77448 | 3.1840761 |
| 10.85905 | 12.01349 | 2.7581405 |
| 12.03878 | 12.56865 | 2.3125064 |
#Calculate coef of each of X2 variables regressed on X1
coefX2star<-solve(t(X1)%*%X1)%*%t(X1)%*%X2
coefX2star%>%kable()
| M_education | F_education | Siblings | |
|---|---|---|---|
| C | 13.7604348 | 11.9998132 | 3.3197844 |
| Education | -0.0837141 | 0.0267585 | -0.0476471 |
| Experience | -0.2859633 | 0.0011246 | -0.0557793 |
| Ability | 0.5028718 | 0.8727106 | -0.9577689 |
Calculate R2 of y on both X1 and X2
#Make centering matrix
i<-rep(1,nrow(dat))
M0<-I-(i%*%solve(t(i)%*%i)%*%t(i))
#residual matrix Of total X
MX<-I-(X%*%solve(t(X)%*%X)%*%t(X))
#total residual of y on X1 and X2
e<-MX%*%y
#SSE calculation
s<-t(e)%*%e
#SST calculation
ym0y<-t(y)%*%M0%*%y
#R2 Calculation = 1-(SSE/SST)
R2<-1-(s/ym0y)
print(R2)
## [,1]
## [1,] 0.5161341
computing R-square after removing the constant variable from X1
X_1<-X[,-1]
MX_1<-I-(X_1%*%solve(t(X_1)%*%X_1)%*%t(X_1)) #residual matrix
e_1<-MX_1%*%y #residual
s_1<-t(e_1)%*%e_1 #SSE
R2_1<-1-(s_1/ym0y)
print(R2_1)
## [,1]
## [1,] 0.5159728
computing R2 for constant term only
#the centering matrix is the same residual matrix made by constants only
e_c<-M0%*%y #residual
s_c<-t(e_c)%*%e_c #SSE
R2_c<-1-(s_c/ym0y) #approximately zero
print(R2_c)
## [,1]
## [1,] 9.992007e-16
regressing y on X1 and X2 after partialing effect of X1 on X2 (X2start) and calculating R2
X2star<-MX1%*%X2
Xstar<-cbind(X1,X2star)
Coef_star<-solve(t(Xstar)%*%Xstar)%*%t(Xstar)%*%y
Coef_star%>%kable(col.names = "Coef")
| Coef | |
|---|---|
| C | 1.6636400 |
| Education | 0.0145390 |
| Experience | 0.0710300 |
| Ability | 0.0266154 |
| M_education | 0.1016307 |
| F_education | 0.0016444 |
| Siblings | 0.0591692 |
#residual matrix for star data
Mstart<-I-((Xstar%*%solve(t(Xstar)%*%Xstar)%*%t(Xstar)))
estar<-Mstart%*%y
s_star<-t(estar)%*%estar
R2star<-1-(s_star/ym0y)
R2star
## [,1]
## [1,] 0.5161341
fitting partitioned regression
#getting Coef of regressing y on X2 partialing out X1
coef_p<-solve(t(X2)%*%MX1%*%X2)%*%(t(X2)%*%MX1%*%y)
coef_p%>%kable(col.names = "coef")
| coef | |
|---|---|
| M_education | 0.1016307 |
| F_education | 0.0016444 |
| Siblings | 0.0591692 |
#getting Coef of regressing y on X1 partialing out X2
coef_P2<-solve(t(X1)%*%X1)%*%t(X1)%*%(y-(X2%*%coef_p))
coef_P2%>%kable(col.names = "coef")
| coef | |
|---|---|
| C | 0.0489963 |
| Education | 0.0258221 |
| Experience | 0.1033912 |
| Ability | 0.0307435 |
Putting R2 together
#Table of R2
cbind(R2,R2_1,R2_c,R2star)%>%kable(col.names =c("R2","R2_1","R2_c","R2star") )
| R2 | R2_1 | R2_c | R2star |
|---|---|---|---|
| 0.5161341 | 0.5159728 | 0 | 0.5161341 |