I would be glad to hear your feedback at vet.m.mohamed@gmail.com

I want to show you the power of matrix algebra in R for estimating regression coefficients and the coefficient of determination (R2).

The data are taken from William H. Greene, Econometric Analysis, 7th Edition (Prentice Hall, 2011), Chapter 3 application.

library(tidyverse);library(readxl);library(knitr)

importing and preparing the data

dir<-"~/Statistical-practice/data sets/Ch3 application econometric analysis.xlsx"

dat<-read_xlsx(dir)
## New names:
## * education -> education...6
## * education -> education...7
str(dat)
## tibble [15 × 8] (S3: tbl_df/tbl/data.frame)
##  $ Person       : num [1:15] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Education    : num [1:15] 13 15 10 12 15 15 15 13 13 11 ...
##  $ Wage         : num [1:15] 1.82 2.14 1.56 1.85 2.41 1.83 1.78 2.12 1.95 2.19 ...
##  $ Experience   : num [1:15] 1 4 1 1 2 2 3 4 2 5 ...
##  $ Ability      : chr [1:15] "1" "1.5" "−0.36" "0.26" ...
##  $ education...6: num [1:15] 12 12 12 12 12 12 12 12 12 12 ...
##  $ education...7: num [1:15] 12 12 12 10 12 16 12 15 12 12 ...
##  $ Siblings     : num [1:15] 1 1 1 4 1 2 1 2 2 2 ...
#Ability was read as character because the file encodes negatives with the
#Unicode minus sign (U+2212); record which entries are negative
minus<-which(str_detect(dat$Ability,pattern = "−"))

#strip the Unicode minus, convert to numeric, then restore the sign
dat$Ability<-str_remove_all(dat$Ability,"−")

dat$Ability<-as.numeric(dat$Ability)

dat$Ability[minus]<-dat$Ability[minus]*-1
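A shorter alternative (a sketch using stringr's str_replace_all, which the tidyverse loads): replace the Unicode minus with an ASCII hyphen so as.numeric handles the sign in one step.

#equivalent one-step cleanup (alternative to the three steps above)
#dat$Ability<-as.numeric(str_replace_all(dat$Ability,"−","-"))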

names(dat)<-c("Person","Education","Wage","Experience","Ability","M_education","F_education","Siblings")


dat%>%kable()
| Person | Education | Wage | Experience | Ability | M_education | F_education | Siblings |
|-------:|----------:|-----:|-----------:|--------:|------------:|------------:|---------:|
|      1 |        13 | 1.82 |          1 |    1.00 |          12 |          12 |        1 |
|      2 |        15 | 2.14 |          4 |    1.50 |          12 |          12 |        1 |
|      3 |        10 | 1.56 |          1 |   -0.36 |          12 |          12 |        1 |
|      4 |        12 | 1.85 |          1 |    0.26 |          12 |          10 |        4 |
|      5 |        15 | 2.41 |          2 |    0.30 |          12 |          12 |        1 |
|      6 |        15 | 1.83 |          2 |    0.44 |          12 |          16 |        2 |
|      7 |        15 | 1.78 |          3 |    0.91 |          12 |          12 |        1 |
|      8 |        13 | 2.12 |          4 |    0.51 |          12 |          15 |        2 |
|      9 |        13 | 1.95 |          2 |    0.86 |          12 |          12 |        2 |
|     10 |        11 | 2.19 |          5 |    0.26 |          12 |          12 |        2 |
|     11 |        12 | 2.44 |          1 |    1.82 |          16 |          17 |        2 |
|     12 |        13 | 2.41 |          4 |   -1.30 |          13 |          12 |        5 |
|     13 |        12 | 2.07 |          3 |   -0.63 |          12 |          12 |        4 |
|     14 |        12 | 2.20 |          6 |   -0.36 |          10 |          12 |        2 |
|     15 |        12 | 2.12 |          3 |    0.28 |          10 |          12 |        3 |

splitting the data

#X1: a constant column plus the first block of regressors
X1<-cbind(C=rep(1,nrow(dat)),dat[,c("Education","Experience","Ability")])%>%as.matrix()
#X2: the second block of regressors
X2<-dat[,c("M_education","F_education","Siblings")]%>%as.matrix()
#y: the dependent variable (Wage)
y<-dat$Wage%>%as.matrix()

#X: the full design matrix
X<-cbind(X1,X2)%>%as.matrix()

regressing y on X1 and estimating the coefficients using OLS, b = (X1'X1)^(-1) X1'y

coefX1<-solve(t(X1)%*%X1)%*%t(X1)%*%y

coefX1%>%kable(col.names = "coef")
|            |      coef |
|:-----------|----------:|
| C          | 1.6636400 |
| Education  | 0.0145390 |
| Experience | 0.0710300 |
| Ability    | 0.0266154 |
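As a sanity check (a sketch using base R's lm, which is not part of the matrix-only workflow), the built-in least-squares fitter should reproduce these numbers:

#verify the matrix-algebra result against R's built-in OLS
fit1<-lm(Wage~Education+Experience+Ability,data = dat)
coef(fit1) #should match coefX1 up to numerical tolerance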

regressing y on the full X and estimating the coefficients

coefX<-solve(t(X)%*%X)%*%t(X)%*%y
coefX%>%kable(col.names = "Coef")
|             |      Coef |
|:------------|----------:|
| C           | 0.0489963 |
| Education   | 0.0258221 |
| Experience  | 0.1033912 |
| Ability     | 0.0307435 |
| M_education | 0.1016307 |
| F_education | 0.0016444 |
| Siblings    | 0.0591692 |
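The same cross-check works for the full model (fit_full is just an illustrative name, not part of the original code):

#verify against lm for the full set of regressors
fit_full<-lm(Wage~Education+Experience+Ability+M_education+F_education+Siblings,data = dat)
coef(fit_full) #should match coefX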

regressing each column of X2 on X1 to get the fitted values and coefficients

#make an identity matrix

I<-diag(1,nrow = nrow(dat),ncol = nrow(dat))

#make the projection ("hat") matrix of X1

PX1<-X1%*%solve(t(X1)%*%X1)%*%t(X1)

#make the residual-maker matrix of X1

MX1<-I-PX1
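#quick check (an added aside, assuming all.equal's default tolerance):
#a projection matrix is symmetric and idempotent, and MX1 annihilates X1
all.equal(PX1%*%PX1,PX1) #TRUE expected: P is idempotent
all.equal(MX1%*%MX1,MX1) #TRUE expected: M is idempotent
all.equal(MX1%*%X1,matrix(0,nrow(X1),ncol(X1)),check.attributes = FALSE) #M X1 = 0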

#calculate the fitted values of each column of X2

X2starfitted<-PX1%*%X2

X2starfitted%>%kable()
| M_education | F_education |  Siblings |
|------------:|------------:|----------:|
|    12.88906 |    13.22151 | 1.6868244 |
|    12.11518 |    13.71475 | 0.9453077 |
|    12.45630 |    11.95435 | 3.1323313 |
|    12.60065 |    12.54894 | 2.4432204 |
|    12.08366 |    12.66525 | 2.2061892 |
|    12.15406 |    12.78743 | 2.0721015 |
|    12.10445 |    13.19873 | 1.5661708 |
|    11.78476 |    12.79725 | 1.9887931 |
|    12.53269 |    13.10045 | 1.7651327 |
|    11.54051 |    12.52668 | 2.2677501 |
|    13.38513 |    13.91037 | 0.9491009 |
|    10.87456 |    11.21765 | 3.7223549 |
|    11.58117 |    11.77448 | 3.1840761 |
|    10.85905 |    12.01349 | 2.7581405 |
|    12.03878 |    12.56865 | 2.3125064 |
#calculate the coefficients of each X2 variable regressed on X1

coefX2star<-solve(t(X1)%*%X1)%*%t(X1)%*%X2
coefX2star%>%kable()
|            | M_education | F_education |   Siblings |
|:-----------|------------:|------------:|-----------:|
| C          |  13.7604348 |  11.9998132 |  3.3197844 |
| Education  |  -0.0837141 |   0.0267585 | -0.0476471 |
| Experience |  -0.2859633 |   0.0011246 | -0.0557793 |
| Ability    |   0.5028718 |   0.8727106 | -0.9577689 |
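A consistency check (a sketch, reusing the objects defined above): the fitted values obtained from the projection matrix must equal X1 times these coefficients.

#fitted values via the coefficients should reproduce PX1%*%X2
all.equal(X1%*%coefX2star,X2starfitted) #TRUE expected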

calculating R2 of the regression of y on both X1 and X2

#make the centering matrix
i<-rep(1,nrow(dat))
M0<-I-(i%*%solve(t(i)%*%i)%*%t(i))

#residual-maker matrix of the full X
MX<-I-(X%*%solve(t(X)%*%X)%*%t(X))

#residuals of y regressed on X1 and X2 together
e<-MX%*%y

#SSE: sum of squared residuals
s<-t(e)%*%e

#SST: total sum of squares of y around its mean
ym0y<-t(y)%*%M0%*%y

#R2 = 1 - SSE/SST
R2<-1-(s/ym0y)

print(R2)
##           [,1]
## [1,] 0.5161341

computing R2 after removing the constant column from X. Note that without an intercept, R2 computed against the centered total sum of squares is no longer guaranteed to lie between 0 and 1; here it barely changes.

X_1<-X[,-1]

MX_1<-I-(X_1%*%solve(t(X_1)%*%X_1)%*%t(X_1)) #residual matrix

e_1<-MX_1%*%y #residual

s_1<-t(e_1)%*%e_1 #SSE

R2_1<-1-(s_1/ym0y) 

print(R2_1)
##           [,1]
## [1,] 0.5159728

computing R2 for the constant term only

#the centering matrix M0 is exactly the residual-maker matrix of a constant-only regression

e_c<-M0%*%y #residual
s_c<-t(e_c)%*%e_c #SSE

R2_c<-1-(s_c/ym0y) #approximately zero

print(R2_c)
##              [,1]
## [1,] 9.992007e-16
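That 9.99e-16 is floating-point noise: a constant-only regression explains none of the variation around the mean, so the exact value is zero. A quick confirmation (a sketch with base R's lm):

#an intercept-only model has R2 of exactly zero
summary(lm(Wage~1,data = dat))$r.squared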

regressing y on X1 and X2star (X2 after partialling out the effect of X1) and calculating R2

X2star<-MX1%*%X2
Xstar<-cbind(X1,X2star)


Coef_star<-solve(t(Xstar)%*%Xstar)%*%t(Xstar)%*%y

Coef_star%>%kable(col.names = "Coef")
|             |      Coef |
|:------------|----------:|
| C           | 1.6636400 |
| Education   | 0.0145390 |
| Experience  | 0.0710300 |
| Ability     | 0.0266154 |
| M_education | 0.1016307 |
| F_education | 0.0016444 |
| Siblings    | 0.0591692 |
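Notice the pattern (this is the Frisch-Waugh-Lovell theorem at work): because X2star is orthogonal to X1, the X1 coefficients equal those of the short regression of y on X1 alone, while the X2star coefficients equal the X2 coefficients from the full regression. A quick check (a sketch, reusing the objects above):

#X1 block matches the short regression of y on X1
all.equal(Coef_star[1:4, ,drop = FALSE],coefX1) #TRUE expected
#X2star block matches the X2 coefficients from the full regression
all.equal(Coef_star[5:7, ,drop = FALSE],coefX[5:7, ,drop = FALSE]) #TRUE expected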
#residual-maker matrix for the star design matrix

Mstar<-I-(Xstar%*%solve(t(Xstar)%*%Xstar)%*%t(Xstar))

estar<-Mstar%*%y

s_star<-t(estar)%*%estar

R2star<-1-(s_star/ym0y)

R2star
##           [,1]
## [1,] 0.5161341

fitting the partitioned regression (Frisch-Waugh-Lovell)

#coefficients of regressing y on X2 after partialling out X1
coef_p<-solve(t(X2)%*%MX1%*%X2)%*%(t(X2)%*%MX1%*%y)

coef_p%>%kable(col.names = "coef")
|             |      coef |
|:------------|----------:|
| M_education | 0.1016307 |
| F_education | 0.0016444 |
| Siblings    | 0.0591692 |
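These are exactly the M_education, F_education, and Siblings rows of coefX from the full regression, as the partitioned-regression algebra promises. A quick check (a sketch):

all.equal(coef_p,coefX[c("M_education","F_education","Siblings"), ,drop = FALSE]) #TRUE expected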
#coefficients of regressing y on X1, given coef_p (regress y - X2%*%coef_p on X1)

coef_P2<-solve(t(X1)%*%X1)%*%t(X1)%*%(y-(X2%*%coef_p))

coef_P2%>%kable(col.names = "coef")
|            |      coef |
|:-----------|----------:|
| C          | 0.0489963 |
| Education  | 0.0258221 |
| Experience | 0.1033912 |
| Ability    | 0.0307435 |
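And these reproduce the remaining rows of coefX, completing the partitioned fit. A quick check (a sketch):

all.equal(coef_P2,coefX[c("C","Education","Experience","Ability"), ,drop = FALSE]) #TRUE expected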

putting the R2 values together

#Table of R2

cbind(R2,R2_1,R2_c,R2star)%>%kable(col.names =c("R2","R2_1","R2_c","R2star") )
|        R2 |      R2_1 | R2_c |    R2star |
|----------:|----------:|-----:|----------:|
| 0.5161341 | 0.5159728 |    0 | 0.5161341 |

Good Luck