library(readxl) # read_excel() for loading the .xlsx data file
library(skimr) # summary statistics
library(foreign) # import of data files written by other statistical packages (not panel models; unused in the code shown here)
library(plm) # Lagrange multiplier test and panel models

# Load the Grunfeld data from the Excel workbook and convert the object
# returned by read_excel() into a plain data.frame.
data <- read_excel("Grunfeld_data.xlsx")
df <- data.frame(data)

# Preview the first rows.
head(df)
## invest value capital firm year firmcod
## 1 317.6 3078.5 2.8 General Motors 1935 6
## 2 391.8 4661.7 52.6 General Motors 1936 6
## 3 410.6 5387.1 156.9 General Motors 1937 6
## 4 257.7 2792.2 209.2 General Motors 1938 6
## 5 330.8 4313.2 203.4 General Motors 1939 6
## 6 461.2 4643.9 207.2 General Motors 1940 6
# Drop the numeric firm code (the `firm` column is kept as the identifier)
# and store both panel identifiers as factors; `year` is an ordered factor.
df$firmcod <- NULL
df$firm <- factor(df$firm)
df$year <- factor(df$year, ordered = TRUE)

# Column-wise summary of the cleaned data.
summary(df)
## invest value capital firm
## Min. : 0.93 Min. : 30.28 Min. : 0.8 American Steel : 20
## 1st Qu.: 27.38 1st Qu.: 160.32 1st Qu.: 67.1 Atlantic Refining: 20
## Median : 52.37 Median : 404.65 Median : 180.1 Chrysler : 20
## Mean : 133.31 Mean : 988.58 Mean : 257.1 Diamond Match : 20
## 3rd Qu.: 99.78 3rd Qu.:1605.92 3rd Qu.: 344.5 General Electric : 20
## Max. :1486.70 Max. :6241.70 Max. :2226.3 General Motors : 20
## (Other) :100
## year
## 1935 : 11
## 1936 : 11
## 1937 : 11
## 1938 : 11
## 1939 : 11
## 1940 : 11
## (Other):154
Mô hình hồi quy bằng phương pháp bình phương nhỏ nhất (bình phương tối thiểu), viết tắt là OLS.
# Pooled OLS baseline: regress invest on value and capital with lm(),
# without any firm or year index.
mlr <- lm(invest ~ value + capital, data = df)
Panel data models use one way and two way component models to overcome heterogeneity, correlation in the disturbance terms, and heteroscedasticity.
One way error component model: variable-intercept models across individuals or time; Two way error component model: variable-intercept models across individuals and time.
Modelling Specifications:
With fixed effects: effects that are in the sample. Fixed effects explore the causes of change within a person or entity (in this example, the entities are the firms);
With random effects: effects randomly drawn from a population. The random effects model is an appropriate specification if we are drawing n individuals randomly from a large population.
# One-way (individual) fixed-effects model: the "within" estimator with
# `firm` as the individual index and `year` as the time index.
fixed <- plm(
  invest ~ value + capital,
  data = df,
  index = c("firm", "year"), # panel settings
  model = "within" # fixed effects option
)
summary(fixed)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = invest ~ value + capital, data = df, model = "within",
## index = c("firm", "year"))
##
## Balanced Panel: n = 11, T = 20, N = 220
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -184.00792 -15.66024 0.27161 16.41421 250.75337
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## value 0.11013 0.01130 9.7461 < 2.2e-16 ***
## capital 0.31003 0.01654 18.7439 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 2244500
## Residual Sum of Squares: 523720
## R-Squared: 0.76667
## Adj. R-Squared: 0.75314
## F-statistic: 340.079 on 2 and 207 DF, p-value: < 2.22e-16
# One-way (individual) random-effects model on the same specification and
# panel index as the fixed-effects fit.
random <- plm(
  invest ~ value + capital,
  data = df,
  index = c("firm", "year"), # panel settings
  model = "random" # random effects option
)
summary(random)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = invest ~ value + capital, data = df, model = "random",
## index = c("firm", "year"))
##
## Balanced Panel: n = 11, T = 20, N = 220
##
## Effects:
## var std.dev share
## idiosyncratic 2530.04 50.30 0.29
## individual 6201.93 78.75 0.71
## theta: 0.8586
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -178.0540 -18.9286 4.2636 14.8933 253.3183
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) -53.9436014 25.6969760 -2.0992 0.0358 *
## value 0.1093053 0.0099138 11.0256 <2e-16 ***
## capital 0.3080360 0.0163873 18.7972 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 2393800
## Residual Sum of Squares: 550610
## R-Squared: 0.76999
## Adj. R-Squared: 0.76787
## Chisq: 726.428 on 2 DF, p-value: < 2.22e-16
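Use the Hausman test to decide between fixed and random effects. H0: the random effects model is more appropriate than the fixed effects model (i.e. the individual effects are uncorrelated with the regressors, so both estimators are consistent and random effects is the efficient one).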
# Hausman test comparing the random-effects and fixed-effects fits.
phtest(random, fixed)
##
## Hausman Test
##
## data: invest ~ value + capital
## chisq = 3.9675, df = 2, p-value = 0.1376
## alternative hypothesis: one model is inconsistent
The p-value (0.1376) is above 0.05, so H0 is not rejected: the random effects model is more appropriate than the fixed effects model.
# Two-way fixed-effects ("within") model: effects for both panel
# dimensions, firm and year.
fixed_tw <- plm(
  formula = invest ~ value + capital,
  data = df,
  index = c("firm", "year"), # panel settings
  model = "within", # fixed
  effect = "twoways" # effects option
)
summary(fixed_tw)
## Twoways effects Within Model
##
## Call:
## plm(formula = invest ~ value + capital, data = df, effect = "twoways",
## model = "within", index = c("firm", "year"))
##
## Balanced Panel: n = 11, T = 20, N = 220
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -163.4113 -17.6747 -1.8345 17.9490 217.1070
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## value 0.116681 0.012933 9.0219 < 2.2e-16 ***
## capital 0.351436 0.021049 16.6964 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1672200
## Residual Sum of Squares: 459400
## R-Squared: 0.72527
## Adj. R-Squared: 0.67997
## F-statistic: 248.15 on 2 and 188 DF, p-value: < 2.22e-16
# Two-way random-effects model; the variance components are estimated
# with the "amemiya" method.
random_tw <- plm(
  formula = invest ~ value + capital,
  data = df,
  index = c("firm", "year"),
  model = "random",
  effect = "twoways",
  random.method = "amemiya"
)
summary(random_tw)
## Twoways effects Random Effect Model
## (Amemiya's transformation)
##
## Call:
## plm(formula = invest ~ value + capital, data = df, effect = "twoways",
## model = "random", random.method = "amemiya", index = c("firm",
## "year"))
##
## Balanced Panel: n = 11, T = 20, N = 220
##
## Effects:
## var std.dev share
## idiosyncratic 2417.89 49.17 0.256
## individual 6859.38 82.82 0.726
## time 175.63 13.25 0.019
## theta: 0.8684 (id) 0.2544 (time) 0.2535 (total)
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -176.9380 -16.0966 3.9358 15.6184 237.6072
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) -58.498376 27.083483 -2.1599 0.03078 *
## value 0.110623 0.010299 10.7413 < 2e-16 ***
## capital 0.320686 0.017630 18.1897 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 2119700
## Residual Sum of Squares: 523110
## R-Squared: 0.75321
## Adj. R-Squared: 0.75094
## Chisq: 662.3 on 2 DF, p-value: < 2.22e-16
The Lagrange multiplier statistic is used to test the null hypothesis H0: there are no group effects in the random effects model.
Large values of the Lagrange multiplier statistic indicate that an effects model is more suitable than the classical model with no common effects.
# Lagrange multiplier test run with plmtest()'s default settings on the
# two-way random-effects fit.
plmtest(random_tw)
##
## Lagrange Multiplier Test - (Honda) for balanced panels
##
## data: invest ~ value + capital
## normal = 29.576, p-value < 2.2e-16
## alternative hypothesis: significant effects
Note: Large values of the Hausman statistic H indicate that the fixed effects model is preferred over the random effects model, while a large value of the LM statistic together with a small H statistic indicates that the random effects model is more suitable.