This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
data("Grunfeld", package="plm")
library(AER)
library(tidyr)
library(ggplot2)
library(dplyr)
library(plm)
library(sandwich)
library(lmtest)
library(tidyverse)
library(car)
data <- as.data.frame(Grunfeld)
head(data)
#Loading in the data and packages
data %>%
select(inv, value, capital) %>%
summary()
inv value capital
Min. : 0.93 Min. : 58.12 Min. : 0.80
1st Qu.: 33.56 1st Qu.: 199.97 1st Qu.: 79.17
Median : 57.48 Median : 517.95 Median : 205.60
Mean : 145.96 Mean :1081.68 Mean : 276.02
3rd Qu.: 138.04 3rd Qu.:1679.85 3rd Qu.: 358.10
Max. :1486.70 Max. :6241.70 Max. :2226.30
#Quick summary of the data
ggplot(data, aes(x = value, y = inv)) +
geom_point(alpha = 0.6) + geom_smooth(method = "lm", se = TRUE, color = "red") +
labs(title = "Investment vs Market Value", x = "Market Value", y = "Investment")
ggplot(data, aes(x = capital, y = inv)) +
geom_point(alpha = 0.6) + geom_smooth(method = "lm", se = TRUE, color = "blue") +
labs(title = "Investment vs Capital Stock", x = "Capital", y = "Investment")
#Scatter plots comparing value and captial on x axis with invesment on y
ggplot(gather(data), aes(value)) +
geom_histogram(bins = 30, fill = "skyblue") +
facet_wrap(~key, scales = 'free') +
theme_dark()
#simple histograms
#model without year and firm
model1 <- lm(inv ~ value + capital, data = data)
summary(model1)
Call:
lm(formula = inv ~ value + capital, data = data)
Residuals:
Min 1Q Median 3Q Max
-291.68 -30.01 5.30 34.83 369.45
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -42.714369 9.511676 -4.491 1.21e-05 ***
value 0.115562 0.005836 19.803 < 2e-16 ***
capital 0.230678 0.025476 9.055 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 94.41 on 197 degrees of freedom
Multiple R-squared: 0.8124, Adjusted R-squared: 0.8105
F-statistic: 426.6 on 2 and 197 DF, p-value: < 2.2e-16
plot(model1)
#Model with year and firm
model2 <- lm(inv ~ value + capital + firm + year, data = data)
summary(model2)
Call:
lm(formula = inv ~ value + capital + firm + year, data = data)
Residuals:
Min 1Q Median 3Q Max
-293.93 -28.04 5.38 35.42 386.78
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.781e+03 2.732e+03 -0.652 0.515
value 1.127e-01 8.323e-03 13.539 < 2e-16 ***
capital 2.186e-01 3.090e-02 7.075 2.63e-11 ***
firm -2.364e+00 3.668e+00 -0.645 0.520
year 9.037e-01 1.408e+00 0.642 0.522
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 94.72 on 195 degrees of freedom
Multiple R-squared: 0.8131, Adjusted R-squared: 0.8093
F-statistic: 212.1 on 4 and 195 DF, p-value: < 2.2e-16
plot(model2)
# fixed effects
fe_model <- plm(inv ~ value + capital, data = data,
index = c("firm", "year"),
effect = "twoways",
model = "within")
# Random
re_model <- plm(inv ~ value + capital, data = data,
index = c("firm", "year"),
effect = "twoways",
model = "random")
#We should use the fixed effects model because the p value is less than 0.05
summary(fe_model)
Twoways effects Within Model
Call:
plm(formula = inv ~ value + capital, data = data, effect = "twoways",
model = "within", index = c("firm", "year"))
Balanced Panel: n = 10, T = 20, N = 200
Residuals:
Min. 1st Qu. Median 3rd Qu. Max.
-162.6094 -19.4710 -1.2669 19.1277 211.8420
Coefficients:
Estimate Std. Error t-value Pr(>|t|)
value 0.117716 0.013751 8.5604 6.653e-15 ***
capital 0.357916 0.022719 15.7540 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Total Sum of Squares: 1615600
Residual Sum of Squares: 452150
R-Squared: 0.72015
Adj. R-Squared: 0.67047
F-statistic: 217.442 on 2 and 169 DF, p-value: < 2.22e-16
#This is to check for mulitcolinearity
vif(model1)
value capital
1.313773 1.313773
#this test suggests multicolinearity is not an issue.
coeftest(fe_model, vcov. = vcovHC, type = "HC1")
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
value 0.117716 0.009761 12.0599 < 2.2e-16 ***
capital 0.357916 0.043147 8.2952 3.277e-14 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
fixef(fe_model, type = "dmean")
1 2 3 4 5 6 7 8 9 10
-54.0639 152.9903 -189.2947 41.2899 -59.5025 48.8247 -2.5973 13.4266 -23.8464 72.7732
plmtest(fe_model, effect="twoways")
Lagrange Multiplier Test - two-ways effects (Honda)
data: inv ~ value + capital
normal = 18.181, p-value < 2.2e-16
alternative hypothesis: significant effects
#Lets now use the random effects model
summary(re_model)
Twoways effects Random Effect Model
(Swamy-Arora's transformation)
Call:
plm(formula = inv ~ value + capital, data = data, effect = "twoways",
model = "random", index = c("firm", "year"))
Balanced Panel: n = 10, T = 20, N = 200
Effects:
var std.dev share
idiosyncratic 2675.43 51.72 0.274
individual 7095.25 84.23 0.726
time 0.00 0.00 0.000
theta: 0.864 (id) 0 (time) 0 (total)
Residuals:
Min. 1st Qu. Median 3rd Qu. Max.
-177.1700 -19.7576 4.6048 19.4676 252.7596
Coefficients:
Estimate Std. Error z-value Pr(>|z|)
(Intercept) -57.865377 29.393359 -1.9687 0.04899 *
value 0.109790 0.010528 10.4285 < 2e-16 ***
capital 0.308190 0.017171 17.9483 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Total Sum of Squares: 2376000
Residual Sum of Squares: 547910
R-Squared: 0.7694
Adj. R-Squared: 0.76706
Chisq: 657.295 on 2 DF, p-value: < 2.22e-16
phtest(fe_model, re_model)
Hausman Test
data: inv ~ value + capital
chisq = 13.46, df = 2, p-value = 0.001194
alternative hypothesis: one model is inconsistent
#Im now gonna look at residuals
plot(resid(fe_model),
main = "Residuals from Fixed Effects Model",
xlab = "Observation Index",
ylab = "Residuals")
#This is just to see the resiudals
residuals_df <- data.frame(
fitted = fitted(fe_model),
resid = resid(fe_model))
ggplot(residuals_df, aes(x = fitted, y = resid)) +
geom_point(alpha = 0.6, color = "blue") +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
labs(title = "Residuals vs Fitted Values",
x = "Fitted Values",
y = "Residuals") +
theme_minimal()