# Clear the workspace
rm(list = ls()) # Clear environment-remove all files from your workspace
gc() # Clear unused memory
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 522159 27.9 1161690 62.1 660385 35.3
## Vcells 953990 7.3 8388608 64.0 1769879 13.6
graphics.off() # Clear all graphs
# Prepare needed libraries
packages <- c("AER", # load dataset
"plm", # for FE
"stargazer", # summary stats for sharing
"ggplot2", #for graphing,
"psych",
"naniar", # for visualisation of missing data,
"visdat", # for visualisation of missing data,
"VIM", # for visualisation of missing data,
"DataExplorer", # for visualisation of missing data,
"dplyr",
"magrittr"
)
for (i in 1:length(packages)) {
if (!packages[i] %in% rownames(installed.packages())) {
install.packages(packages[i]
, repos = "http://cran.rstudio.com/"
, dependencies = TRUE
)
}
library(packages[i], character.only = TRUE)
}
## Warning: package 'AER' was built under R version 4.3.3
## Loading required package: car
## Warning: package 'car' was built under R version 4.3.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.3.3
## Loading required package: lmtest
## Warning: package 'lmtest' was built under R version 4.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Warning: package 'sandwich' was built under R version 4.3.3
## Loading required package: survival
## Warning: package 'plm' was built under R version 4.3.3
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
## Warning: package 'ggplot2' was built under R version 4.3.3
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:car':
##
## logit
## Warning: package 'naniar' was built under R version 4.3.3
## Warning: package 'visdat' was built under R version 4.3.2
## Warning: package 'VIM' was built under R version 4.3.3
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
## Warning: package 'DataExplorer' was built under R version 4.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plm':
##
## between, lag, lead
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
rm(packages)
# Load necessary library and data
library(AER)
data("Produc")
# Calculate the new metric (GSP per 10,000 per capital)
Produc$gsp_per_capita_rate <- with(data = Produc,
expr = gsp / pcap * 10000
)
# Verify the changes
str(Produc)
## 'data.frame': 816 obs. of 12 variables:
## $ state : Factor w/ 48 levels "ALABAMA","ARIZONA",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ year : int 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 ...
## $ region : Factor w/ 9 levels "1","2","3","4",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ pcap : num 15033 15502 15972 16406 16763 ...
## $ hwy : num 7326 7526 7765 7908 8026 ...
## $ water : num 1656 1721 1765 1742 1735 ...
## $ util : num 6051 6255 6442 6756 7002 ...
## $ pc : num 35794 37300 38670 40084 42057 ...
## $ gsp : int 28418 29375 31303 33430 33749 33604 35764 37463 39964 40979 ...
## $ emp : num 1010 1022 1072 1136 1170 ...
## $ unemp : num 4.7 5.2 4.7 3.9 5.5 7.7 6.8 7.4 6.3 7.1 ...
## $ gsp_per_capita_rate: num 18904 18949 19598 20376 20133 ...
head(Produc)
## state year region pcap hwy water util pc gsp emp
## 1 ALABAMA 1970 6 15032.67 7325.80 1655.68 6051.20 35793.80 28418 1010.5
## 2 ALABAMA 1971 6 15501.94 7525.94 1721.02 6254.98 37299.91 29375 1021.9
## 3 ALABAMA 1972 6 15972.41 7765.42 1764.75 6442.23 38670.30 31303 1072.3
## 4 ALABAMA 1973 6 16406.26 7907.66 1742.41 6756.19 40084.01 33430 1135.5
## 5 ALABAMA 1974 6 16762.67 8025.52 1734.85 7002.29 42057.31 33749 1169.8
## 6 ALABAMA 1975 6 17316.26 8158.23 1752.27 7405.76 43971.71 33604 1155.4
## unemp gsp_per_capita_rate
## 1 4.7 18904.16
## 2 5.2 18949.24
## 3 4.7 19598.17
## 4 3.9 20376.37
## 5 5.5 20133.43
## 6 7.7 19406.04
# Load the stargazer package
library(stargazer)
# Generate summary statistics table for the Produc dataset
stargazer(Produc,
type = "text", # You can use "html" or "latex" as needed
digits = 1, # Number of digits to display
summary.stat = c("mean", "sd", "min", "max", "n"), # Statistics to include
covariate.labels = colnames(Produc) # Optionally rename column labels
)
##
## ==================================================
## Statistic Mean St. Dev. Min Max N
## --------------------------------------------------
## state 1,978.0 4.9 1,970 1,986 816
## year 25,036.7 27,780.4 2,627.1 140,217.3 816
## region 10,218.4 9,253.6 1,827.1 47,699.4 816
## pcap 3,618.8 4,311.7 228.5 24,592.3 816
## hwy 11,199.4 14,768.9 538.5 80,728.1 816
## water 58,188.3 59,770.8 4,052.7 375,341.6 816
## util 61,014.3 69,973.9 4,354 464,550 816
## pc 1,747.1 1,856.0 108.3 11,258.0 816
## gsp 6.6 2.2 2.8 18.0 816
## emp 23,379.1 4,610.5 13,683.5 39,415.5 816
## --------------------------------------------------
dim(Produc)
## [1] 816 12
The PRoduc dataset has 816 observations and 12 variables.
observation : regional
country : United States
Format A data frame containing :
state the state
year the year
region the region
pcap public capital stock
hwy highway and streets
water water and sewer facilities
util other public buildings and structures
pc private capital stock
gsp gross state product
emp labor input measured by the employment in non–agricultural payrolls
unemp state unemployment rate
pdim(Produc)
## Balanced Panel: n = 48, T = 17, N = 816
vis_miss(Produc)
I confimed that the Produc dataset is balanced after I used pdim function. The data has 48 observation for 17 years and the total no.of observations are 816 respectively. Also, there is no missing values in this dataset.
# Load necessary libraries
library(ggplot2)
# Define the function to reorder levels
reorder_size <- function(x) {
factor(x, levels = names(sort(table(x), decreasing = TRUE)))
}
# Create the bar plot using the Produc dataset
ggplot(data = Produc,
aes(x = reorder_size(year))) +
geom_bar() +
xlab("Year") +
ylab("Frequency") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Adjust angle for readability
The estimate equation is :
gsp= β0 + β1pcap + ϵi
Where, gsp is the Gross State Product. pcap is the physical capital per worker.
# Fit a linear model: Predict 'gsp' based on 'pcap'
produc_lm_mod <- lm(data = Produc,
formula = gsp ~ pcap)
# Display the summary of the model
summary(produc_lm_mod)
##
## Call:
## lm(formula = gsp ~ pcap, data = Produc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -82383 -4637 -1212 3278 125096
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -133.38588 807.22707 -0.165 0.869
## pcap 2.44233 0.02159 113.111 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17120 on 814 degrees of freedom
## Multiple R-squared: 0.9402, Adjusted R-squared: 0.9401
## F-statistic: 1.279e+04 on 1 and 814 DF, p-value: < 2.2e-16
The Intercept (β0) is -133.39 and pcap (β1) is 2.44233. The coefficient for ‘pcap’(physical capital per worker) is positive (2.44233), which makes sense theoretically. This suggests that an increase in physical capital per worker is associated with an increase in Gross State Product (GSP).
The magnitude of the coefficient (2.44233) indicates that for every unit increase in ‘pcap’ the GSP is expected to increase by approximately 2.44233 units, holding other factors constant.
p-value for for ‘pcap’ < 2e-16 (very small)
The p-value is highly significant (well below 0.05), indicating that the coefficient for pcap is statistically significant. This means there is strong evidence to reject the null hypothesis.
p-value for Intercept: 0.869
The p-value for the intercept is much larger than 0.05, suggesting that the intercept is not significantly different from zero.
R-squared: 0.9402
This indicates that approximately 94% of the variance in GSP is explained by the model with ‘pcap’. The value is really fit well with dataset.
Residual Standard Error: 17120 is measures the average distance between the observed values and the predicted values of the model.
The estimate equation is, gsp= β0 +β1pcap + β2emp + ϵi Where, ‘emp’ is number of employees.
# Fit a linear model: Predict 'gsp' based on 'pcap' and 'emp'
produc_lm_mod1 <- lm(data = Produc,
formula = gsp ~ pcap + emp)
# Display the summary of the model
summary(produc_lm_mod1)
##
## Call:
## lm(formula = gsp ~ pcap + emp, data = Produc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26080 -4856 766 2989 55592
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.124e+03 4.501e+02 -9.162 < 2e-16 ***
## pcap 2.987e-01 5.036e-02 5.930 4.47e-09 ***
## emp 3.300e+01 7.538e-01 43.783 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9351 on 813 degrees of freedom
## Multiple R-squared: 0.9822, Adjusted R-squared: 0.9821
## F-statistic: 2.241e+04 on 2 and 813 DF, p-value: < 2.2e-16
stargazer(produc_lm_mod1,
type="text"
)
##
## ===============================================
## Dependent variable:
## ---------------------------
## gsp
## -----------------------------------------------
## pcap 0.299***
## (0.050)
##
## emp 33.004***
## (0.754)
##
## Constant -4,123.804***
## (450.110)
##
## -----------------------------------------------
## Observations 816
## R2 0.982
## Adjusted R2 0.982
## Residual Std. Error 9,350.783 (df = 813)
## F Statistic 22,412.880*** (df = 2; 813)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
stargazer(produc_lm_mod, produc_lm_mod1,
type = 'text',
column.labels = c("OLS with no State Dummy","OLS with State Dummy")
)
##
## ===========================================================================
## Dependent variable:
## -------------------------------------------------------
## gsp
## OLS with no State Dummy OLS with State Dummy
## (1) (2)
## ---------------------------------------------------------------------------
## pcap 2.442*** 0.299***
## (0.022) (0.050)
##
## emp 33.004***
## (0.754)
##
## Constant -133.386 -4,123.804***
## (807.227) (450.110)
##
## ---------------------------------------------------------------------------
## Observations 816 816
## R2 0.940 0.982
## Adjusted R2 0.940 0.982
## Residual Std. Error 17,124.400 (df = 814) 9,350.783 (df = 813)
## F Statistic 12,794.140*** (df = 1; 814) 22,412.880*** (df = 2; 813)
## ===========================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Coefficient for ‘pcap’
Estimate: 2.442 Standard Error: 0.022 Significance: *** (p < 0.01)
Interpretation: For each unit increase in pcap and gsp increases by 2.442 units. This coefficient is highly statistically significant, suggesting a strong positive relationship between pcap and gsp.
Constant:
Estimate: -133.386 Standard Error: 807.227 Significance: Not statistically significant (no asterisks)
Interpretation: The intercept term represents the estimated pcap when gsp is zero, but it is not statistically significant in this model.
R-squared: 0.940
Interpretation: The model explains 94% of the variability in gsp which is very high, indicating a good fit.
Adjusted R-squared: 0.940
Interpretation: Adjusted for the number of predictors in the model. It remains high, indicating that adding pcap provides a good explanation of gsp.
Residual Std. Error: 17,124.400
Interpretation: This is the standard deviation of the residuals. A lower value would indicate a better fit. F Statistic: 12,794.140 (df = 1; 814)
Interpretation: This is a test for the overall significance of the model. The high value and significance (p < 0.01) indicate that the model is statistically significant.
Coefficient for pcap:
Estimate: 0.299 Standard Error: 0.050 Significance: *** (p < 0.01) Interpretation: For each unit increase in pcap, gsp increases by 0.299 units. This is significantly lower compared to the coefficient in Model 1, suggesting that controlling for state-specific effects reduces the impact of pcap on gsp.
Coefficient for emp:
Estimate: 33.004 Standard Error: 0.754 Significance: *** (p < 0.01) Interpretation: For each unit increase in emp, gsp increases by 33.004 units. This is a large and highly significant effect, indicating that employment has a strong positive impact on gsp. Constant:
Estimate: -4,123.804 Standard Error: 450.110 Significance: *** (p < 0.01) Interpretation: The intercept term is now significant, reflecting the adjusted baseline level of gsp when both pcap and emp are zero, after controlling for state-specific effects. R-squared: 0.982
Interpretation: The model explains 98.2% of the variability in gsp, indicating an even better fit compared to Model 1. Adjusted R-squared: 0.982
Interpretation: The high value suggests that the model, even after adding more variables, still provides a good explanation of gsp. Residual Std. Error: 9,350.783
Interpretation: A lower residual standard error compared to Model 1, indicating improved model fit with state dummies included. F Statistic: 22,412.880 (df = 2; 813)
Interpretation: A very high F-statistic, indicating that the model with state dummies is highly significant.
Effect of pcap: The coefficient of pcap decreases from 2.442 in Model 1 to 0.299 in Model 2. This significant drop suggests that part of the effect of pcap on gsp in Model 1 was actually capturing unobserved state-specific effects. Including state dummies controls for these unobserved factors, showing that the direct impact of pcap is less pronounced once state-specific effects are accounted for.
New Variable emp: In Model 2, emp is included and is highly significant. This suggests that employment has a significant impact on gsp and that state-specific effects may be more important for explaining variability in gsp than just pcap.
Model Fit: R-squared and adjusted R-squared improve significantly in Model 2, indicating a better fit with state dummies included.
Residuals and Significance: The reduced residual standard error and higher F-statistic in Model 2 suggest that including state dummies has improved the model’s explanatory power and fit.
# Obtain demeaned data by year
Produc_demeaned <- with(data = Produc,
expr = data.frame(
gsp = gsp - ave(x = gsp, year), # Demean 'gsp' by 'year'
pcap = pcap - ave(x = pcap, year), # Demean 'pcap' by 'year'
emp = emp - ave(x = emp, year), # Demean 'emp' by 'year'
unemp = unemp - ave(x = unemp, year) # Demean 'unemp' by 'year'
)
)
# Fit the linear model to the demeaned data
produc_demeaned_lm_mod <- lm(data = Produc_demeaned,
formula = gsp ~ pcap - 1) # '- 1' indicates no intercept
# Display the summary of the model
summary(produc_demeaned_lm_mod)
##
## Call:
## lm(formula = gsp ~ pcap - 1, data = Produc_demeaned)
##
## Residuals:
## Min 1Q Median 3Q Max
## -78176 -5102 -391 3438 117032
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## pcap 2.43671 0.02117 115.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16740 on 815 degrees of freedom
## Multiple R-squared: 0.9421, Adjusted R-squared: 0.942
## F-statistic: 1.325e+04 on 1 and 815 DF, p-value: < 2.2e-16
library(plm)
# Estimate the fixed effects regression with plm()
produc_fe_mod <- plm(formula = gsp ~ pcap,
data = Produc,
index = c("state", "year"), # declaring data to be panel
model = "within" # fixed effects model
)
# Display the summary of the model
summary(produc_fe_mod)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = gsp ~ pcap, data = Produc, model = "within", index = c("state",
## "year"))
##
## Balanced Panel: n = 48, T = 17, N = 816
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -69428.85 -1791.38 -190.95 1470.69 110204.09
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## pcap 2.84940 0.12257 23.247 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1.3289e+11
## Residual Sum of Squares: 7.796e+10
## R-Squared: 0.41335
## Adj. R-Squared: 0.37664
## F-statistic: 540.43 on 1 and 767 DF, p-value: < 2.22e-16
# print summary using robust standard errors
coeftest(produc_fe_mod, vcov. = vcovHC, type = "HC1")
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## pcap 2.84940 0.56487 5.0443 5.686e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
stargazer( # OLS
produc_lm_mod1, produc_demeaned_lm_mod, produc_fe_mod, # fixed effect
type = "text",
column.labels = c(#"OLS - no Dummy",
"OLS - State Dummy",
"Demeaned Reg",
"FE"
)
)
##
## ====================================================================================================
## Dependent variable:
## --------------------------------------------------------------------------------
## gsp
## OLS panel
## linear
## OLS - State Dummy Demeaned Reg FE
## (1) (2) (3)
## ----------------------------------------------------------------------------------------------------
## pcap 0.299*** 2.437*** 2.849***
## (0.050) (0.021) (0.123)
##
## emp 33.004***
## (0.754)
##
## Constant -4,123.804***
## (450.110)
##
## ----------------------------------------------------------------------------------------------------
## Observations 816 816 816
## R2 0.982 0.942 0.413
## Adjusted R2 0.982 0.942 0.377
## Residual Std. Error 9,350.783 (df = 813) 16,744.560 (df = 815)
## F Statistic 22,412.880*** (df = 2; 813) 13,252.270*** (df = 1; 815) 540.430*** (df = 1; 767)
## ====================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Load necessary libraries
library(AER)
library(plm)
library(stargazer)
# Fit the one-way fixed effects model using lm() for the Produc dataset
produc_fe_lm_mod <- lm(data = Produc,
formula = gsp ~ pcap + as.factor(state) + as.factor(year))
# Estimate the two-way fixed effects regression with plm()
produc_fe_mod2 <- plm(formula = gsp ~ pcap + as.factor(year),
data = Produc,
index = c("state", "year"), # Declaring data to be panel
model = "within") # Fixed effects model
# Compare models using stargazer
stargazer(produc_fe_lm_mod, # One-way Fixed Effects (State + Year OLS)
produc_fe_lm_mod, # Two-way Fixed Effects (State + Year OLS)
produc_fe_mod2, # Two-way Fixed Effects (PLM)
type = "text",
column.labels = c("FE - One way (state)",
"FE - Two way (state-year) OLS",
"FE - Two way (state-year) PLM"),
add.lines = list(c('Two Way Fixed effects', "No", "Yes", "Yes"))
)
##
## ====================================================================================================================
## Dependent variable:
## -------------------------------------------------------------------------------------
## gsp
## OLS panel
## linear
## FE - One way (state) FE - Two way (state-year) OLS FE - Two way (state-year) PLM
## (1) (2) (3)
## --------------------------------------------------------------------------------------------------------------------
## pcap 2.231*** 2.231*** 2.231***
## (0.160) (0.160) (0.160)
##
## as.factor(state)ARIZONA -435.620 -435.620
## (3,316.455) (3,316.455)
##
## as.factor(state)ARKANSAS 3,381.503 3,381.503
## (3,571.735) (3,571.735)
##
## as.factor(state)CALIFORNIA 45,147.830** 45,147.830**
## (19,407.610) (19,407.610)
##
## as.factor(state)COLORADO 7,674.150** 7,674.150**
## (3,313.678) (3,313.678)
##
## as.factor(state)CONNECTICUT 8,061.302** 8,061.302**
## (3,281.734) (3,281.734)
##
## as.factor(state)DELAWARE -407.535 -407.535
## (3,899.469) (3,899.469)
##
## as.factor(state)FLORIDA 17,789.810*** 17,789.810***
## (4,875.994) (4,875.994)
##
## as.factor(state)GEORGIA 8,188.580** 8,188.580**
## (3,465.838) (3,465.838)
##
## as.factor(state)IDAHO 2,237.565 2,237.565
## (3,943.262) (3,943.262)
##
## as.factor(state)ILLINOIS 30,031.580*** 30,031.580***
## (7,654.671) (7,654.671)
##
## as.factor(state)INDIANA 14,916.710*** 14,916.710***
## (3,422.802) (3,422.802)
##
## as.factor(state)IOWA -87.358 -87.358
## (3,284.490) (3,284.490)
##
## as.factor(state)KANSAS 3,370.320 3,370.320
## (3,354.405) (3,354.405)
##
## as.factor(state)KENTUCKY -695.338 -695.338
## (3,289.792) (3,289.792)
##
## as.factor(state)LOUISIANA 19,403.480*** 19,403.480***
## (3,400.003) (3,400.003)
##
## as.factor(state)MAINE 2,865.959 2,865.959
## (3,910.283) (3,910.283)
##
## as.factor(state)MARYLAND -3,742.072 -3,742.072
## (3,514.484) (3,514.484)
##
## as.factor(state)MASSACHUSETTS 15,130.500*** 15,130.500***
## (3,668.940) (3,668.940)
##
## as.factor(state)MICHIGAN 10,821.020* 10,821.020*
## (6,068.878) (6,068.878)
##
## as.factor(state)MINNESOTA -4,632.223 -4,632.223
## (3,570.809) (3,570.809)
##
## as.factor(state)MISSISSIPPI -1,163.844 -1,163.844
## (3,423.676) (3,423.676)
##
## as.factor(state)MISSOURI 11,348.380*** 11,348.380***
## (3,375.139) (3,375.139)
##
## as.factor(state)MONTANA -778.071 -778.071
## (3,826.474) (3,826.474)
##
## as.factor(state)NEBRASKA -7,180.471** -7,180.471**
## (3,375.906) (3,375.906)
##
## as.factor(state)NEVADA 2,831.423 2,831.423
## (3,897.046) (3,897.046)
##
## as.factor(state)NEW_HAMPSHIRE 3,373.618 3,373.618
## (3,968.953) (3,968.953)
##
## as.factor(state)NEW_JERSEY 34,096.800*** 34,096.800***
## (3,971.481) (3,971.481)
##
## as.factor(state)NEW_MEXICO 3,932.344 3,932.344
## (3,720.231) (3,720.231)
##
## as.factor(state)NEW_YORK -24,117.880 -24,117.880
## (17,920.510) (17,920.510)
##
## as.factor(state)NORTH_CAROLINA 18,282.710*** 18,282.710***
## (3,345.062) (3,345.062)
##
## as.factor(state)NORTH_DAKOTA 282.151 282.151
## (3,911.680) (3,911.680)
##
## as.factor(state)OHIO 18,742.790*** 18,742.790***
## (6,702.743) (6,702.743)
##
## as.factor(state)OKLAHOMA 13,298.080*** 13,298.080***
## (3,362.952) (3,362.952)
##
## as.factor(state)OREGON 1,065.086 1,065.086
## (3,340.158) (3,340.158)
##
## as.factor(state)PENNSYLVANIA 11,809.930 11,809.930
## (7,478.970) (7,478.970)
##
## as.factor(state)RHODE_ISLAND 2,793.617 2,793.617
## (3,932.614) (3,932.614)
##
## as.factor(state)SOUTH_CAROLINA 5,843.381* 5,843.381*
## (3,446.313) (3,446.313)
##
## as.factor(state)SOUTH_DAKOTA -1,428.932 -1,428.932
## (3,895.531) (3,895.531)
##
## as.factor(state)TENNESSE -4,243.782 -4,243.782
## (3,455.961) (3,455.961)
##
## as.factor(state)TEXAS 73,179.360*** 73,179.360***
## (8,708.798) (8,708.798)
##
## as.factor(state)UTAH -92.931 -92.931
## (3,643.378) (3,643.378)
##
## as.factor(state)VERMONT 742.641 742.641
## (4,063.379) (4,063.379)
##
## as.factor(state)VIRGINIA 10,187.310*** 10,187.310***
## (3,507.875) (3,507.875)
##
## as.factor(state)WASHINGTON -19,465.120*** -19,465.120***
## (4,093.990) (4,093.990)
##
## as.factor(state)WEST_VIRGINIA 966.557 966.557
## (3,538.311) (3,538.311)
##
## as.factor(state)WISCONSIN 1,911.150 1,911.150
## (3,504.954) (3,504.954)
##
## as.factor(state)WYOMING 2,661.946 2,661.946
## (3,938.082) (3,938.082)
##
## as.factor(year)1971 -713.025 -713.025 -713.025
## (1,957.380) (1,957.380) (1,957.380)
##
## as.factor(year)1972 310.307 310.307 310.307
## (1,968.997) (1,968.997) (1,968.997)
##
## as.factor(year)1973 1,810.790 1,810.790 1,810.790
## (1,985.804) (1,985.804) (1,985.804)
##
## as.factor(year)1974 83.941 83.941 83.941
## (2,005.307) (2,005.307) (2,005.307)
##
## as.factor(year)1975 -2,204.030 -2,204.030 -2,204.030
## (2,030.096) (2,030.096) (2,030.096)
##
## as.factor(year)1976 -816.016 -816.016 -816.016
## (2,054.757) (2,054.757) (2,054.757)
##
## as.factor(year)1977 961.151 961.151 961.151
## (2,076.967) (2,076.967) (2,076.967)
##
## as.factor(year)1978 3,379.896 3,379.896 3,379.896
## (2,093.476) (2,093.476) (2,093.476)
##
## as.factor(year)1979 4,107.233* 4,107.233* 4,107.233*
## (2,115.261) (2,115.261) (2,115.261)
##
## as.factor(year)1980 2,790.613 2,790.613 2,790.613
## (2,134.930) (2,134.930) (2,134.930)
##
## as.factor(year)1981 3,221.869 3,221.869 3,221.869
## (2,155.235) (2,155.235) (2,155.235)
##
## as.factor(year)1982 1,520.366 1,520.366 1,520.366
## (2,167.692) (2,167.692) (2,167.692)
##
## as.factor(year)1983 3,349.665 3,349.665 3,349.665
## (2,175.614) (2,175.614) (2,175.614)
##
## as.factor(year)1984 7,834.846*** 7,834.846*** 7,834.846***
## (2,182.975) (2,182.975) (2,182.975)
##
## as.factor(year)1985 10,243.290*** 10,243.290*** 10,243.290***
## (2,195.767) (2,195.767) (2,195.767)
##
## as.factor(year)1986 12,005.980*** 12,005.980*** 12,005.980***
## (2,214.983) (2,214.983) (2,214.983)
##
## Constant -4,686.657 -4,686.657
## (3,467.709) (3,467.709)
##
## --------------------------------------------------------------------------------------------------------------------
## Two Way Fixed effects No Yes Yes
## Observations 816 816 816
## R2 0.983 0.983 0.483
## Adjusted R2 0.981 0.981 0.439
## Residual Std. Error (df = 751) 9,567.755 9,567.755
## F Statistic 669.395*** (df = 64; 751) 669.395*** (df = 64; 751) 41.217*** (df = 17; 751)
## ====================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Compare models using stargazer
stargazer(produc_fe_lm_mod, # One-way Fixed Effects (State + Year OLS)
produc_fe_lm_mod, # Two-way Fixed Effects (State + Year OLS)
produc_fe_mod2, # Two-way Fixed Effects (State + Year PLM)
type = "text",
column.labels = c("FE - One way (state)", # Label for the first model
"FE - Two way (state-year) OLS", # Label for the second model
"FE - Two way (state-year) PLM"), # Label for the third model
add.lines = list(c('Two Way Fixed effects', "No", "Yes", "Yes")), # Add additional lines
keep = c("pcap") # Only keep the coefficient of 'pcap'
)
##
## ====================================================================================================================
## Dependent variable:
## -------------------------------------------------------------------------------------
## gsp
## OLS panel
## linear
## FE - One way (state) FE - Two way (state-year) OLS FE - Two way (state-year) PLM
## (1) (2) (3)
## --------------------------------------------------------------------------------------------------------------------
## pcap 2.231*** 2.231*** 2.231***
## (0.160) (0.160) (0.160)
##
## --------------------------------------------------------------------------------------------------------------------
## Two Way Fixed effects No Yes Yes
## Observations 816 816 816
## R2 0.983 0.983 0.483
## Adjusted R2 0.981 0.981 0.439
## Residual Std. Error (df = 751) 9,567.755 9,567.755
## F Statistic 669.395*** (df = 64; 751) 669.395*** (df = 64; 751) 41.217*** (df = 17; 751)
## ====================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
The table compares the effect of pcap (capital per worker) on gsp (Gross State Product) across different regression models:
The coefficient for pcap is identical across all models and highly significant (p < 0.01). This consistency suggests that the estimated effect of pcap on gsp is robust to the inclusion of different types of fixed effects.
Interpretation:
Interpretation:
Consistency of Coefficient: - The coefficient for pcap is the same across all models, which suggests that the effect of pcap on gsp is robust to different fixed effects specifications. This indicates that the inclusion of state and/or year fixed effects does not alter the estimated impact of pcap.
Fixed Effects Control: - Entity Fixed Effects (State): Controls for time-invariant state-specific factors. - Time Fixed Effects (Year): Controls for time-specific effects impacting all states.
Coefficient Specification: - The consistent coefficient across different fixed effects models suggests that the estimated effect of pcap is not sensitive to whether only state fixed effects, both state and year fixed effects, or an OLS model with these fixed effects are used.
Differences in Model Fit: - Models with higher R-squared values (Models 1 and 2) suggest better fit and explanatory power compared to the OLS model (Model 3), which might be due to the way fixed effects are included or estimated. The lower R-squared in Model 3 could indicate that the OLS approach is less effective at capturing the relationship between pcap and gsp when fixed effects are considered.
The fixed effects models (one-way and two-way) provide robust estimates of the impact of pcap on gsp, controlling for both entity-specific and time-specific unobserved heterogeneity. The consistency of the coefficient across these models reinforces the reliability of the estimated effect.
library(psych)
library(dplyr)
# Create a subset of the Produc dataset
set.seed(7) # For reproducibility
Produc_unbalanced <- Produc %>% select(gsp, pcap, year, state)
# Define the indices to replace with NA
indices_to_replace <- sample(x = 1:length(Produc_unbalanced$gsp),
size = round(.3 * length(Produc_unbalanced$gsp)),
replace = TRUE)
# Introduce NA values
Produc_unbalanced$gsp[indices_to_replace] <- NA
# Describe the 'gsp' column to check for missing values and summary statistics
describe(Produc_unbalanced$gsp)
## vars n mean sd median trimmed mad min max range skew
## X1 1 604 62191.14 72889.64 39258 46571.91 38850.05 4354 464550 460196 2.52
## kurtosis se
## X1 7.25 2965.84
# Define the indices to replace with NA
indices_to_replace <- sample(x = 1:length(Produc_unbalanced$pcap),
size = round(.3 * length(Produc_unbalanced$pcap)),
replace = TRUE)
# Introduce NA values
Produc_unbalanced$pcap[indices_to_replace] <- NA
# Describe the 'pcap' column to check for missing values and summary statistics
describe(Produc_unbalanced$pcap)
## vars n mean sd median trimmed mad min max
## X1 1 610 24779.64 27273.33 17331.96 19184.29 15880.73 2627.12 139910.2
## range skew kurtosis se
## X1 137283.1 2.54 7.15 1104.26
vis_dat(Produc_unbalanced)
vis_miss(Produc_unbalanced)
naniar::gg_miss_upset(Produc_unbalanced)
# Count the number of complete cases where neither 'gsp' nor 'pcap' is missing
complete_cases_count <- sum(!is.na(Produc_unbalanced$gsp) & !is.na(Produc_unbalanced$pcap))
# Print the result
complete_cases_count
## [1] 448
# Fit the original model using lm() on the original dataset
produc_fe_lm_mod <- lm(data = Produc,
formula = gsp ~ pcap + state)
# Fit the model with missing values using lm() on the unbalanced dataset
produc_fe_lm_mod_NA <- lm(data = Produc_unbalanced,
formula = gsp ~ pcap + state)
# Compare models using stargazer
stargazer(produc_fe_lm_mod, # Original model
produc_fe_lm_mod_NA, # Model with missing values
type = "text",
column.labels = c("Original Model",
"Model with Missing Values")
)
##
## =======================================================================
## Dependent variable:
## ---------------------------------------------------
## gsp
## Original Model Model with Missing Values
## (1) (2)
## -----------------------------------------------------------------------
## pcap 2.849*** 2.880***
## (0.123) (0.153)
##
## stateARIZONA 1,417.043 1,063.024
## (3,477.431) (4,830.028)
##
## stateARKANSAS 8,836.930** 8,667.037*
## (3,622.879) (4,886.943)
##
## stateCALIFORNIA -28,869.110* -42,130.670**
## (15,061.340) (18,860.550)
##
## stateCOLORADO 9,450.871*** 8,580.249*
## (3,475.876) (4,618.222)
##
## stateCONNECTICUT 8,106.579** 8,508.150*
## (3,458.031) (5,065.977)
##
## stateDELAWARE 7,742.605** 7,474.868
## (3,816.190) (4,987.466)
##
## stateFLORIDA 3,834.969 1,164.984
## (4,426.758) (5,643.557)
##
## stateGEORGIA 3,875.493 5,083.441
## (3,561.960) (5,003.556)
##
## stateIDAHO 10,697.320*** 10,419.830**
## (3,842.528) (5,265.584)
##
## stateILLINOIS 3,271.777 -3,821.207
## (6,328.138) (8,476.500)
##
## stateINDIANA 11,153.230*** 10,012.160**
## (3,537.435) (4,577.965)
##
## stateIOWA 435.169 136.770
## (3,459.567) (4,780.334)
##
## stateKANSAS 6,057.885* 5,619.256
## (3,498.744) (4,734.365)
##
## stateKENTUCKY -1,586.921 -2,164.425
## (3,462.524) (4,581.490)
##
## stateLOUISIANA 15,963.330*** 15,556.230***
## (3,524.499) (4,710.446)
##
## stateMAINE 11,093.310*** 10,863.670**
## (3,822.683) (5,355.404)
##
## stateMARYLAND -8,609.010** -9,675.033**
## (3,589.845) (4,810.924)
##
## stateMASSACHUSETTS 8,782.245** 10,077.900**
## (3,679.486) (4,661.824)
##
## stateMICHIGAN -8,933.172* -11,291.510*
## (5,221.494) (6,695.382)
##
## stateMINNESOTA -10,078.560*** -9,741.399*
## (3,622.343) (5,219.464)
##
## stateMISSISSIPPI 2,611.512 2,144.493
## (3,537.932) (4,895.418)
##
## stateMISSOURI 8,296.786** 8,872.209*
## (3,510.435) (4,950.210)
##
## stateMONTANA 6,836.417* 6,541.229
## (3,772.544) (4,929.055)
##
## stateNEBRASKA -4,116.200 -6,404.624
## (3,510.868) (5,322.530)
##
## stateNEVADA 10,964.180*** 11,057.940**
## (3,814.736) (5,131.226)
##
## stateNEW_HAMPSHIRE 12,011.390*** 12,058.170**
## (3,858.031) (5,187.211)
##
## stateNEW_JERSEY 25,441.660*** 25,286.080***
## (3,859.558) (5,293.799)
##
## stateNEW_MEXICO 10,712.840*** 10,549.750**
## (3,709.609) (5,001.052)
##
## stateNEW_YORK -92,289.210*** -92,963.820***
## (13,937.090) (17,474.420)
##
## stateNORTH_CAROLINA 15,775.570*** 12,551.060***
## (3,493.486) (4,801.048)
##
## stateNORTH_DAKOTA 8,519.442** 8,346.031
## (3,823.523) (5,141.802)
##
## stateOHIO -3,872.339 -5,264.400
## (5,658.491) (7,108.376)
##
## stateOKLAHOMA 16,141.090*** 15,024.280***
## (3,503.559) (5,354.310)
##
## stateOREGON 3,472.348 3,553.079
## (3,490.730) (4,963.229)
##
## statePENNSYLVANIA -14,195.330** -17,392.200**
## (6,203.522) (7,875.815)
##
## stateRHODE_ISLAND 11,178.830*** 10,895.190**
## (3,836.114) (5,157.741)
##
## stateSOUTH_CAROLINA 9,915.473*** 9,962.342*
## (3,550.816) (5,376.027)
##
## stateSOUTH_DAKOTA 6,692.960* 6,530.299
## (3,813.827) (4,986.225)
##
## stateTENNESSE -8,436.518** -8,701.285*
## (3,556.319) (4,677.506)
##
## stateTEXAS 41,964.560*** 43,457.970***
## (7,083.491) (9,212.140)
##
## stateUTAH 6,030.915 5,648.664
## (3,664.538) (5,325.138)
##
## stateVERMONT 10,014.520** 9,873.379*
## (3,915.335) (5,348.827)
##
## stateVIRGINIA 5,392.290 5,903.444
## (3,586.047) (4,658.014)
##
## stateWASHINGTON -28,936.470*** -28,869.960***
## (3,934.019) (5,483.019)
##
## stateWEST_VIRGINIA 6,085.468* 5,688.002
## (3,603.565) (5,096.347)
##
## stateWISCONSIN -2,851.776 -4,153.211
## (3,584.369) (5,015.852)
##
## stateWYOMING 11,085.490*** 10,847.200**
## (3,839.406) (5,364.970)
##
## Constant -12,972.710*** -12,901.990***
## (3,288.499) (4,585.595)
##
## -----------------------------------------------------------------------
## Observations 816 448
## R2 0.980 0.984
## Adjusted R2 0.979 0.982
## Residual Std. Error 10,081.770 (df = 767) 9,474.853 (df = 399)
## F Statistic 801.948*** (df = 48; 767) 521.558*** (df = 48; 399)
## =======================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
I have a little bit of confusion to find CI for unbalanced data.