# Clear the workspace
rm(list = ls()) # Clear environment-remove all files from your workspace
gc()            # Clear unused memory
##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 522159 27.9    1161690 62.1   660385 35.3
## Vcells 953990  7.3    8388608 64.0  1769879 13.6
graphics.off()  # Clear all graphs


# Prepare needed libraries
packages <- c("AER",          # load dataset
              "plm",          # for FE
              "stargazer",    # summary stats for sharing
              "ggplot2",      #for graphing,
              "psych",
              "naniar",      # for visualisation of missing data,
              "visdat",      # for visualisation of missing data,
              "VIM",         # for visualisation of missing data,
              "DataExplorer", # for visualisation of missing data,
              "dplyr",
              "magrittr"
              )

for (i in 1:length(packages)) {
  if (!packages[i] %in% rownames(installed.packages())) {
    install.packages(packages[i]
                     , repos = "http://cran.rstudio.com/"
                     , dependencies = TRUE
                     )
  }
  library(packages[i], character.only = TRUE)
}
## Warning: package 'AER' was built under R version 4.3.3
## Loading required package: car
## Warning: package 'car' was built under R version 4.3.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.3.3
## Loading required package: lmtest
## Warning: package 'lmtest' was built under R version 4.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.3
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
## Warning: package 'sandwich' was built under R version 4.3.3
## Loading required package: survival
## Warning: package 'plm' was built under R version 4.3.3
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
## Warning: package 'ggplot2' was built under R version 4.3.3
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## The following object is masked from 'package:car':
## 
##     logit
## Warning: package 'naniar' was built under R version 4.3.3
## Warning: package 'visdat' was built under R version 4.3.2
## Warning: package 'VIM' was built under R version 4.3.3
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
## 
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
## 
##     sleep
## Warning: package 'DataExplorer' was built under R version 4.3.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plm':
## 
##     between, lag, lead
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
rm(packages)

Please choose any panel data (can check out inbuilt R datasets or from the AER package), and show/tell if your data is balanced or not. What is the time component and the entity component in the data?

# Load necessary library and data
library(AER)
data("Produc")

# Calculate the new metric (GSP per 10,000 per capital)
Produc$gsp_per_capita_rate <- with(data = Produc,
                                    expr = gsp / pcap * 10000
                                    )

# Verify the changes
str(Produc)
## 'data.frame':    816 obs. of  12 variables:
##  $ state              : Factor w/ 48 levels "ALABAMA","ARIZONA",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ year               : int  1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 ...
##  $ region             : Factor w/ 9 levels "1","2","3","4",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ pcap               : num  15033 15502 15972 16406 16763 ...
##  $ hwy                : num  7326 7526 7765 7908 8026 ...
##  $ water              : num  1656 1721 1765 1742 1735 ...
##  $ util               : num  6051 6255 6442 6756 7002 ...
##  $ pc                 : num  35794 37300 38670 40084 42057 ...
##  $ gsp                : int  28418 29375 31303 33430 33749 33604 35764 37463 39964 40979 ...
##  $ emp                : num  1010 1022 1072 1136 1170 ...
##  $ unemp              : num  4.7 5.2 4.7 3.9 5.5 7.7 6.8 7.4 6.3 7.1 ...
##  $ gsp_per_capita_rate: num  18904 18949 19598 20376 20133 ...
head(Produc)
##     state year region     pcap     hwy   water    util       pc   gsp    emp
## 1 ALABAMA 1970      6 15032.67 7325.80 1655.68 6051.20 35793.80 28418 1010.5
## 2 ALABAMA 1971      6 15501.94 7525.94 1721.02 6254.98 37299.91 29375 1021.9
## 3 ALABAMA 1972      6 15972.41 7765.42 1764.75 6442.23 38670.30 31303 1072.3
## 4 ALABAMA 1973      6 16406.26 7907.66 1742.41 6756.19 40084.01 33430 1135.5
## 5 ALABAMA 1974      6 16762.67 8025.52 1734.85 7002.29 42057.31 33749 1169.8
## 6 ALABAMA 1975      6 17316.26 8158.23 1752.27 7405.76 43971.71 33604 1155.4
##   unemp gsp_per_capita_rate
## 1   4.7            18904.16
## 2   5.2            18949.24
## 3   4.7            19598.17
## 4   3.9            20376.37
## 5   5.5            20133.43
## 6   7.7            19406.04
# Load the stargazer package
library(stargazer)

# Generate summary statistics table for the Produc dataset
stargazer(Produc, 
          type = "text",   # You can use "html" or "latex" as needed
          digits = 1,      # Number of digits to display
          summary.stat = c("mean", "sd", "min", "max", "n"),  # Statistics to include
          covariate.labels = colnames(Produc) # Optionally rename column labels
          )
## 
## ==================================================
## Statistic   Mean   St. Dev.   Min       Max     N 
## --------------------------------------------------
## state     1,978.0    4.9     1,970     1,986   816
## year      25,036.7 27,780.4 2,627.1  140,217.3 816
## region    10,218.4 9,253.6  1,827.1  47,699.4  816
## pcap      3,618.8  4,311.7   228.5   24,592.3  816
## hwy       11,199.4 14,768.9  538.5   80,728.1  816
## water     58,188.3 59,770.8 4,052.7  375,341.6 816
## util      61,014.3 69,973.9  4,354    464,550  816
## pc        1,747.1  1,856.0   108.3   11,258.0  816
## gsp         6.6      2.2      2.8      18.0    816
## emp       23,379.1 4,610.5  13,683.5 39,415.5  816
## --------------------------------------------------
dim(Produc)
## [1] 816  12

The PRoduc dataset has 816 observations and 12 variables.

observation : regional

country : United States

Format A data frame containing :

state the state

year the year

region the region

pcap public capital stock

hwy highway and streets

water water and sewer facilities

util other public buildings and structures

pc private capital stock

gsp gross state product

emp labor input measured by the employment in non–agricultural payrolls

unemp state unemployment rate

pdim(Produc)
## Balanced Panel: n = 48, T = 17, N = 816
vis_miss(Produc)

I confimed that the Produc dataset is balanced after I used pdim function. The data has 48 observation for 17 years and the total no.of observations are 816 respectively. Also, there is no missing values in this dataset.

# Load necessary libraries
library(ggplot2)

# Define the function to reorder levels
reorder_size <- function(x) {
  factor(x, levels = names(sort(table(x), decreasing = TRUE)))
}

# Create the bar plot using the Produc dataset
ggplot(data = Produc, 
       aes(x = reorder_size(year))) + 
  geom_bar() + 
  xlab("Year") + 
  ylab("Frequency") + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Adjust angle for readability

OLS

Type out meaningful estimating equation and run the OLS regression/estimate the coefficients.

The estimate equation is :

                         gsp= β0 + β1pcap + ϵi

Where, gsp is the Gross State Product. pcap is the physical capital per worker.

# Fit a linear model: Predict 'gsp' based on 'pcap'
produc_lm_mod <- lm(data = Produc,
                     formula = gsp ~ pcap)

# Display the summary of the model
summary(produc_lm_mod)
## 
## Call:
## lm(formula = gsp ~ pcap, data = Produc)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -82383  -4637  -1212   3278 125096 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -133.38588  807.22707  -0.165    0.869    
## pcap           2.44233    0.02159 113.111   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17120 on 814 degrees of freedom
## Multiple R-squared:  0.9402, Adjusted R-squared:  0.9401 
## F-statistic: 1.279e+04 on 1 and 814 DF,  p-value: < 2.2e-16

Do the estimated coefficients make sense (direction, magnitude, statistical significance)?

The Intercept (β0) is -133.39 and pcap (β1) is 2.44233. The coefficient for ‘pcap’(physical capital per worker) is positive (2.44233), which makes sense theoretically. This suggests that an increase in physical capital per worker is associated with an increase in Gross State Product (GSP).

The magnitude of the coefficient (2.44233) indicates that for every unit increase in ‘pcap’ the GSP is expected to increase by approximately 2.44233 units, holding other factors constant.

p-value for for ‘pcap’ < 2e-16 (very small)

The p-value is highly significant (well below 0.05), indicating that the coefficient for pcap is statistically significant. This means there is strong evidence to reject the null hypothesis.

p-value for Intercept: 0.869

The p-value for the intercept is much larger than 0.05, suggesting that the intercept is not significantly different from zero.

R-squared: 0.9402

This indicates that approximately 94% of the variance in GSP is explained by the model with ‘pcap’. The value is really fit well with dataset.

Residual Standard Error: 17120 is measures the average distance between the observed values and the predicted values of the model.

Model: OLS with Dummy Variables

Could there be omitted variable bias that could potentially be reduced by throwing in fixed effects?

The estimate equation is, gsp= β0 +β1pcap + β2emp + ϵi Where, ‘emp’ is number of employees.

# Fit a linear model: Predict 'gsp' based on 'pcap' and 'emp'
produc_lm_mod1 <- lm(data = Produc,
                     formula = gsp ~ pcap + emp)

# Display the summary of the model
summary(produc_lm_mod1)
## 
## Call:
## lm(formula = gsp ~ pcap + emp, data = Produc)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -26080  -4856    766   2989  55592 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.124e+03  4.501e+02  -9.162  < 2e-16 ***
## pcap         2.987e-01  5.036e-02   5.930 4.47e-09 ***
## emp          3.300e+01  7.538e-01  43.783  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9351 on 813 degrees of freedom
## Multiple R-squared:  0.9822, Adjusted R-squared:  0.9821 
## F-statistic: 2.241e+04 on 2 and 813 DF,  p-value: < 2.2e-16
stargazer(produc_lm_mod1,
          type="text"
          )
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                                 gsp            
## -----------------------------------------------
## pcap                         0.299***          
##                               (0.050)          
##                                                
## emp                          33.004***         
##                               (0.754)          
##                                                
## Constant                   -4,123.804***       
##                              (450.110)         
##                                                
## -----------------------------------------------
## Observations                    816            
## R2                             0.982           
## Adjusted R2                    0.982           
## Residual Std. Error    9,350.783 (df = 813)    
## F Statistic         22,412.880*** (df = 2; 813)
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
stargazer(produc_lm_mod, produc_lm_mod1, 
          type  = 'text',
          column.labels = c("OLS with no State Dummy","OLS with State Dummy")
          )
## 
## ===========================================================================
##                                       Dependent variable:                  
##                     -------------------------------------------------------
##                                               gsp                          
##                       OLS with no State Dummy      OLS with State Dummy    
##                                 (1)                         (2)            
## ---------------------------------------------------------------------------
## pcap                         2.442***                    0.299***          
##                               (0.022)                     (0.050)          
##                                                                            
## emp                                                      33.004***         
##                                                           (0.754)          
##                                                                            
## Constant                     -133.386                  -4,123.804***       
##                              (807.227)                   (450.110)         
##                                                                            
## ---------------------------------------------------------------------------
## Observations                    816                         816            
## R2                             0.940                       0.982           
## Adjusted R2                    0.940                       0.982           
## Residual Std. Error    17,124.400 (df = 814)       9,350.783 (df = 813)    
## F Statistic         12,794.140*** (df = 1; 814) 22,412.880*** (df = 2; 813)
## ===========================================================================
## Note:                                           *p<0.1; **p<0.05; ***p<0.01

Model 1: OLS with No State Dummy

Coefficient for ‘pcap’

Estimate: 2.442 Standard Error: 0.022 Significance: *** (p < 0.01)

Interpretation: For each unit increase in pcap and gsp increases by 2.442 units. This coefficient is highly statistically significant, suggesting a strong positive relationship between pcap and gsp.

Constant:

Estimate: -133.386 Standard Error: 807.227 Significance: Not statistically significant (no asterisks)

Interpretation: The intercept term represents the estimated pcap when gsp is zero, but it is not statistically significant in this model.

R-squared: 0.940

Interpretation: The model explains 94% of the variability in gsp which is very high, indicating a good fit.

Adjusted R-squared: 0.940

Interpretation: Adjusted for the number of predictors in the model. It remains high, indicating that adding pcap provides a good explanation of gsp.

Residual Std. Error: 17,124.400

Interpretation: This is the standard deviation of the residuals. A lower value would indicate a better fit. F Statistic: 12,794.140 (df = 1; 814)

Interpretation: This is a test for the overall significance of the model. The high value and significance (p < 0.01) indicate that the model is statistically significant.

Model 2: OLS with State Dummy

Coefficient for pcap:

Estimate: 0.299 Standard Error: 0.050 Significance: *** (p < 0.01) Interpretation: For each unit increase in pcap, gsp increases by 0.299 units. This is significantly lower compared to the coefficient in Model 1, suggesting that controlling for state-specific effects reduces the impact of pcap on gsp.

Coefficient for emp:

Estimate: 33.004 Standard Error: 0.754 Significance: *** (p < 0.01) Interpretation: For each unit increase in emp, gsp increases by 33.004 units. This is a large and highly significant effect, indicating that employment has a strong positive impact on gsp. Constant:

Estimate: -4,123.804 Standard Error: 450.110 Significance: *** (p < 0.01) Interpretation: The intercept term is now significant, reflecting the adjusted baseline level of gsp when both pcap and emp are zero, after controlling for state-specific effects. R-squared: 0.982

Interpretation: The model explains 98.2% of the variability in gsp, indicating an even better fit compared to Model 1. Adjusted R-squared: 0.982

Interpretation: The high value suggests that the model, even after adding more variables, still provides a good explanation of gsp. Residual Std. Error: 9,350.783

Interpretation: A lower residual standard error compared to Model 1, indicating improved model fit with state dummies included. F Statistic: 22,412.880 (df = 2; 813)

Interpretation: A very high F-statistic, indicating that the model with state dummies is highly significant.

Comparison and Interpretation

Effect of pcap: The coefficient of pcap decreases from 2.442 in Model 1 to 0.299 in Model 2. This significant drop suggests that part of the effect of pcap on gsp in Model 1 was actually capturing unobserved state-specific effects. Including state dummies controls for these unobserved factors, showing that the direct impact of pcap is less pronounced once state-specific effects are accounted for.

New Variable emp: In Model 2, emp is included and is highly significant. This suggests that employment has a significant impact on gsp and that state-specific effects may be more important for explaining variability in gsp than just pcap.

Model Fit: R-squared and adjusted R-squared improve significantly in Model 2, indicating a better fit with state dummies included.

Residuals and Significance: The reduced residual standard error and higher F-statistic in Model 2 suggest that including state dummies has improved the model’s explanatory power and fit.

Model: Demeaned Regression

# Obtain demeaned data by year
Produc_demeaned <- with(data = Produc,
                        expr = data.frame(
                          gsp     = gsp     - ave(x = gsp, year),  # Demean 'gsp' by 'year'
                          pcap    = pcap    - ave(x = pcap, year), # Demean 'pcap' by 'year'
                          emp     = emp     - ave(x = emp, year),  # Demean 'emp' by 'year'
                          unemp   = unemp   - ave(x = unemp, year) # Demean 'unemp' by 'year'
                        )
                       )
# Fit the linear model to the demeaned data
produc_demeaned_lm_mod <- lm(data = Produc_demeaned,
                             formula = gsp ~ pcap - 1)  # '- 1' indicates no intercept

# Display the summary of the model
summary(produc_demeaned_lm_mod)
## 
## Call:
## lm(formula = gsp ~ pcap - 1, data = Produc_demeaned)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -78176  -5102   -391   3438 117032 
## 
## Coefficients:
##      Estimate Std. Error t value Pr(>|t|)    
## pcap  2.43671    0.02117   115.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16740 on 815 degrees of freedom
## Multiple R-squared:  0.9421, Adjusted R-squared:  0.942 
## F-statistic: 1.325e+04 on 1 and 815 DF,  p-value: < 2.2e-16

Model: Fixed Effects

Now, run a fixed effects model (there are three different ways to do so)

library(plm)

# Estimate the fixed effects regression with plm()
produc_fe_mod <- plm(formula = gsp ~ pcap, 
                     data    = Produc,
                     index   = c("state", "year"), # declaring data to be panel
                     model   = "within"            # fixed effects model
                     )

# Display the summary of the model
summary(produc_fe_mod)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = gsp ~ pcap, data = Produc, model = "within", index = c("state", 
##     "year"))
## 
## Balanced Panel: n = 48, T = 17, N = 816
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -69428.85  -1791.38   -190.95   1470.69 110204.09 
## 
## Coefficients:
##      Estimate Std. Error t-value  Pr(>|t|)    
## pcap  2.84940    0.12257  23.247 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    1.3289e+11
## Residual Sum of Squares: 7.796e+10
## R-Squared:      0.41335
## Adj. R-Squared: 0.37664
## F-statistic: 540.43 on 1 and 767 DF, p-value: < 2.22e-16
# print summary using robust standard errors
coeftest(produc_fe_mod, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##      Estimate Std. Error t value  Pr(>|t|)    
## pcap  2.84940    0.56487  5.0443 5.686e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
stargazer(                                            # OLS
          produc_lm_mod1, produc_demeaned_lm_mod,  produc_fe_mod,    # fixed effect
                  type  = "text",
          column.labels = c(#"OLS - no Dummy",
                            "OLS - State Dummy",
                            "Demeaned Reg",
                            "FE"
                            )
          )
## 
## ====================================================================================================
##                                                   Dependent variable:                               
##                     --------------------------------------------------------------------------------
##                                                           gsp                                       
##                                               OLS                                    panel          
##                                                                                      linear         
##                          OLS - State Dummy             Demeaned Reg                    FE           
##                                 (1)                         (2)                       (3)           
## ----------------------------------------------------------------------------------------------------
## pcap                         0.299***                    2.437***                   2.849***        
##                               (0.050)                     (0.021)                   (0.123)         
##                                                                                                     
## emp                          33.004***                                                              
##                               (0.754)                                                               
##                                                                                                     
## Constant                   -4,123.804***                                                            
##                              (450.110)                                                              
##                                                                                                     
## ----------------------------------------------------------------------------------------------------
## Observations                    816                         816                       816           
## R2                             0.982                       0.942                     0.413          
## Adjusted R2                    0.982                       0.942                     0.377          
## Residual Std. Error    9,350.783 (df = 813)        16,744.560 (df = 815)                            
## F Statistic         22,412.880*** (df = 2; 813) 13,252.270*** (df = 1; 815) 540.430*** (df = 1; 767)
## ====================================================================================================
## Note:                                                                    *p<0.1; **p<0.05; ***p<0.01

Comparison of Model Results

  1. OLS with State Dummy (Model 1):
    • This is a standard OLS regression that includes state dummies to control for state-specific effects.
  2. Panel Linear Model (Demeaned Regression) (Model 2):
    • This is a linear panel model using demeaned (within) data, which essentially corresponds to the fixed effects (FE) model in the panel data context.
  3. Fixed Effects Model (FE) (Model 3):
    • This is the fixed effects model estimated directly, which accounts for state-specific (entity) effects.

FE limitations

Two way limitations

# Load necessary libraries
library(AER)
library(plm)
library(stargazer)

# Fit the one-way fixed effects model using lm() for the Produc dataset
produc_fe_lm_mod <- lm(data = Produc,
                       formula = gsp ~ pcap + as.factor(state) + as.factor(year))

# Estimate the two-way fixed effects regression with plm()
produc_fe_mod2 <- plm(formula = gsp ~ pcap + as.factor(year), 
                      data    = Produc,
                      index   = c("state", "year"), # Declaring data to be panel
                      model   = "within")           # Fixed effects model

# Compare models using stargazer
stargazer(produc_fe_lm_mod,  # One-way Fixed Effects (State + Year OLS)
          produc_fe_lm_mod,  # Two-way Fixed Effects (State + Year OLS)
          produc_fe_mod2,    # Two-way Fixed Effects (PLM)
          type  = "text",
          column.labels = c("FE - One way (state)",
                            "FE - Two way (state-year) OLS",
                            "FE - Two way (state-year) PLM"),
          add.lines = list(c('Two Way Fixed effects', "No", "Yes", "Yes"))
          )
## 
## ====================================================================================================================
##                                                                 Dependent variable:                                 
##                                -------------------------------------------------------------------------------------
##                                                                         gsp                                         
##                                                          OLS                                       panel            
##                                                                                                   linear            
##                                  FE - One way (state)    FE - Two way (state-year) OLS FE - Two way (state-year) PLM
##                                           (1)                         (2)                           (3)             
## --------------------------------------------------------------------------------------------------------------------
## pcap                                   2.231***                    2.231***                      2.231***           
##                                         (0.160)                     (0.160)                       (0.160)           
##                                                                                                                     
## as.factor(state)ARIZONA                -435.620                    -435.620                                         
##                                       (3,316.455)                 (3,316.455)                                       
##                                                                                                                     
## as.factor(state)ARKANSAS               3,381.503                   3,381.503                                        
##                                       (3,571.735)                 (3,571.735)                                       
##                                                                                                                     
## as.factor(state)CALIFORNIA           45,147.830**                45,147.830**                                       
##                                      (19,407.610)                (19,407.610)                                       
##                                                                                                                     
## as.factor(state)COLORADO              7,674.150**                 7,674.150**                                       
##                                       (3,313.678)                 (3,313.678)                                       
##                                                                                                                     
## as.factor(state)CONNECTICUT           8,061.302**                 8,061.302**                                       
##                                       (3,281.734)                 (3,281.734)                                       
##                                                                                                                     
## as.factor(state)DELAWARE               -407.535                    -407.535                                         
##                                       (3,899.469)                 (3,899.469)                                       
##                                                                                                                     
## as.factor(state)FLORIDA              17,789.810***               17,789.810***                                      
##                                       (4,875.994)                 (4,875.994)                                       
##                                                                                                                     
## as.factor(state)GEORGIA               8,188.580**                 8,188.580**                                       
##                                       (3,465.838)                 (3,465.838)                                       
##                                                                                                                     
## as.factor(state)IDAHO                  2,237.565                   2,237.565                                        
##                                       (3,943.262)                 (3,943.262)                                       
##                                                                                                                     
## as.factor(state)ILLINOIS             30,031.580***               30,031.580***                                      
##                                       (7,654.671)                 (7,654.671)                                       
##                                                                                                                     
## as.factor(state)INDIANA              14,916.710***               14,916.710***                                      
##                                       (3,422.802)                 (3,422.802)                                       
##                                                                                                                     
## as.factor(state)IOWA                    -87.358                     -87.358                                         
##                                       (3,284.490)                 (3,284.490)                                       
##                                                                                                                     
## as.factor(state)KANSAS                 3,370.320                   3,370.320                                        
##                                       (3,354.405)                 (3,354.405)                                       
##                                                                                                                     
## as.factor(state)KENTUCKY               -695.338                    -695.338                                         
##                                       (3,289.792)                 (3,289.792)                                       
##                                                                                                                     
## as.factor(state)LOUISIANA            19,403.480***               19,403.480***                                      
##                                       (3,400.003)                 (3,400.003)                                       
##                                                                                                                     
## as.factor(state)MAINE                  2,865.959                   2,865.959                                        
##                                       (3,910.283)                 (3,910.283)                                       
##                                                                                                                     
## as.factor(state)MARYLAND              -3,742.072                  -3,742.072                                        
##                                       (3,514.484)                 (3,514.484)                                       
##                                                                                                                     
## as.factor(state)MASSACHUSETTS        15,130.500***               15,130.500***                                      
##                                       (3,668.940)                 (3,668.940)                                       
##                                                                                                                     
## as.factor(state)MICHIGAN              10,821.020*                 10,821.020*                                       
##                                       (6,068.878)                 (6,068.878)                                       
##                                                                                                                     
## as.factor(state)MINNESOTA             -4,632.223                  -4,632.223                                        
##                                       (3,570.809)                 (3,570.809)                                       
##                                                                                                                     
## as.factor(state)MISSISSIPPI           -1,163.844                  -1,163.844                                        
##                                       (3,423.676)                 (3,423.676)                                       
##                                                                                                                     
## as.factor(state)MISSOURI             11,348.380***               11,348.380***                                      
##                                       (3,375.139)                 (3,375.139)                                       
##                                                                                                                     
## as.factor(state)MONTANA                -778.071                    -778.071                                         
##                                       (3,826.474)                 (3,826.474)                                       
##                                                                                                                     
## as.factor(state)NEBRASKA             -7,180.471**                -7,180.471**                                       
##                                       (3,375.906)                 (3,375.906)                                       
##                                                                                                                     
## as.factor(state)NEVADA                 2,831.423                   2,831.423                                        
##                                       (3,897.046)                 (3,897.046)                                       
##                                                                                                                     
## as.factor(state)NEW_HAMPSHIRE          3,373.618                   3,373.618                                        
##                                       (3,968.953)                 (3,968.953)                                       
##                                                                                                                     
## as.factor(state)NEW_JERSEY           34,096.800***               34,096.800***                                      
##                                       (3,971.481)                 (3,971.481)                                       
##                                                                                                                     
## as.factor(state)NEW_MEXICO             3,932.344                   3,932.344                                        
##                                       (3,720.231)                 (3,720.231)                                       
##                                                                                                                     
## as.factor(state)NEW_YORK              -24,117.880                 -24,117.880                                       
##                                      (17,920.510)                (17,920.510)                                       
##                                                                                                                     
## as.factor(state)NORTH_CAROLINA       18,282.710***               18,282.710***                                      
##                                       (3,345.062)                 (3,345.062)                                       
##                                                                                                                     
## as.factor(state)NORTH_DAKOTA            282.151                     282.151                                         
##                                       (3,911.680)                 (3,911.680)                                       
##                                                                                                                     
## as.factor(state)OHIO                 18,742.790***               18,742.790***                                      
##                                       (6,702.743)                 (6,702.743)                                       
##                                                                                                                     
## as.factor(state)OKLAHOMA             13,298.080***               13,298.080***                                      
##                                       (3,362.952)                 (3,362.952)                                       
##                                                                                                                     
## as.factor(state)OREGON                 1,065.086                   1,065.086                                        
##                                       (3,340.158)                 (3,340.158)                                       
##                                                                                                                     
## as.factor(state)PENNSYLVANIA          11,809.930                  11,809.930                                        
##                                       (7,478.970)                 (7,478.970)                                       
##                                                                                                                     
## as.factor(state)RHODE_ISLAND           2,793.617                   2,793.617                                        
##                                       (3,932.614)                 (3,932.614)                                       
##                                                                                                                     
## as.factor(state)SOUTH_CAROLINA        5,843.381*                  5,843.381*                                        
##                                       (3,446.313)                 (3,446.313)                                       
##                                                                                                                     
## as.factor(state)SOUTH_DAKOTA          -1,428.932                  -1,428.932                                        
##                                       (3,895.531)                 (3,895.531)                                       
##                                                                                                                     
## as.factor(state)TENNESSE              -4,243.782                  -4,243.782                                        
##                                       (3,455.961)                 (3,455.961)                                       
##                                                                                                                     
## as.factor(state)TEXAS                73,179.360***               73,179.360***                                      
##                                       (8,708.798)                 (8,708.798)                                       
##                                                                                                                     
## as.factor(state)UTAH                    -92.931                     -92.931                                         
##                                       (3,643.378)                 (3,643.378)                                       
##                                                                                                                     
## as.factor(state)VERMONT                 742.641                     742.641                                         
##                                       (4,063.379)                 (4,063.379)                                       
##                                                                                                                     
## as.factor(state)VIRGINIA             10,187.310***               10,187.310***                                      
##                                       (3,507.875)                 (3,507.875)                                       
##                                                                                                                     
## as.factor(state)WASHINGTON          -19,465.120***              -19,465.120***                                      
##                                       (4,093.990)                 (4,093.990)                                       
##                                                                                                                     
## as.factor(state)WEST_VIRGINIA           966.557                     966.557                                         
##                                       (3,538.311)                 (3,538.311)                                       
##                                                                                                                     
## as.factor(state)WISCONSIN              1,911.150                   1,911.150                                        
##                                       (3,504.954)                 (3,504.954)                                       
##                                                                                                                     
## as.factor(state)WYOMING                2,661.946                   2,661.946                                        
##                                       (3,938.082)                 (3,938.082)                                       
##                                                                                                                     
## as.factor(year)1971                    -713.025                    -713.025                      -713.025           
##                                       (1,957.380)                 (1,957.380)                   (1,957.380)         
##                                                                                                                     
## as.factor(year)1972                     310.307                     310.307                       310.307           
##                                       (1,968.997)                 (1,968.997)                   (1,968.997)         
##                                                                                                                     
## as.factor(year)1973                    1,810.790                   1,810.790                     1,810.790          
##                                       (1,985.804)                 (1,985.804)                   (1,985.804)         
##                                                                                                                     
## as.factor(year)1974                     83.941                      83.941                        83.941            
##                                       (2,005.307)                 (2,005.307)                   (2,005.307)         
##                                                                                                                     
## as.factor(year)1975                   -2,204.030                  -2,204.030                    -2,204.030          
##                                       (2,030.096)                 (2,030.096)                   (2,030.096)         
##                                                                                                                     
## as.factor(year)1976                    -816.016                    -816.016                      -816.016           
##                                       (2,054.757)                 (2,054.757)                   (2,054.757)         
##                                                                                                                     
## as.factor(year)1977                     961.151                     961.151                       961.151           
##                                       (2,076.967)                 (2,076.967)                   (2,076.967)         
##                                                                                                                     
## as.factor(year)1978                    3,379.896                   3,379.896                     3,379.896          
##                                       (2,093.476)                 (2,093.476)                   (2,093.476)         
##                                                                                                                     
## as.factor(year)1979                   4,107.233*                  4,107.233*                    4,107.233*          
##                                       (2,115.261)                 (2,115.261)                   (2,115.261)         
##                                                                                                                     
## as.factor(year)1980                    2,790.613                   2,790.613                     2,790.613          
##                                       (2,134.930)                 (2,134.930)                   (2,134.930)         
##                                                                                                                     
## as.factor(year)1981                    3,221.869                   3,221.869                     3,221.869          
##                                       (2,155.235)                 (2,155.235)                   (2,155.235)         
##                                                                                                                     
## as.factor(year)1982                    1,520.366                   1,520.366                     1,520.366          
##                                       (2,167.692)                 (2,167.692)                   (2,167.692)         
##                                                                                                                     
## as.factor(year)1983                    3,349.665                   3,349.665                     3,349.665          
##                                       (2,175.614)                 (2,175.614)                   (2,175.614)         
##                                                                                                                     
## as.factor(year)1984                  7,834.846***                7,834.846***                  7,834.846***         
##                                       (2,182.975)                 (2,182.975)                   (2,182.975)         
##                                                                                                                     
## as.factor(year)1985                  10,243.290***               10,243.290***                 10,243.290***        
##                                       (2,195.767)                 (2,195.767)                   (2,195.767)         
##                                                                                                                     
## as.factor(year)1986                  12,005.980***               12,005.980***                 12,005.980***        
##                                       (2,214.983)                 (2,214.983)                   (2,214.983)         
##                                                                                                                     
## Constant                              -4,686.657                  -4,686.657                                        
##                                       (3,467.709)                 (3,467.709)                                       
##                                                                                                                     
## --------------------------------------------------------------------------------------------------------------------
## Two Way Fixed effects                     No                          Yes                           Yes             
## Observations                              816                         816                           816             
## R2                                       0.983                       0.983                         0.483            
## Adjusted R2                              0.981                       0.981                         0.439            
## Residual Std. Error (df = 751)         9,567.755                   9,567.755                                        
## F Statistic                    669.395*** (df = 64; 751)   669.395*** (df = 64; 751)     41.217*** (df = 17; 751)   
## ====================================================================================================================
## Note:                                                                                    *p<0.1; **p<0.05; ***p<0.01
# Compare models using stargazer
stargazer(produc_fe_lm_mod,  # One-way Fixed Effects (State + Year OLS)
          produc_fe_lm_mod,  # Two-way Fixed Effects (State + Year OLS)
          produc_fe_mod2,    # Two-way Fixed Effects (State + Year PLM)
          type  = "text",
          column.labels = c("FE - One way (state)",  # Label for the first model
                            "FE - Two way (state-year) OLS",  # Label for the second model
                            "FE - Two way (state-year) PLM"),  # Label for the third model
          add.lines = list(c('Two Way Fixed effects', "No", "Yes", "Yes")),  # Add additional lines
          keep = c("pcap")  # Only keep the coefficient of 'pcap'
          )
## 
## ====================================================================================================================
##                                                                 Dependent variable:                                 
##                                -------------------------------------------------------------------------------------
##                                                                         gsp                                         
##                                                          OLS                                       panel            
##                                                                                                   linear            
##                                  FE - One way (state)    FE - Two way (state-year) OLS FE - Two way (state-year) PLM
##                                           (1)                         (2)                           (3)             
## --------------------------------------------------------------------------------------------------------------------
## pcap                                   2.231***                    2.231***                      2.231***           
##                                         (0.160)                     (0.160)                       (0.160)           
##                                                                                                                     
## --------------------------------------------------------------------------------------------------------------------
## Two Way Fixed effects                     No                          Yes                           Yes             
## Observations                              816                         816                           816             
## R2                                       0.983                       0.983                         0.483            
## Adjusted R2                              0.981                       0.981                         0.439            
## Residual Std. Error (df = 751)         9,567.755                   9,567.755                                        
## F Statistic                    669.395*** (df = 64; 751)   669.395*** (df = 64; 751)     41.217*** (df = 17; 751)   
## ====================================================================================================================
## Note:                                                                                    *p<0.1; **p<0.05; ***p<0.01

Interpretation of Results

The table compares the effect of pcap (capital per worker) on gsp (Gross State Product) across different regression models:

  1. FE - One Way (State) (Model 1):
    • Fixed effects model controlling for state-specific (entity) effects only.
  2. FE - Two Way (State-Year) (Model 2):
    • Fixed effects model controlling for both state-specific (entity) effects and year-specific effects.
  3. OLS FE - Two Way (State-Year) (Model 3):
    • OLS model with both state and year fixed effects.

Summary of Results

  1. Coefficient for pcap:

The coefficient for pcap is identical across all models and highly significant (p < 0.01). This consistency suggests that the estimated effect of pcap on gsp is robust to the inclusion of different types of fixed effects.

  1. Fixed Effects Control:
  1. R-Squared and Adjusted R-Squared:

Interpretation:

  1. Residual Standard Error and F-Statistic:

Interpretation:

Consistency of Coefficient: - The coefficient for pcap is the same across all models, which suggests that the effect of pcap on gsp is robust to different fixed effects specifications. This indicates that the inclusion of state and/or year fixed effects does not alter the estimated impact of pcap.

Fixed Effects Control: - Entity Fixed Effects (State): Controls for time-invariant state-specific factors. - Time Fixed Effects (Year): Controls for time-specific effects impacting all states.

Coefficient Specification: - The consistent coefficient across different fixed effects models suggests that the estimated effect of pcap is not sensitive to whether only state fixed effects, both state and year fixed effects, or an OLS model with these fixed effects are used.

Differences in Model Fit: - Models with higher R-squared values (Models 1 and 2) suggest better fit and explanatory power compared to the OLS model (Model 3), which might be due to the way fixed effects are included or estimated. The lower R-squared in Model 3 could indicate that the OLS approach is less effective at capturing the relationship between pcap and gsp when fixed effects are considered.

The fixed effects models (one-way and two-way) provide robust estimates of the impact of pcap on gsp, controlling for both entity-specific and time-specific unobserved heterogeneity. The consistency of the coefficient across these models reinforces the reliability of the estimated effect.

Unbalanced Panel

library(psych)
library(dplyr)
# Create a subset of the Produc dataset
set.seed(7)  # For reproducibility
Produc_unbalanced <- Produc %>% select(gsp, pcap, year, state)

# Define the indices to replace with NA
indices_to_replace <- sample(x = 1:length(Produc_unbalanced$gsp),
                              size = round(.3 * length(Produc_unbalanced$gsp)), 
                              replace = TRUE)

# Introduce NA values
Produc_unbalanced$gsp[indices_to_replace] <- NA

# Describe the 'gsp' column to check for missing values and summary statistics
describe(Produc_unbalanced$gsp)
##    vars   n     mean       sd median  trimmed      mad  min    max  range skew
## X1    1 604 62191.14 72889.64  39258 46571.91 38850.05 4354 464550 460196 2.52
##    kurtosis      se
## X1     7.25 2965.84
# Define the indices to replace with NA
indices_to_replace <- sample(x = 1:length(Produc_unbalanced$pcap),
                              size = round(.3 * length(Produc_unbalanced$pcap)), 
                              replace = TRUE)

# Introduce NA values
Produc_unbalanced$pcap[indices_to_replace] <- NA

# Describe the 'pcap' column to check for missing values and summary statistics
describe(Produc_unbalanced$pcap)
##    vars   n     mean       sd   median  trimmed      mad     min      max
## X1    1 610 24779.64 27273.33 17331.96 19184.29 15880.73 2627.12 139910.2
##       range skew kurtosis      se
## X1 137283.1 2.54     7.15 1104.26
vis_dat(Produc_unbalanced)

vis_miss(Produc_unbalanced)

naniar::gg_miss_upset(Produc_unbalanced)

# Count the number of complete cases where neither 'gsp' nor 'pcap' is missing
complete_cases_count <- sum(!is.na(Produc_unbalanced$gsp) & !is.na(Produc_unbalanced$pcap))

# Print the result
complete_cases_count
## [1] 448

Create the CI

# Fit the original model using lm() on the original dataset
produc_fe_lm_mod <- lm(data = Produc,
                       formula = gsp ~ pcap + state)

# Fit the model with missing values using lm() on the unbalanced dataset
produc_fe_lm_mod_NA <- lm(data = Produc_unbalanced,
                          formula = gsp ~ pcap + state)

# Compare models using stargazer
stargazer(produc_fe_lm_mod,    # Original model
          produc_fe_lm_mod_NA, # Model with missing values
          type  = "text",
          column.labels = c("Original Model",
                            "Model with Missing Values")
          )
## 
## =======================================================================
##                                     Dependent variable:                
##                     ---------------------------------------------------
##                                             gsp                        
##                          Original Model       Model with Missing Values
##                                (1)                       (2)           
## -----------------------------------------------------------------------
## pcap                        2.849***                  2.880***         
##                              (0.123)                   (0.153)         
##                                                                        
## stateARIZONA                1,417.043                 1,063.024        
##                            (3,477.431)               (4,830.028)       
##                                                                        
## stateARKANSAS              8,836.930**               8,667.037*        
##                            (3,622.879)               (4,886.943)       
##                                                                        
## stateCALIFORNIA           -28,869.110*              -42,130.670**      
##                           (15,061.340)              (18,860.550)       
##                                                                        
## stateCOLORADO             9,450.871***               8,580.249*        
##                            (3,475.876)               (4,618.222)       
##                                                                        
## stateCONNECTICUT           8,106.579**               8,508.150*        
##                            (3,458.031)               (5,065.977)       
##                                                                        
## stateDELAWARE              7,742.605**                7,474.868        
##                            (3,816.190)               (4,987.466)       
##                                                                        
## stateFLORIDA                3,834.969                 1,164.984        
##                            (4,426.758)               (5,643.557)       
##                                                                        
## stateGEORGIA                3,875.493                 5,083.441        
##                            (3,561.960)               (5,003.556)       
##                                                                        
## stateIDAHO                10,697.320***             10,419.830**       
##                            (3,842.528)               (5,265.584)       
##                                                                        
## stateILLINOIS               3,271.777                -3,821.207        
##                            (6,328.138)               (8,476.500)       
##                                                                        
## stateINDIANA              11,153.230***             10,012.160**       
##                            (3,537.435)               (4,577.965)       
##                                                                        
## stateIOWA                    435.169                   136.770         
##                            (3,459.567)               (4,780.334)       
##                                                                        
## stateKANSAS                6,057.885*                 5,619.256        
##                            (3,498.744)               (4,734.365)       
##                                                                        
## stateKENTUCKY              -1,586.921                -2,164.425        
##                            (3,462.524)               (4,581.490)       
##                                                                        
## stateLOUISIANA            15,963.330***             15,556.230***      
##                            (3,524.499)               (4,710.446)       
##                                                                        
## stateMAINE                11,093.310***             10,863.670**       
##                            (3,822.683)               (5,355.404)       
##                                                                        
## stateMARYLAND             -8,609.010**              -9,675.033**       
##                            (3,589.845)               (4,810.924)       
##                                                                        
## stateMASSACHUSETTS         8,782.245**              10,077.900**       
##                            (3,679.486)               (4,661.824)       
##                                                                        
## stateMICHIGAN              -8,933.172*              -11,291.510*       
##                            (5,221.494)               (6,695.382)       
##                                                                        
## stateMINNESOTA           -10,078.560***              -9,741.399*       
##                            (3,622.343)               (5,219.464)       
##                                                                        
## stateMISSISSIPPI            2,611.512                 2,144.493        
##                            (3,537.932)               (4,895.418)       
##                                                                        
## stateMISSOURI              8,296.786**               8,872.209*        
##                            (3,510.435)               (4,950.210)       
##                                                                        
## stateMONTANA               6,836.417*                 6,541.229        
##                            (3,772.544)               (4,929.055)       
##                                                                        
## stateNEBRASKA              -4,116.200                -6,404.624        
##                            (3,510.868)               (5,322.530)       
##                                                                        
## stateNEVADA               10,964.180***             11,057.940**       
##                            (3,814.736)               (5,131.226)       
##                                                                        
## stateNEW_HAMPSHIRE        12,011.390***             12,058.170**       
##                            (3,858.031)               (5,187.211)       
##                                                                        
## stateNEW_JERSEY           25,441.660***             25,286.080***      
##                            (3,859.558)               (5,293.799)       
##                                                                        
## stateNEW_MEXICO           10,712.840***             10,549.750**       
##                            (3,709.609)               (5,001.052)       
##                                                                        
## stateNEW_YORK            -92,289.210***            -92,963.820***      
##                           (13,937.090)              (17,474.420)       
##                                                                        
## stateNORTH_CAROLINA       15,775.570***             12,551.060***      
##                            (3,493.486)               (4,801.048)       
##                                                                        
## stateNORTH_DAKOTA          8,519.442**                8,346.031        
##                            (3,823.523)               (5,141.802)       
##                                                                        
## stateOHIO                  -3,872.339                -5,264.400        
##                            (5,658.491)               (7,108.376)       
##                                                                        
## stateOKLAHOMA             16,141.090***             15,024.280***      
##                            (3,503.559)               (5,354.310)       
##                                                                        
## stateOREGON                 3,472.348                 3,553.079        
##                            (3,490.730)               (4,963.229)       
##                                                                        
## statePENNSYLVANIA         -14,195.330**             -17,392.200**      
##                            (6,203.522)               (7,875.815)       
##                                                                        
## stateRHODE_ISLAND         11,178.830***             10,895.190**       
##                            (3,836.114)               (5,157.741)       
##                                                                        
## stateSOUTH_CAROLINA       9,915.473***               9,962.342*        
##                            (3,550.816)               (5,376.027)       
##                                                                        
## stateSOUTH_DAKOTA          6,692.960*                 6,530.299        
##                            (3,813.827)               (4,986.225)       
##                                                                        
## stateTENNESSE             -8,436.518**               -8,701.285*       
##                            (3,556.319)               (4,677.506)       
##                                                                        
## stateTEXAS                41,964.560***             43,457.970***      
##                            (7,083.491)               (9,212.140)       
##                                                                        
## stateUTAH                   6,030.915                 5,648.664        
##                            (3,664.538)               (5,325.138)       
##                                                                        
## stateVERMONT              10,014.520**               9,873.379*        
##                            (3,915.335)               (5,348.827)       
##                                                                        
## stateVIRGINIA               5,392.290                 5,903.444        
##                            (3,586.047)               (4,658.014)       
##                                                                        
## stateWASHINGTON          -28,936.470***            -28,869.960***      
##                            (3,934.019)               (5,483.019)       
##                                                                        
## stateWEST_VIRGINIA         6,085.468*                 5,688.002        
##                            (3,603.565)               (5,096.347)       
##                                                                        
## stateWISCONSIN             -2,851.776                -4,153.211        
##                            (3,584.369)               (5,015.852)       
##                                                                        
## stateWYOMING              11,085.490***             10,847.200**       
##                            (3,839.406)               (5,364.970)       
##                                                                        
## Constant                 -12,972.710***            -12,901.990***      
##                            (3,288.499)               (4,585.595)       
##                                                                        
## -----------------------------------------------------------------------
## Observations                   816                       448           
## R2                            0.980                     0.984          
## Adjusted R2                   0.979                     0.982          
## Residual Std. Error   10,081.770 (df = 767)     9,474.853 (df = 399)   
## F Statistic         801.948*** (df = 48; 767) 521.558*** (df = 48; 399)
## =======================================================================
## Note:                                       *p<0.1; **p<0.05; ***p<0.01

I have a little bit of confusion to find CI for unbalanced data.