1 Preliminaries

The code for this document is largely based on the public materials of the Econometrics Academy.

1.2 Clarify your workflow

1.3 Load packages

library(tidyverse) # Modern data science library 
library(plm)       # Panel data analysis library

2 Data Import and Tidying

# Download the wage panel as a CSV from GitHub and parse it with readr.
dataWage <- read_csv(
  file = "https://github.com/ds777/sample-datasets/blob/master/dataWage.csv?raw=true"
)

Declare the dataset as panel data

# Declare the panel structure: `id` indexes the individual, `t` the period.
# pdata.frame() is the replacement that plm recommends for plm.data(), whose
# use the package itself flags as discouraged.
dataWage <- pdata.frame(dataWage, index = c("id", "t"))

2.1 View tabular data

# Auto-print the panel data set so the notebook renders it as a table.
dataWage

3 Exploratory Data Analysis

# Per-column descriptive statistics (counts for the id/t factors,
# five-number summary plus mean for the numeric columns).
summary(object = dataWage)
       id       t            exp             wks             occ        
 1      :   7   1:595   Min.   : 1.00   Min.   : 5.00   Min.   :0.0000  
 2      :   7   2:595   1st Qu.:11.00   1st Qu.:46.00   1st Qu.:0.0000  
 3      :   7   3:595   Median :18.00   Median :48.00   Median :1.0000  
 4      :   7   4:595   Mean   :19.85   Mean   :46.81   Mean   :0.5112  
 5      :   7   5:595   3rd Qu.:29.00   3rd Qu.:50.00   3rd Qu.:1.0000  
 6      :   7   6:595   Max.   :51.00   Max.   :52.00   Max.   :1.0000  
 (Other):4123   7:595                                                   
      ind             south             smsa              ms        
 Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
 1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:1.0000  
 Median :0.0000   Median :0.0000   Median :1.0000   Median :1.0000  
 Mean   :0.3954   Mean   :0.2903   Mean   :0.6538   Mean   :0.8144  
 3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
 Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
                                                                    
      fem             union             ed             blk         
 Min.   :0.0000   Min.   :0.000   Min.   : 4.00   Min.   :0.00000  
 1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:12.00   1st Qu.:0.00000  
 Median :0.0000   Median :0.000   Median :12.00   Median :0.00000  
 Mean   :0.1126   Mean   :0.364   Mean   :12.85   Mean   :0.07227  
 3rd Qu.:0.0000   3rd Qu.:1.000   3rd Qu.:16.00   3rd Qu.:0.00000  
 Max.   :1.0000   Max.   :1.000   Max.   :17.00   Max.   :1.00000  
                                                                   
     lwage           tdum1            tdum2            tdum3       
 Min.   :4.605   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
 1st Qu.:6.395   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
 Median :6.685   Median :0.0000   Median :0.0000   Median :0.0000  
 Mean   :6.676   Mean   :0.1429   Mean   :0.1429   Mean   :0.1429  
 3rd Qu.:6.953   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
 Max.   :8.537   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
                                                                   
     tdum4            tdum5            tdum6            tdum7       
 Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
 1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
 Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
 Mean   :0.1429   Mean   :0.1429   Mean   :0.1429   Mean   :0.1429  
 3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
 Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
                                                                    
      exp2       
 Min.   :   1.0  
 1st Qu.: 121.0  
 Median : 324.0  
 Mean   : 514.4  
 3rd Qu.: 841.0  
 Max.   :2601.0  
                 

4 Panel Data Regression Modeling

4.1 Variable Definitions

# Response: log wage, as a one-column matrix.
Y <- cbind(dataWage[["lwage"]])

# Regressors, bound column-wise. The columns are unnamed, so the model
# summaries label their coefficients X1..X4 in this order:
#   X1 = exp, X2 = exp2, X3 = wks, X4 = ed
X <- cbind(
  dataWage[["exp"]],
  dataWage[["exp2"]],
  dataWage[["wks"]],
  dataWage[["ed"]]
)

4.2 Pooled OLS estimator

# Pooled OLS: stacks all individual-period observations and ignores the
# panel structure when estimating the coefficients.
pooling <- plm(
  formula = Y ~ X,
  data = dataWage,
  model = "pooling"
)
summary(pooling)
Pooling Model

Call:
plm(formula = Y ~ X, data = dataWage, model = "pooling")

Balanced Panel: n = 595, T = 7, N = 4165

Residuals:
       Min.     1st Qu.      Median     3rd Qu.        Max. 
-2.16057670 -0.25034526  0.00027256  0.26792139  2.12969386 

Coefficients:
               Estimate  Std. Error  t-value  Pr(>|t|)    
(Intercept)  4.9080e+00  6.7330e-02  72.8945 < 2.2e-16 ***
X1           4.4675e-02  2.3929e-03  18.6701 < 2.2e-16 ***
X2          -7.1563e-04  5.2794e-05 -13.5552 < 2.2e-16 ***
X3           5.8270e-03  1.1826e-03   4.9271 8.673e-07 ***
X4           7.6041e-02  2.2266e-03  34.1511 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Total Sum of Squares:    886.9
Residual Sum of Squares: 635.41
R-Squared:      0.28356
Adj. R-Squared: 0.28287
F-statistic: 411.624 on 4 and 4160 DF, p-value: < 2.22e-16

4.3 Between estimator

# Between estimator: fitted on one observation per individual
# (595 observations are used out of the 4165 in the panel).
between <- plm(
  formula = Y ~ X,
  data = dataWage,
  model = "between"
)
summary(between)
Oneway (individual) effect Between Model

Call:
plm(formula = Y ~ X, data = dataWage, model = "between")

Balanced Panel: n = 595, T = 7, N = 4165
Observations used in estimation: 595

Residuals:
     Min.   1st Qu.    Median   3rd Qu.      Max. 
-0.978153 -0.220264  0.036574  0.250118  0.985629 

Coefficients:
               Estimate  Std. Error t-value  Pr(>|t|)    
(Intercept)  4.68303917  0.21009890 22.2897 < 2.2e-16 ***
X1           0.03815295  0.00569666  6.6974 4.953e-11 ***
X2          -0.00063127  0.00012568 -5.0228 6.757e-07 ***
X3           0.01309028  0.00406592  3.2195  0.001355 ** 
X4           0.07378378  0.00489848 15.0626 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Total Sum of Squares:    92.322
Residual Sum of Squares: 62.187
R-Squared:      0.32641
Adj. R-Squared: 0.32185
F-statistic: 71.4768 on 4 and 590 DF, p-value: < 2.22e-16

4.4 First differences estimator

# First-difference estimator: regresses period-to-period changes, so one
# period per individual is lost (3570 observations are used).
# NOTE(review): the reported fit lists only two coefficients, a residual sum
# of squares larger than the total, and a negative F statistic -- verify this
# specification before interpreting the estimates.
firstdiff <- plm(
  formula = Y ~ X,
  data = dataWage,
  model = "fd"
)
summary(firstdiff)
Oneway (individual) effect First-Difference Model

Call:
plm(formula = Y ~ X, data = dataWage, model = "fd")

Balanced Panel: n = 595, T = 7, N = 4165
Observations used in estimation: 3570

Residuals:
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-2.0217 -0.0486  0.0157  0.0271  0.0873  2.4263 

Coefficients:
     Estimate Std. Error t-value Pr(>|t|)    
X2 1.7323e-03 7.0214e-05 24.6712   <2e-16 ***
X3 8.1012e-05 5.9103e-04  0.1371    0.891    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Total Sum of Squares:    118.06
Residual Sum of Squares: 128.92
R-Squared:      0.0040562
Adj. R-Squared: 0.003777
F-statistic: -300.514 on 1 and 3568 DF, p-value: 1

4.5 Fixed effects or within estimator

# Fixed effects (within) estimator with individual effects.
# Only X1..X3 are reported; X4 (ed) is dropped, presumably because it does
# not vary within an individual -- TODO confirm against the data.
fixed <- plm(
  formula = Y ~ X,
  data = dataWage,
  model = "within"
)
summary(fixed)
Oneway (individual) effect Within Model

Call:
plm(formula = Y ~ X, data = dataWage, model = "within")

Balanced Panel: n = 595, T = 7, N = 4165

Residuals:
      Min.    1st Qu.     Median    3rd Qu.       Max. 
-1.8120879 -0.0511128  0.0037112  0.0614250  1.9434065 

Coefficients:
      Estimate  Std. Error t-value  Pr(>|t|)    
X1  1.1379e-01  2.4689e-03 46.0888 < 2.2e-16 ***
X2 -4.2437e-04  5.4632e-05 -7.7678 1.036e-14 ***
X3  8.3588e-04  5.9967e-04  1.3939    0.1634    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Total Sum of Squares:    240.65
Residual Sum of Squares: 82.632
R-Squared:      0.65663
Adj. R-Squared: 0.59916
F-statistic: 2273.74 on 3 and 3567 DF, p-value: < 2.22e-16

4.6 Random effects estimator

# Random effects estimator with individual effects; plm's default
# Swamy-Arora transformation is used to estimate the variance components.
random <- plm(
  formula = Y ~ X,
  data = dataWage,
  model = "random"
)
summary(random)
Oneway (individual) effect Random Effect Model 
   (Swamy-Arora's transformation)

Call:
plm(formula = Y ~ X, data = dataWage, model = "random")

Balanced Panel: n = 595, T = 7, N = 4165

Effects:
                  var std.dev share
idiosyncratic 0.02317 0.15220 0.185
individual    0.10209 0.31952 0.815
theta: 0.8228

Residuals:
      Min.    1st Qu.     Median    3rd Qu.       Max. 
-2.0439676 -0.1057048  0.0070992  0.1147499  2.0875839 

Coefficients:
               Estimate  Std. Error  t-value Pr(>|t|)    
(Intercept)  3.8294e+00  9.3634e-02  40.8974   <2e-16 ***
X1           8.8861e-02  2.8178e-03  31.5360   <2e-16 ***
X2          -7.7257e-04  6.2262e-05 -12.4083   <2e-16 ***
X3           9.6577e-04  7.4329e-04   1.2993   0.1939    
X4           1.1171e-01  6.0572e-03  18.4426   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Total Sum of Squares:    260.94
Residual Sum of Squares: 151.35
R-Squared:      0.42
Adj. R-Squared: 0.41945
F-statistic: 753.113 on 4 and 4160 DF, p-value: < 2.22e-16

4.7 Tests for choosing between models

4.7.1 LM test for random effects versus OLS

# Lagrange multiplier test for individual effects on the pooled model.
# The arguments spell out plmtest()'s defaults (individual effect, Honda).
plmtest(pooling, effect = "individual", type = "honda")

    Lagrange Multiplier Test - (Honda) for balanced panels

data:  Y ~ X
normal = 72.056, p-value < 2.2e-16
alternative hypothesis: significant effects

4.7.2 F test for fixed effects versus OLS

# F test for individual effects: within (fixed effects) model against the
# pooled OLS model.
pFtest(x = fixed, z = pooling)

    F test for individual effects

data:  Y ~ X
F = 40.239, df1 = 593, df2 = 3567, p-value < 2.2e-16
alternative hypothesis: significant effects

4.7.3 Hausman test for fixed versus random effects model

# Hausman test comparing the random effects and fixed effects estimates;
# a small p-value indicates that one of the two models is inconsistent.
phtest(x = random, x2 = fixed)

    Hausman Test

data:  Y ~ X
chisq = 6191.4, df = 3, p-value < 2.2e-16
alternative hypothesis: one model is inconsistent
LS0tCnRpdGxlOiAiV2hhdCBhcmUgdGhlIE1pbmNlcmlhbiBEZXRlcm1pbmFudHMgb2YgV2FnZXM/IgpzdWJ0aXRsZTogIkEgUGFuZWwgRGF0YSBBcHByb2FjaCIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICB0aGVtZTogY29zbW8KICAgIGhpZ2hsaWdodDogbW9ub2Nocm9tZQogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIHRvY19kZXB0aDogNAogICAgbnVtYmVyX3NlY3Rpb25zOiB0cnVlCiAgICBjb2RlX2ZvbGRpbmc6IHNob3cKICAgIGRmX3ByaW50OiBrYWJsZQogIGdpdGh1Yl9kb2N1bWVudDogZGVmYXVsdAotLS0KCiMgUHJlbGltaW5hcmllcyAKClRoZSBjb2RlIGZvciB0aGlzIGRvY3VtZW50IGlzIGxhcmdlbHkgYmFzZWQgb24gdGhlIHB1YmxpYyBtYXRlcmlhbHMgb2YgdGhlIFtlY29ub21ldHJpYyBhY2FkZW15XShodHRwczovL3NpdGVzLmdvb2dsZS5jb20vc2l0ZS9lY29ub21ldHJpY3NhY2FkZW15L2Vjb25vbWV0cmljcy1tb2RlbHMvcGFuZWwtZGF0YS1tb2RlbHMpLiAgCgojIyBXYXRjaCBhbmQgZGlzY3VzcwoKLSBbQ29uY2VwdHVhbCB1bmRlcnN0YW5kaW5nXShodHRwczovL3ZpYWxvZ3Vlcy5jb20vdmlhbG9ndWVzL3BsYXkvNDMzMTM/a2V5PTM1MzNjMWFjMmY2MTViZWM0YTlkMWZjZTBkMzIxMjZlNDQ0NmI3ZTFiNDcwODAzOTkzNWUpCi0gW0V4YW1wbGVdKGh0dHBzOi8vdmlhbG9ndWVzLmNvbS92aWFsb2d1ZXMvcGxheS80MzMxND9rZXk9Mjg4ZWU5ZDYzMmMxZTg0MTE3YjZkYzc5OWNhNTkzM2IxZTgzZGFiZTk2NzVkODM5N2YzNSkKLSBbUiBpbXBsZW1lbnRhdGlvbl0oaHR0cHM6Ly92aWFsb2d1ZXMuY29tL3ZpYWxvZ3Vlcy9wbGF5LzQzMzE1P2tleT02MzQ2NTgyNjg2N2JmNGY0NmZhOTIzYzgzYzY1OTc0NmJmYjdiNzhmYTdmNzE4ODIwY2Y3KQotIFtZb3VyIFR1cm46IEV4ZWN1dGUgdGhpcyBub3RlYm9va10oaHR0cHM6Ly9yc3R1ZGlvLmNsb3VkL3Byb2plY3QvMjQ2NzMpCgojIyBDbGFyaWZ5IHlvdXIgd29ya2Zsb3cKCiFbU291cmNlOiBodHRwOi8vcjRkcy5oYWQuY28ubnovZXhwbG9yZS1pbnRyby5odG1sXShodHRwOi8vcjRkcy5oYWQuY28ubnovZGlhZ3JhbXMvZGF0YS1zY2llbmNlLWV4cGxvcmUucG5nKQoKIyMgTG9hZCBwYWNrYWdlcwoKYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0KbGlicmFyeSh0aWR5dmVyc2UpICMgTW9kZXJuIGRhdGEgc2NpZW5jZSBsaWJyYXJ5IApsaWJyYXJ5KHBsbSkgICAgICAgIyBQYW5lbCBkYXRhIGFuYWx5c2lzIGxpYnJhcnkKYGBgCgojIERhdGEgSW1wb3J0IGFuZCBUaWR5aW5nCgpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQpkYXRhV2FnZSA8LSByZWFkX2NzdigiaHR0cHM6Ly9naXRodWIuY29tL2RzNzc3L3NhbXBsZS1kYXRhc2V0cy9ibG9iL21hc3Rlci9kYXRhV2FnZS5jc3Y/cmF3PXRydWUiKQpgYGAKCkRlY2xhcmUgdGhlIGRhdGFzZXQgYXMgcGFuZWwgZGF0YQoKYGBge3J9CmRhdGFXYWdlIDwtIHBs
bS5kYXRhKGRhdGFXYWdlLCBpbmRleD1jKCJpZCIsInQiKSkKYGBgCgoKCiMjIFZpZXcgdGFidWxhciBkYXRhCgpgYGB7cn0KZGF0YVdhZ2UKYGBgCgoKIyBFeHBsb3JhdG9yeSBEYXRhIEFuYWx5c2lzCgpgYGB7cn0Kc3VtbWFyeShkYXRhV2FnZSkKYGBgCgoKCiMgUGFuZWwgRGF0YSBSZWdyZXNzaW9uIE1vZGVsaW5nCgojIyBWYXJpYWJsZSBEZWZpbml0aW9ucwoKYGBge3J9ClkgPC0gY2JpbmQoZGF0YVdhZ2UkbHdhZ2UpClggPC0gY2JpbmQoZGF0YVdhZ2UkZXhwLCBkYXRhV2FnZSRleHAyLCBkYXRhV2FnZSR3a3MsIGRhdGFXYWdlJGVkKQpgYGAKCgojIyBQb29sZWQgT0xTIGVzdGltYXRvcgoKYGBge3J9CnBvb2xpbmcgPC0gcGxtKFkgfiBYLCBkYXRhPWRhdGFXYWdlLCBtb2RlbD0gInBvb2xpbmciKQpzdW1tYXJ5KHBvb2xpbmcpCmBgYAoKCiMjIEJldHdlZW4gZXN0aW1hdG9yCgpgYGB7cn0KYmV0d2VlbiA8LSBwbG0oWSB+IFgsIGRhdGE9ZGF0YVdhZ2UsIG1vZGVsPSAiYmV0d2VlbiIpCnN1bW1hcnkoYmV0d2VlbikKYGBgCgoKIyMgRmlyc3QgZGlmZmVyZW5jZXMgZXN0aW1hdG9yCgpgYGB7cn0KZmlyc3RkaWZmIDwtIHBsbShZIH4gWCwgZGF0YT1kYXRhV2FnZSwgbW9kZWw9ICJmZCIpCnN1bW1hcnkoZmlyc3RkaWZmKQpgYGAKCiMjIEZpeGVkIGVmZmVjdHMgb3Igd2l0aGluIGVzdGltYXRvcgoKYGBge3J9CmZpeGVkIDwtIHBsbShZIH4gWCwgZGF0YT1kYXRhV2FnZSwgbW9kZWw9ICJ3aXRoaW4iKQpzdW1tYXJ5KGZpeGVkKQpgYGAKCgojIyBSYW5kb20gZWZmZWN0cyBlc3RpbWF0b3IKCmBgYHtyfQpyYW5kb20gPC0gcGxtKFkgfiBYLCBkYXRhPWRhdGFXYWdlLCBtb2RlbD0gInJhbmRvbSIpCnN1bW1hcnkocmFuZG9tKQpgYGAKCiMjIFRlc3RzIGZvciBjaG9vc2luZyBiZXR3ZWVuIG1vZGVscwoKCiMjIyBMTSB0ZXN0IGZvciByYW5kb20gZWZmZWN0cyB2ZXJzdXMgT0xTCgpgYGB7cn0KcGxtdGVzdChwb29saW5nKQpgYGAKCgojIyMgTE0gdGVzdCBmb3IgZml4ZWQgZWZmZWN0cyB2ZXJzdXMgT0xTCgpgYGB7cn0KcEZ0ZXN0KGZpeGVkLCBwb29saW5nKQpgYGAKCgojIyMgSGF1c21hbiB0ZXN0IGZvciBmaXhlZCB2ZXJzdXMgcmFuZG9tIGVmZmVjdHMgbW9kZWwKCmBgYHtyfQpwaHRlc3QocmFuZG9tLCBmaXhlZCkKYGBgCgo=