First Steps
Overall Project Stuff
- Data File: master.xlsx
- Gold: #EED311
- Blue: #112CEE
- Green: #11EE65
- Red: #EE119A
Read in required libraries
Loading required package: readxl
Loading required package: tidyverse
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
-- Attaching packages ---------------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.1 --
v ggplot2 3.3.5 v purrr 0.3.4
v tibble 3.1.6 v dplyr 1.0.7
v tidyr 1.1.4 v stringr 1.4.0
v readr 2.1.1 v forcats 0.5.1
-- Conflicts ------------------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
Loading required package: janitor
Attaching package: ‘janitor’
The following objects are masked from ‘package:stats’:
chisq.test, fisher.test
Loading required package: kableExtra
Attaching package: ‘kableExtra’
The following object is masked from ‘package:dplyr’:
group_rows
Loading required package: stargazer
Please cite as:
Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
Loading required package: plm
Attaching package: ‘plm’
The following objects are masked from ‘package:dplyr’:
between, lag, lead
Loading required package: lmtest
Loading required package: zoo
Attaching package: ‘zoo’
The following objects are masked from ‘package:base’:
as.Date, as.Date.numeric
Read in Data
- original df was cleaned wholly in Excel and produced “X1.csv”
- we recreated X1 as X2 with thru data manipulation through R
- the df “DirtyD” was an attempt to recreate a dataset ran by Prof K from master.xlsx, R don’t like it
# attempting 540 observations
#DirtyD <- read_excel("C:\\Users\\hisjo\\OneDrive\\Documents\\Sac State\\ECON 145 Economic Research Methods\\Data\\master.xlsx") %>%
# clean_names() %>% #piping prevents an umlaut over the i
# select(-c(l1_inf_gdpd)) %>% #Extranious data, just in case
# #transform(cbc = as.numeric(cbc)) %>% #cbc had some char, convert and coerce NAs
# filter(time1!=2020) %>% #Remove 2020 Entries
# drop_na()
#N = 546
X2 <- read_excel("C:\\Users\\hisjo\\OneDrive\\Documents\\Sac State\\ECON 145 Economic Research Methods\\Data\\master.xlsx") %>%
clean_names() %>%
transform(cbc = as.numeric(cbc)) %>% #cbc had some char, convert and coerce NAs
filter(time1 != 2020) %>% # Remove 2020 from our dataset
select(-c(l1_inf_gdpd)) %>% #Extraneous data dropped (lagged gdpd)
drop_na() # remove NA
Warning in eval(substitute(list(...)), `_data`, parent.frame()) :
NAs introduced by coercion
#N=357, nCol=12
#Tell R and PLM that X2 is panel data with time1=time and location1=id
#creates a new data frame with index column, moving forward I think this is
#the correct way to handle this with PLM
P_data <- pdata.frame(X2, index = c("location1", "time1"))
# display to show transformation
X2
P_data
Variables Descriptions and Source
VarTable <- data.frame(
Variable = c("hur", "taxcorp","ud","cbc","brr1y","pslm","epl","wsc","ttr","inf_gdpd"),
Description = c("Annual Harmonized Unemployment Rate, %",
"Tax on Corporate Profits, % GDP",
"Union Density Ratio",
"Collective Bargaining Coverage, %",
"Benefit Replacement Rate at 1 year, %",
"Public Spending on Labor Markets, %GDP",
"Employment Protection Legislation, Instrumented",
"Wage Setting Coordination, Instrumented",
"Total Tax Revenue, %GDP",
"Change in Inflation with GDP Deflator, %"),
Source = c("OECD 2022a","OECD 2022b","OECD 2022f","OECD 2022g","OECD 2022d",
"OECD 2022e","OECD 2022c","OECD 2022h","OECD 2022i","World Bank 2022")
)
# create the object to output in plots
VarTable_V <- kbl(VarTable, caption = "Table 1: Variables") %>%
kable_styling(bootstrap_options = c("striped", "hover","condensed"))
VarTable_V
Table 1: Variables
| Variable |
Description |
Source |
| hur |
Annual Harmonized Unemployment Rate, % |
OECD 2022a |
| taxcorp |
Tax on Corporate Profits, % GDP |
OECD 2022b |
| ud |
Union Density Ratio |
OECD 2022f |
| cbc |
Collective Bargaining Coverage, % |
OECD 2022g |
| brr1y |
Benefit Replacement Rate at 1 year, % |
OECD 2022d |
| pslm |
Public Spending on Labor Markets, %GDP |
OECD 2022e |
| epl |
Employment Protection Legislation, Instrumented |
OECD 2022c |
| wsc |
Wage Setting Coordination, Instrumented |
OECD 2022h |
| ttr |
Total Tax Revenue, %GDP |
OECD 2022i |
| inf_gdpd |
Change in Inflation with GDP Deflator, % |
World Bank 2022 |
Overall Summary Statistics
#stargazer wont make a table if there are NA or type char
# I don't like the way it outputs, even in html. Using Kable
#stargazer(X2, title = "Table 2: Summary Statistics", type = "html", omit = c("time1") )
Descriptive1 <- matrix(
c(
c("hur"),nrow(X2), round(mean(X2$hur),3),round(sd(X2$hur),3),round(min(X2$hur),3),
round(max(X2$hur),3),
c("taxcorp"),nrow(X2),round(mean(X2$taxcorp),3), round(sd(X2$taxcorp),3),
round(min(X2$taxcorp),3),round(max(X2$taxcorp),3),
c("ud"),nrow(X2),round(mean(X2$ud),3),round(sd(X2$ud),3), round(min(X2$ud),3),
round(max(X2$ud),3),
c("pslm"),nrow(X2),round(mean(X2$pslm),3),round(sd(X2$pslm),3),round(min(X2$pslm),3),
round(max(X2$pslm),3),
c("epl"),nrow(X2),round(mean(X2$epl),3), round(sd(X2$epl),3),
round(min(X2$epl),3),round(max(X2$epl),3),
c("wsc"),nrow(X2),round(mean(X2$wsc),3), round(sd(X2$wsc),3),
round(min(X2$wsc),3),round(max(X2$wsc),3),
c("cbc"),nrow(X2),round(mean(X2$cbc),3),
round(sd(X2$cbc),3),round(min(X2$cbc),3),round(max(X2$cbc),3),
c("brr1y"),nrow(X2), round(mean(X2$brr1y),3), round(sd(X2$brr1y),3),
round(min(X2$brr1y),3),round(max(X2$brr1y),3),
c("ttr"),nrow(X2), round(mean(X2$ttr),3), round(sd(X2$ttr),3), round(min(X2$ttr),3),
round(max(X2$ttr),3),
c("inf_gdpd"),nrow(X2),round(mean(X2$inf_gdpd),3),round(sd(X2$inf_gdpd),3),
round(min(X2$inf_gdpd),3),round(max(X2$inf_gdpd),3)
),
ncol = 6, byrow = T)
colnames(Descriptive1) <- c("Statistic","N", "Mean", "SD","Min","Max")
#rownames(Descriptive1) <- c("hur","taxcorp","ud","pslm","epl","wsc","cbc","brr1y","ttr","inf_gdpd")
Descriptive1_V <- kbl(Descriptive1, caption = "Table 2: Variable Summary Statistics, All Observations") %>%
kable_styling(bootstrap_options = c("striped", "hover","condensed"))
Descriptive1_V
Table 2: Variable Summary Statistics, All Observations
| Statistic |
N |
Mean |
SD |
Min |
Max |
| hur |
357 |
7.348 |
3.575 |
2.267 |
26.117 |
| taxcorp |
357 |
3.022 |
1.347 |
0.583 |
11.992 |
| ud |
357 |
28.066 |
18.819 |
4.5 |
78 |
| pslm |
357 |
1.488 |
0.986 |
0.24 |
4.3 |
| epl |
357 |
2.117 |
0.901 |
0.093 |
4.583 |
| wsc |
357 |
2.672 |
1.366 |
1 |
5 |
| cbc |
357 |
55.636 |
31.007 |
6.1 |
100 |
| brr1y |
357 |
46.947 |
24.117 |
0 |
86 |
| ttr |
357 |
34.776 |
6.54 |
19.613 |
48.531 |
| inf_gdpd |
357 |
1.834 |
1.679 |
-4.62 |
11.047 |
Figures 8-14: Facet Wrapped Charts for paper
#Facet charts
#hur vs taxcorp by year
FW1 <- ggplot(data=X2)+
geom_point(mapping=aes(x=taxcorp,y=hur), color = "#EED311" ) +
geom_smooth(mapping=aes(x=taxcorp,y=hur), method = lm, se=F, color = "#112CEE")+
facet_wrap(~time1) + labs(x = "taxcorp", y="hur", title = "Figure 8: hur vs taxcorp time factor")
FW11 <- ggplot(data=X2)+
geom_point(mapping=aes(x=taxcorp,y=hur), color = "#EED311" ) +
geom_smooth(mapping=aes(x=taxcorp,y=hur), method = lm, se=F, color = "#112CEE")+
facet_wrap(~location1) + labs(x = "taxcorp", y="hur", title = "Figure 8: hur vs taxcorp location factor")
#hur vs ud
FW2 <- ggplot(data=X2)+
geom_point(mapping=aes(x=ud,y=hur), color = "#EED311")+
geom_smooth(mapping=aes(x=ud,y=hur), method = lm, se=F, color = "#112CEE")+
facet_wrap(~time1)+ labs(x = "ud", y="hur", title = "Figure 9: hur vs ud time factor")
FW21 <- ggplot(data=X2)+
geom_point(mapping=aes(x=ud,y=hur), color = "#EED311")+
geom_smooth(mapping=aes(x=ud,y=hur), method = lm, se=F, color = "#112CEE")+
facet_wrap(~location1)+ labs(x = "ud", y="hur", title = "Figure 9: hur vs ud location factor")
#hur vs cbc
FW3 <- ggplot(data=X2)+
geom_point(mapping=aes(x=cbc,y=hur), color = "#EED311")+
geom_smooth(mapping=aes(x=cbc,y=hur), method = lm, se=F, color = "#112CEE")+
facet_wrap(~time1)+ labs(x = "cbc", y="hur", title = "Figure 10: hur vs cbc time factor")
FW31 <- ggplot(data=X2)+
geom_point(mapping=aes(x=cbc,y=hur), color = "#EED311")+
geom_smooth(mapping=aes(x=cbc,y=hur), method = lm, se=F, color = "#112CEE")+
facet_wrap(~location1)+ labs(x = "cbc", y="hur", title = "Figure 10: hur vs cbc location factor")
#hur vs brr
FW4 <- ggplot(data=X2)+
geom_point(mapping=aes(x=brr1y,y=hur), color = "#EED311")+
geom_smooth(mapping=aes(x=brr1y,y=hur), method = lm, se=F, color = "#112CEE")+
facet_wrap(~time1)+ labs(x = "brr", y="hur", title = "Figure 11: hur vs brr1y time factor")
FW41 <- ggplot(data=X2)+
geom_point(mapping=aes(x=brr1y,y=hur), color = "#EED311")+
geom_smooth(mapping=aes(x=brr1y,y=hur), method = lm, se=F, color = "#112CEE")+
facet_wrap(~location1)+ labs(x = "brr", y="hur", title = "Figure 11: hur vs brr1y location factor")
#hur vs pslm
FW5 <- ggplot(data=X2)+
geom_point(mapping= aes(x=pslm,y=hur), color = "#EED311")+
geom_smooth(mapping=aes(x=pslm, y=hur), method = lm, se=F, color = "#112CEE")+
facet_wrap(~time1)+ labs(x="pslm", y="hur", title = "Figure 12: ur vs pslm time factor")
FW51 <- ggplot(data=X2)+
geom_point(mapping= aes(x=pslm,y=hur), color = "#EED311")+
geom_smooth(mapping=aes(x=pslm, y=hur), method = lm, se=F, color = "#112CEE")+
facet_wrap(~location1)+ labs(x="pslm", y="hur", title = "Figure 12: ur vs pslm location factor")
#hur vs ttr
FW7 <- ggplot(data=X2)+
geom_point(mapping=aes(x=ttr,y=hur), color = "#EED311")+
geom_smooth(mapping=aes(x=ttr,y=hur), method = lm, se=F, color = "#112CEE")+
facet_wrap(~time1)+ labs(x = "ttr", y="hur", title = "Figure 13: hur vs ttr time factor")
FW71 <- ggplot(data=X2)+
geom_point(mapping=aes(x=ttr,y=hur), color = "#EED311")+
geom_smooth(mapping=aes(x=ttr,y=hur), method = lm, se=F, color = "#112CEE")+
facet_wrap(~location1)+ labs(x = "ttr", y="hur", title = "Figure 13: hur vs ttr location factor")
#hur vs delta_i
FW8 <- ggplot(data=X2)+
geom_point(mapping=aes(x=inf_gdpd,y=hur), color = "#EED311")+
geom_smooth(mapping=aes(x=inf_gdpd,y=hur), method = lm, se=F, color = "#112CEE")+
facet_wrap(~time1)+ labs(x = "inf_gdpd", y="hur", title = "Figure 14: hur vs inf_gdpd time factor")
FW81 <- ggplot(data=X2)+
geom_point(mapping=aes(x=inf_gdpd,y=hur), color = "#EED311")+
geom_smooth(mapping=aes(x=inf_gdpd,y=hur), method = lm, se=F, color = "#112CEE")+
facet_wrap(~location1)+ labs(x = "inf_gdpd", y="hur", title = "Figure 14: hur vs inf_gdpd location factor")
FW1; FW11;FW2;FW21;FW3;FW31;FW4;FW41;FW5;FW51;FW7;FW71;FW8;FW81
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'














Data Analysis, with experiments and tests
#P_data created in step 1
# Experiments with lm and plm
#OLS per "getting started"
#require(car)
# ols <- lm(hur~taxcorp, data = P_data)
# yhat <- ols$fitted
# Plot1 <- ggplot(data=P_data, aes(x=taxcorp))+
# geom_point(mapping=aes(x=taxcorp,y=hur), color = "#EED311" ) +
# geom_smooth(mapping=aes(x=taxcorp,y=hur), method = lm, se=F, color = "#112CEE")
# fixed.dum <- lm(hur~taxcorp + factor(location1)-1, data=P_data)
# yhat1 <- fixed.dum$fitted
# Plot2 <- sp(yhat1~P_data$taxcorp|P_data$location1, xlab="taxcorp", ylab=yhat,
# boxplot=F, smooth=F)
# Plot3 <- scatterplot(data=P_data,hur~time1|location1)
#OLS with LM, no instruments from X2
Reg01 <- lm(hur~taxcorp + ud + cbc + brr1y + pslm +ttr + inf_gdpd, data=X2)
#OLS with plm, no instruments from P_data
Reg02 <- plm(hur~taxcorp + ud + cbc + brr1y + pslm + ttr + inf_gdpd, data=P_data)
#OLS with lm, instruments, from P_data - include as baseline
Reg03 <- lm(hur~taxcorp + ud + cbc + brr1y + pslm +wsc +epl +ttr + inf_gdpd, data=P_data)
#OLS with plm, instruments from P_data
Reg04 <- plm(hur~taxcorp + ud + cbc + brr1y + pslm +wsc +epl +ttr + inf_gdpd, data=P_data)
#Pooled, preferred model (clean)
P2SLS <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
|.-taxcorp + epl + wsc,
data = P_data, model = "pooling")
#Pooled Experiment, verified all the same as P2SLS
P2SLS_1 <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
|. -taxcorp + epl + wsc,
data = X2, model = "pooling")
P2SLS_2 <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
| + ud + cbc + brr1y + ttr + inf_gdpd + pslm + epl + wsc,
data = P_data, model = "pooling")
#Between estimator
B2SLS <- update(P2SLS, model="between") #this is bad
#FE or within estimator
FE2SLS <- update(P2SLS, model="within")
#Random estimator
RE2SLS <- update(P2SLS, model="random")
#Check output - looks bad lol
# stargazer(Reg01,Reg02,Reg03,Reg04,P2SLS,B2SLS, type = "text")
#Time Fixed Effects and experiments
#Original Form - best fit model
FE2SLS_T <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd +pslm
+factor(time1)
|. -taxcorp + epl + wsc,
data = P_data, model = "within")
#added "time" effect to arguments
FE2SLS_T1 <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd +pslm
+factor(time1)
|. -taxcorp + epl + wsc,
data = P_data, model = "within", effect = "time")
#Streamlined
TFE2SLS1 <- update(P2SLS, model="within", effect = "time")
#check output
#stargazer(FE2SLS_T,FE2SLS_T1,TFE2SLS1, type = "text")
#Individual FE and Experiments
#Original model - all are exact same
FE2SLS_L <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
+factor(location1)
|. -taxcorp + epl + wsc,
data = P_data, model = "within")
# added "individual" effect to arguments - specifying effect prevents same results as_TL regs
FE2SLS_L1 <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
+factor(location1)
|. -taxcorp + epl + wsc,
data = P_data, model = "within", effect = "individual")
# streamlined, yes same as above
FE2SLS_L2 <- update(P2SLS, model="within", effect="individual")
#check output
#stargazer(FE2SLS_L, FE2SLS_L1, FE2SLS_L2, type = "text")
#Time and Individual FE
#Original model
FE2SLS_TL <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
+factor(time1) +factor(location1)
|. -taxcorp + epl + wsc,
data = P_data, model = "within", effect = "twoways")
# removed twoways effect from argument - best fit
FE2SLS_TL1 <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
+factor(time1) +factor(location1)
|. -taxcorp + epl + wsc,
data = P_data, model = "within")
#streamlined1, added back effect
FE2SLS_TL2 <- update(P2SLS, model="within", effect="twoways")
#streamlined2, removed effect from update
FE2SLS_TL3 <- update(P2SLS, model="within")
#check output
#stargazer(FE2SLS_TL, FE2SLS_TL1, FE2SLS_TL2, FE2SLS_TL3, type = "text")
#LM for random effects vs OLS
PX1 <- plmtest(P2SLS)
##BP LM for random effects, null is no panel effect, include
PX11 <- plmtest(P2SLS, type=c("bp"))
# Testing time-fixed effects, null is no FTE
PX12 <- plmtest(FE2SLS, c("time"), type=c("bp"))
#LM for fixed effects vs OLS, generic
PX2 <- pFtest(FE2SLS,P2SLS)
#LM for fixed effectsw vs OLS, Time specific
PX21 <- pFtest(TFE2SLS1, FE2SLS)
Warning in pf(stat, df1, df2, lower.tail = FALSE) : NaNs produced
#hausman test for fixed vs random effects, null=random preferred
PX6 <- phtest(RE2SLS,FE2SLS)
#Cross sectional dependance, null is that residuals across entities are not
#correlated. Used to test whether residuals are correlated across entities. CS dep
#can lead to bias in test results
#BPLM for cross sectional dependence in panels
PX7 <- pcdtest(FE2SLS, test=c("lm"))
Warning in pcdres(tres = tres, n = n, w = w, form = paste(deparse(x$formula)), :
Some pairs of individuals (20 percent) do not have any or just one time period in common and have been omitted from calculation
# Pesaran cross sectional dependence
PX71 <- pcdtest(FE2SLS, test=c("cd"))
Warning in pcdres(tres = tres, n = n, w = w, form = paste(deparse(x$formula)), :
Some pairs of individuals (20 percent) do not have any or just one time period in common and have been omitted from calculation
# Breusch Godfrey for SC in panels. Null is no SC. May not matter as a relativily
# short study period T=19
PX8 <- pbgtest(FE2SLS)
#ADF for stochastic trends, Null- series has unit root.
require(tseries)
Loading required package: tseries
Warning: package ‘tseries’ was built under R version 4.0.5
Registered S3 method overwritten by 'quantmod':
method from
as.zoo.data.frame zoo
‘tseries’ version: 0.10-50
‘tseries’ is a package for time series analysis and computational finance.
See ‘library(help="tseries")’ for details.
PX9 <- adf.test(P_data$hur, k=2)
Warning in adf.test(P_data$hur, k = 2) :
p-value smaller than printed p-value
#BP null is homoskedasticity, alternative: heteroskadasticity
PX0 <- bptest(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
+factor(location1), data=P_data, studentize = F)
PX11;PX2;PX6;PX7;PX71;PX8;PX9;PX0
Lagrange Multiplier Test - (Breusch-Pagan) for unbalanced panels
data: hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm | . - ...
chisq = 571.15, df = 1, p-value < 2.2e-16
alternative hypothesis: significant effects
F test for individual effects
data: hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm | . - ...
F = 378.68, df1 = 31, df2 = 318, p-value < 2.2e-16
alternative hypothesis: significant effects
Hausman Test
data: hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm | . - ...
chisq = 3.5086, df = 7, p-value = 0.8343
alternative hypothesis: one model is inconsistent
Breusch-Pagan LM test for cross-sectional dependence in panels
data: hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm | . - taxcorp + epl + wsc
chisq = 926.79, df = 447, p-value < 2.2e-16
alternative hypothesis: cross-sectional dependence
Pesaran CD test for cross-sectional dependence in panels
data: hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm | . - taxcorp + epl + wsc
z = 4.1856, p-value = 2.844e-05
alternative hypothesis: cross-sectional dependence
Breusch-Godfrey/Wooldridge test for serial correlation in panel models
data: hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm | . - ...
chisq = 188.9, df = 1, p-value < 2.2e-16
alternative hypothesis: serial correlation in idiosyncratic errors
Augmented Dickey-Fuller Test
data: P_data$hur
Dickey-Fuller = -4.7542, Lag order = 2, p-value = 0.01
alternative hypothesis: stationary
Breusch-Pagan test
data: hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm + factor(location1)
BP = 474.14, df = 38, p-value < 2.2e-16
Final Output Table 3
*NOTE: the p values at the bottom of the table have to be fixed in the output document, with code (to copypasta): * to replace asterisks that cause bolding
FOutput <- stargazer(Reg03, P2SLS, FE2SLS_T1, FE2SLS_L1, FE2SLS_TL, RE2SLS,
type = "html", title = "Table 3: Regressions to Explain the Unemployment Rate",
model.names = F,
keep = c("taxcorp", "ud","cbc","brr1y", "ttr","inf_gdpd", "hur", "pslm", "epl","wsc", "Constant"),
column.labels = c("OLS", "2SLS Pooled",
"2SLS Time Fixed Effects", "2SLS Individual Fixed Effects", "2SLS Time and Individual Fixed Effects", "2SLS Random Effects")
)
# run in console, it's messy
Table 3: Regressions to Explain the Unemployment Rate
|
|
|
|
Dependent variable:
|
|
|
|
|
|
hur
|
|
|
OLS
|
2SLS Pooled
|
2SLS Time Fixed Effects
|
2SLS Individual Fixed Effects
|
2SLS Time and Individual Fixed Effects
|
2SLS Random Effects
|
|
|
(1)
|
(2)
|
(3)
|
(4)
|
(5)
|
(6)
|
|
|
|
taxcorp
|
-0.501***
|
-12.904
|
-3.267
|
-3.752*
|
-1.992
|
-3.208
|
|
|
(0.134)
|
(14.516)
|
(4.088)
|
(2.026)
|
(1.930)
|
(2.238)
|
|
|
|
|
|
|
|
|
|
ud
|
-0.004
|
0.263
|
0.033
|
0.011
|
0.119
|
-0.001
|
|
|
(0.014)
|
(0.351)
|
(0.102)
|
(0.068)
|
(0.129)
|
(0.052)
|
|
|
|
|
|
|
|
|
|
cbc
|
0.042***
|
0.158
|
0.054
|
0.057
|
0.075**
|
-0.002
|
|
|
(0.011)
|
(0.160)
|
(0.044)
|
(0.049)
|
(0.037)
|
(0.038)
|
|
|
|
|
|
|
|
|
|
brr1y
|
-0.006
|
0.246
|
0.038
|
0.039**
|
0.036***
|
0.039***
|
|
|
(0.010)
|
(0.321)
|
(0.091)
|
(0.016)
|
(0.013)
|
(0.014)
|
|
|
|
|
|
|
|
|
|
pslm
|
1.727***
|
-9.934
|
-0.885
|
1.457*
|
2.180***
|
1.637**
|
|
|
(0.320)
|
(13.666)
|
(3.725)
|
(0.779)
|
(0.558)
|
(0.825)
|
|
|
|
|
|
|
|
|
|
wsc
|
-1.306***
|
|
|
|
|
|
|
|
(0.174)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
epl
|
0.543***
|
|
|
|
|
|
|
|
(0.202)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ttr
|
-0.137***
|
-0.700
|
-0.260
|
0.518
|
0.220
|
0.322
|
|
|
(0.048)
|
(0.704)
|
(0.229)
|
(0.343)
|
(0.356)
|
(0.331)
|
|
|
|
|
|
|
|
|
|
inf_gdpd
|
-0.207**
|
2.078
|
0.526
|
0.156
|
0.019
|
0.123
|
|
|
(0.102)
|
(2.461)
|
(0.649)
|
(0.279)
|
(0.198)
|
(0.304)
|
|
|
|
|
|
|
|
|
|
Constant
|
11.818***
|
53.930
|
|
|
|
2.135
|
|
|
(1.309)
|
(50.013)
|
|
|
|
(4.595)
|
|
|
|
|
|
|
|
|
|
|
|
Observations
|
357
|
357
|
357
|
357
|
357
|
357
|
|
R2
|
0.367
|
0.049
|
0.103
|
0.189
|
0.264
|
0.202
|
|
Adjusted R2
|
0.350
|
0.030
|
0.035
|
0.092
|
0.127
|
0.186
|
|
Residual Std. Error
|
2.881 (df = 347)
|
|
|
|
|
|
|
F Statistic
|
22.338*** (df = 9; 347)
|
5.469
|
43.285***
|
96.725***
|
144.909***
|
113.648***
|
|
|
|
Note:
|
*p<0.1; **p<0.05; ***p<0.01
|
---
title: "ECON 145 Data Project Final"
output: 
  html_notebook: default
  always_allow_html: true
---

## First Steps
### Overall Project Stuff
* Data File: master.xlsx
* Gold: #EED311
* Blue: #112CEE
* Green: #11EE65
* Red: #EE119A

### Read in required libraries
```{r packages, echo=F, warning=F}
require(readxl)
require(tidyverse) # i think this gives me dplyr and tidyr
require(janitor) # clean names
require(ggplot2) # data viz
require(kableExtra) #Stat tables
require(stargazer) #Regression tables
# require(ivreg) #Maybe in the future
require(plm) #panel linear models
require(lmtest)


```

### Read in Data
* original df was cleaned wholly in Excel and produced "X1.csv"
* we recreated X1 as X2 with thru data manipulation through R
* the df "DirtyD" was an attempt to recreate a dataset ran by Prof K from master.xlsx, R don't like it
```{r data read in}
# attempting 540 observations
#DirtyD <- read_excel("C:\\Users\\hisjo\\OneDrive\\Documents\\Sac State\\ECON 145 Economic Research Methods\\Data\\master.xlsx") %>% 
#  clean_names() %>% #piping prevents an umlaut over the i
#  select(-c(l1_inf_gdpd)) %>% #Extranious data, just in case         
#  #transform(cbc = as.numeric(cbc)) %>% #cbc had some char, convert and coerce NAs
#  filter(time1!=2020) %>%  #Remove 2020 Entries
#  drop_na()
#N = 546
  
X2 <- read_excel("C:\\Users\\hisjo\\OneDrive\\Documents\\Sac State\\ECON 145 Economic Research Methods\\Data\\master.xlsx") %>% 
  clean_names() %>% 
  transform(cbc = as.numeric(cbc)) %>% #cbc had some char, convert and coerce NAs
  filter(time1 != 2020) %>% # Remove 2020 from our dataset
  select(-c(l1_inf_gdpd)) %>% #Extraneous data dropped (lagged gdpd)
  drop_na() # remove NA
#N=357, nCol=12 

#Tell R and PLM that X2 is panel data with time1=time and location1=id
#creates a new data frame with index column, moving forward I think this is
#the correct way to handle this with PLM
P_data <- pdata.frame(X2, index = c("location1", "time1"))
# display to show transformation
X2
P_data
```

### Variables Descriptions and Source
```{r Variables Table}
VarTable <- data.frame(
  Variable = c("hur", "taxcorp","ud","cbc","brr1y","pslm","epl","wsc","ttr","inf_gdpd"),
  Description = c("Annual Harmonized Unemployment Rate, %",
                  "Tax on Corporate Profits, % GDP",
                  "Union Density Ratio",
                  "Collective Bargaining Coverage, %",
                  "Benefit Replacement Rate at 1 year, %",
                  "Public Spending on Labor Markets, %GDP",
                  "Employment Protection Legislation, Instrumented",
                  "Wage Setting Coordination, Instrumented",
                  "Total Tax Revenue, %GDP",
                  "Change in Inflation with GDP Deflator, %"),
  Source = c("OECD 2022a","OECD 2022b","OECD 2022f","OECD 2022g","OECD 2022d",
             "OECD 2022e","OECD 2022c","OECD 2022h","OECD 2022i","World Bank 2022")
)
# create the object to output in plots 
VarTable_V <- kbl(VarTable, caption = "Table 1: Variables") %>% 
  kable_styling(bootstrap_options = c("striped", "hover","condensed"))
VarTable_V
```


### Overall Summary Statistics
```{r summary stats }
#stargazer wont make a table if there are NA or type char
# I don't like the way it outputs, even in html. Using Kable
#stargazer(X2, title = "Table 2: Summary Statistics", type = "html", omit = c("time1") )

Descriptive1 <- matrix( 
  c(
      c("hur"),nrow(X2), round(mean(X2$hur),3),round(sd(X2$hur),3),round(min(X2$hur),3),
      round(max(X2$hur),3),
      
      c("taxcorp"),nrow(X2),round(mean(X2$taxcorp),3),  round(sd(X2$taxcorp),3), 
      round(min(X2$taxcorp),3),round(max(X2$taxcorp),3),
      
      c("ud"),nrow(X2),round(mean(X2$ud),3),round(sd(X2$ud),3), round(min(X2$ud),3),
      round(max(X2$ud),3),
      
      c("pslm"),nrow(X2),round(mean(X2$pslm),3),round(sd(X2$pslm),3),round(min(X2$pslm),3),
      round(max(X2$pslm),3),
      
      c("epl"),nrow(X2),round(mean(X2$epl),3), round(sd(X2$epl),3),
      round(min(X2$epl),3),round(max(X2$epl),3),
      
      c("wsc"),nrow(X2),round(mean(X2$wsc),3), round(sd(X2$wsc),3),
      round(min(X2$wsc),3),round(max(X2$wsc),3),
      
      c("cbc"),nrow(X2),round(mean(X2$cbc),3),
      round(sd(X2$cbc),3),round(min(X2$cbc),3),round(max(X2$cbc),3),
      
      c("brr1y"),nrow(X2), round(mean(X2$brr1y),3), round(sd(X2$brr1y),3),
      round(min(X2$brr1y),3),round(max(X2$brr1y),3),
      
      c("ttr"),nrow(X2), round(mean(X2$ttr),3), round(sd(X2$ttr),3), round(min(X2$ttr),3),
      round(max(X2$ttr),3),
      
      c("inf_gdpd"),nrow(X2),round(mean(X2$inf_gdpd),3),round(sd(X2$inf_gdpd),3),
      round(min(X2$inf_gdpd),3),round(max(X2$inf_gdpd),3)
    ),
  ncol = 6, byrow = T)

colnames(Descriptive1) <- c("Statistic","N", "Mean", "SD","Min","Max")
#rownames(Descriptive1) <- c("hur","taxcorp","ud","pslm","epl","wsc","cbc","brr1y","ttr","inf_gdpd")

Descriptive1_V <- kbl(Descriptive1, caption = "Table 2: Variable Summary Statistics, All Observations") %>% 
  kable_styling(bootstrap_options = c("striped", "hover","condensed")) 
Descriptive1_V
```

###  Figures 3 and 4, HUR vs. Taxcorp, by year
```{r, HUR v taxcorp annual Figure 2}
HVTAM <- X2 %>% select(time1, hur, taxcorp) %>% 
  group_by(time1) %>% 
  drop_na() %>% 
  summarize(hur_mean=mean(hur), taxcorp_mean = mean(taxcorp))

colors <- c("hur" = "#EE119A", "taxcorp" = "#11EE65")

HVTAMV <- ggplot(HVTAM, aes(x=time1))+
  geom_line(aes(y=hur_mean, color = "hur"), show.legend = T)+
  geom_line(aes(y=taxcorp_mean, color = "taxcorp"), show.legend = T)+
  labs(x="Year", y="Mean, %", 
       title = "Figure 2: hur and taxcorp, means by year",
       color = "Legend", caption = "32 OECD Countries from 2001-2019 N = 357")+
  ylim(0,10)
HVTAMV # Fix Legend and colors

X3 <- X2
X3$time1 <- as.factor(X3$time1)
HVTAMV1 <- ggplot(data = X3, aes(x=time1))+
  geom_boxplot(aes(y=hur, color="hur"), show.legend = T)+
  geom_boxplot(aes(y=taxcorp, color="taxcorp"), show.legend = T)+
  theme(axis.text.x = element_text(angle = 90, size = 7))+
  labs(title = "Figure 3: Time Factor hur and taxcorp boxplots", x="Year", y="Percent" )
HVTAMV1
```

### Figures 5 and 6: HUR vs. taxcorp, by country
```{r}
HVTIM <- X2 %>% select(location1, hur, taxcorp, wsc, epl) %>% 
  group_by(location1) %>% 
  drop_na() %>% 
  summarise(hur_mean1=mean(hur), taxcorp_mean1= mean(taxcorp), wsc_mean=mean(wsc),
            epl_mean=mean(epl))

HVTIMV <- ggplot(HVTIM, aes(x=location1, group=1))+
  geom_line(aes(y=hur_mean1, color = "hur"), show.legend = T)+
  geom_line(aes(y=taxcorp_mean1, color= "taxcorp"), show.legend = T)+
  #geom_col(aes(y=wsc_mean, color = "wsc"), alpha = 0.2, position = position_dodge2(preserve = "single"), show.legend = T)+
  geom_col(aes(y=epl_mean, color = "epl"), color= "#2CEE11", alpha = 0.5)+
  labs(x="Location / Individual", y="Mean, %", 
       title = "Figure 4: hur and taxcorp, means by location",
       subtitle = "with mean of wsc instrument for location",
       color = "Legend",caption = "32 OECD Countries from 2001-2019 N = 357")+
  theme(axis.text.x = element_text(angle = 90, size = 7))
HVTIMV

X3$location1 <- as.factor(X3$location1)
HVTIMV1 <- ggplot(data = X3, aes(x=location1))+
  geom_boxplot(aes(y=hur, color="hur"), show.legend = T)+
  geom_boxplot(aes(y=taxcorp, color="taxcorp"), show.legend = T)+
  theme(axis.text.x = element_text(angle = 90, size = 7))+
  labs(title = "Figure 5: Location Factor hur and taxcorp", x="Location", y="Percent" )
HVTIMV1
```
### Figures 7 and 8
* Histograms showing distribution of instruments
* geom_bar counts the number of occurence of each instrumented value through the dataset.
```{r WSC bar chart}
epl_bar <- ggplot(data = X2)+
  geom_bar(aes(epl), width = 0.1, fill = "#112CEE", show.legend = F)+
  labs(x= "epl Instrument", y="Count",title = "Figure 6: Employment Protection Legislation, occurence of levels", caption = "N=357")
wsc_bar <- ggplot(data=X2)+
  geom_bar(aes(wsc), fill = "#112CEE", show.legend = F)+
  labs(x="wsc Instrument", y="Count", title = "Figure 7: Wage Setting Coordination, occurence of type", caption = "N=357")

epl_bar;wsc_bar

```


### Figures 8-14: Facet Wrapped Charts for paper
* check figure numbers
```{r Facet wrapped charts}
#Facet charts
#hur vs taxcorp by year
FW1 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=taxcorp,y=hur), color = "#EED311" ) + 
  geom_smooth(mapping=aes(x=taxcorp,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1) + labs(x = "taxcorp", y="hur", title = "Figure 8: hur vs taxcorp time factor")
FW11 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=taxcorp,y=hur), color = "#EED311" ) + 
  geom_smooth(mapping=aes(x=taxcorp,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1) + labs(x = "taxcorp", y="hur", title = "Figure 8: hur vs taxcorp location factor")

#hur vs ud
FW2 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=ud,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=ud,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1)+ labs(x = "ud", y="hur", title = "Figure 9: hur vs ud time factor")
FW21 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=ud,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=ud,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1)+ labs(x = "ud", y="hur", title = "Figure 9: hur vs ud location factor")

#hur vs cbc
FW3 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=cbc,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=cbc,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1)+ labs(x = "cbc", y="hur", title = "Figure 10: hur vs cbc time factor")
FW31 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=cbc,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=cbc,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1)+ labs(x = "cbc", y="hur", title = "Figure 10: hur vs cbc location factor")

#hur vs brr
FW4 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=brr1y,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=brr1y,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1)+ labs(x = "brr", y="hur", title = "Figure 11: hur vs brr1y time factor")
FW41 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=brr1y,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=brr1y,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1)+ labs(x = "brr", y="hur", title = "Figure 11: hur vs brr1y location factor")

#hur vs pslm
FW5 <- ggplot(data=X2)+
  geom_point(mapping= aes(x=pslm,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=pslm, y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1)+ labs(x="pslm", y="hur", title = "Figure 12: ur vs pslm time factor")
FW51 <- ggplot(data=X2)+
  geom_point(mapping= aes(x=pslm,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=pslm, y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1)+ labs(x="pslm", y="hur", title = "Figure 12: ur vs pslm location factor")


#hur vs ttr
FW7 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=ttr,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=ttr,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1)+ labs(x = "ttr", y="hur", title = "Figure 13: hur vs ttr time factor")
FW71 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=ttr,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=ttr,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1)+ labs(x = "ttr", y="hur", title = "Figure 13: hur vs ttr location factor")

#hur vs delta_i
FW8 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=inf_gdpd,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=inf_gdpd,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1)+ labs(x = "inf_gdpd", y="hur", title = "Figure 14: hur vs inf_gdpd time factor")
FW81 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=inf_gdpd,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=inf_gdpd,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1)+ labs(x = "inf_gdpd", y="hur", title = "Figure 14: hur vs inf_gdpd location factor")

FW1; FW11;FW2;FW21;FW3;FW31;FW4;FW41;FW5;FW51;FW7;FW71;FW8;FW81
```
## Data Analysis, with experiments and tests

* details of use of the plm package are found here <https://cran.r-project.org/web/packages/plm/plm.pdf>
* "getting started in Fixed/Random effects models using R"
<https://rstudio-pubs-static.s3.amazonaws.com/372492_3e05f38dd3f248e89cdedd317d603b9a.html>

```{r Data Analysis with tests and Experiments}
#P_data created in step 1
# Experiments with lm and plm
#OLS per "getting started"
#require(car)
# ols <- lm(hur~taxcorp, data = P_data)
# yhat <- ols$fitted
# Plot1 <- ggplot(data=P_data, aes(x=taxcorp))+
#   geom_point(mapping=aes(x=taxcorp,y=hur), color = "#EED311" ) + 
#   geom_smooth(mapping=aes(x=taxcorp,y=hur), method = lm, se=F, color = "#112CEE")
# fixed.dum <- lm(hur~taxcorp + factor(location1)-1, data=P_data)
# yhat1 <- fixed.dum$fitted
# Plot2 <- sp(yhat1~P_data$taxcorp|P_data$location1, xlab="taxcorp", ylab=yhat,
#            boxplot=F, smooth=F)

# Plot3 <- scatterplot(data=P_data,hur~time1|location1)
#OLS with LM, no instruments from X2
Reg01 <- lm(hur~taxcorp + ud + cbc + brr1y + pslm +ttr + inf_gdpd, data=X2)
#OLS with plm, no instruments from P_data
Reg02 <- plm(hur~taxcorp + ud + cbc + brr1y + pslm + ttr + inf_gdpd, data=P_data)
#OLS with lm, instruments, from P_data - include as baseline
Reg03 <- lm(hur~taxcorp + ud + cbc + brr1y + pslm +wsc +epl +ttr + inf_gdpd, data=P_data)
#OLS with plm, instruments from P_data
Reg04 <- plm(hur~taxcorp + ud + cbc + brr1y + pslm +wsc +epl +ttr + inf_gdpd, data=P_data)
#Pooled, preferred model (clean)
P2SLS <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                |.-taxcorp + epl + wsc, 
                data = P_data, model = "pooling")
#Pooled Experiment, verified all the same as P2SLS
P2SLS_1 <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                 |. -taxcorp + epl + wsc, 
                 data = X2, model = "pooling")
P2SLS_2 <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                 | + ud + cbc + brr1y + ttr + inf_gdpd + pslm + epl + wsc, 
                 data = P_data, model = "pooling")
#Between estimator 
B2SLS <- update(P2SLS, model="between") #this is bad
#FE or within estimator
FE2SLS <- update(P2SLS, model="within")
#Random estimator
RE2SLS <- update(P2SLS, model="random")
#Check output - looks bad lol
# stargazer(Reg01,Reg02,Reg03,Reg04,P2SLS,B2SLS, type = "text")


#Time Fixed Effects and experiments
#Original Form - best fit model
FE2SLS_T <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd +pslm
                +factor(time1)
                |. -taxcorp + epl + wsc, 
                data = P_data, model = "within")
#added "time" effect to arguments
FE2SLS_T1 <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd +pslm
                +factor(time1)
                |. -taxcorp + epl + wsc, 
                data = P_data, model = "within", effect = "time")
#Streamlined
TFE2SLS1 <- update(P2SLS, model="within", effect = "time")

#check output
#stargazer(FE2SLS_T,FE2SLS_T1,TFE2SLS1, type = "text")

#Individual FE and Experiments
#Original model - all are exact same
FE2SLS_L <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                +factor(location1)
                |. -taxcorp + epl + wsc, 
                data = P_data, model = "within")
# added "individual" effect to arguments - specifying effect prevents same results as_TL regs
FE2SLS_L1 <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                +factor(location1)
                |. -taxcorp + epl + wsc, 
                data = P_data, model = "within", effect = "individual")
# streamlined, yes same as above
FE2SLS_L2 <- update(P2SLS, model="within", effect="individual")
#check output
#stargazer(FE2SLS_L, FE2SLS_L1, FE2SLS_L2, type = "text")

#Time and Individual FE
#Original model
FE2SLS_TL <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                 +factor(time1) +factor(location1)
                 |. -taxcorp + epl + wsc, 
                 data = P_data, model = "within", effect = "twoways")
# removed twoways effect from argument - best fit
FE2SLS_TL1 <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                 +factor(time1) +factor(location1)
                 |. -taxcorp + epl + wsc, 
                 data = P_data, model = "within")
#streamlined1, added back effect
FE2SLS_TL2 <- update(P2SLS, model="within", effect="twoways")
#streamlined2, removed effect from update 
FE2SLS_TL3 <- update(P2SLS, model="within")
#check output
#stargazer(FE2SLS_TL, FE2SLS_TL1, FE2SLS_TL2, FE2SLS_TL3, type = "text")

#LM for random effects vs OLS
PX1 <- plmtest(P2SLS)
##BP LM for random effects, null is no panel effect, include
PX11 <- plmtest(P2SLS, type=c("bp"))

# Testing time-fixed effects, null is no FTE
PX12 <- plmtest(FE2SLS, c("time"), type=c("bp"))

#LM for fixed effects vs OLS, generic 
PX2 <- pFtest(FE2SLS,P2SLS)
#LM for fixed effectsw vs OLS, Time specific
PX21 <- pFtest(TFE2SLS1, FE2SLS)
#hausman test for fixed vs random effects, null=random preferred
PX6 <- phtest(RE2SLS,FE2SLS)

#Cross sectional dependance, null is that residuals across entities are not
#correlated. Used to test whether residuals are correlated across entities. CS dep 
#can lead to bias in test results
#BPLM for cross sectional dependence in panels
PX7 <- pcdtest(FE2SLS, test=c("lm"))
# Pesaran cross sectional dependence
PX71 <- pcdtest(FE2SLS, test=c("cd"))

# Breusch Godfrey for SC in panels. Null is no SC. May not matter as a relativily 
# short study period T=19
PX8 <- pbgtest(FE2SLS)
#ADF for stochastic trends, Null- series has unit root.
require(tseries)
PX9 <- adf.test(P_data$hur, k=2)
#BP null is homoskedasticity, alternative: heteroskadasticity
PX0 <- bptest(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                +factor(location1), data=P_data, studentize = F)

PX11;PX2;PX6;PX7;PX71;PX8;PX9;PX0
```


### Final Output Table 3
*NOTE: the p values at the bottom of the table have to be fixed in the output document, with code (to copypasta): 
<span>&#42;</span> to replace asterisks that cause bolding
```{r Regression Output}
FOutput <- stargazer(Reg03, P2SLS,  FE2SLS_T1, FE2SLS_L1,  FE2SLS_TL, RE2SLS,
          type = "html", title = "Table 3: Regressions to Explain the Unemployment Rate", 
          model.names = F,
          keep = c("taxcorp", "ud","cbc","brr1y", "ttr","inf_gdpd", "hur", "pslm", "epl","wsc", "Constant"),
          column.labels = c("OLS", "2SLS Pooled", 
                            "2SLS Time Fixed Effects", "2SLS Individual Fixed Effects", "2SLS Time and Individual Fixed Effects", "2SLS Random Effects")
          )
# run in console, it's messy
```
<table style="text-align:center"><caption><strong>Table 3: Regressions to Explain the Unemployment Rate</strong></caption>
<tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="6"><em>Dependent variable:</em></td></tr>
<tr><td></td><td colspan="6" style="border-bottom: 1px solid black"></td></tr>
<tr><td style="text-align:left"></td><td colspan="6">hur</td></tr>
<tr><td style="text-align:left"></td><td>OLS</td><td>2SLS Pooled</td><td>2SLS Time Fixed Effects</td><td>2SLS Individual Fixed Effects</td><td>2SLS Time and Individual Fixed Effects</td><td>2SLS Random Effects</td></tr>
<tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td><td>(4)</td><td>(5)</td><td>(6)</td></tr>
<tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">taxcorp</td><td>-0.501<sup>***</sup></td><td>-12.904</td><td>-3.267</td><td>-3.752<sup>*</sup></td><td>-1.992</td><td>-3.208</td></tr>
<tr><td style="text-align:left"></td><td>(0.134)</td><td>(14.516)</td><td>(4.088)</td><td>(2.026)</td><td>(1.930)</td><td>(2.238)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">ud</td><td>-0.004</td><td>0.263</td><td>0.033</td><td>0.011</td><td>0.119</td><td>-0.001</td></tr>
<tr><td style="text-align:left"></td><td>(0.014)</td><td>(0.351)</td><td>(0.102)</td><td>(0.068)</td><td>(0.129)</td><td>(0.052)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">cbc</td><td>0.042<sup>***</sup></td><td>0.158</td><td>0.054</td><td>0.057</td><td>0.075<sup>**</sup></td><td>-0.002</td></tr>
<tr><td style="text-align:left"></td><td>(0.011)</td><td>(0.160)</td><td>(0.044)</td><td>(0.049)</td><td>(0.037)</td><td>(0.038)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">brr1y</td><td>-0.006</td><td>0.246</td><td>0.038</td><td>0.039<sup>**</sup></td><td>0.036<sup>***</sup></td><td>0.039<sup>***</sup></td></tr>
<tr><td style="text-align:left"></td><td>(0.010)</td><td>(0.321)</td><td>(0.091)</td><td>(0.016)</td><td>(0.013)</td><td>(0.014)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">pslm</td><td>1.727<sup>***</sup></td><td>-9.934</td><td>-0.885</td><td>1.457<sup>*</sup></td><td>2.180<sup>***</sup></td><td>1.637<sup>**</sup></td></tr>
<tr><td style="text-align:left"></td><td>(0.320)</td><td>(13.666)</td><td>(3.725)</td><td>(0.779)</td><td>(0.558)</td><td>(0.825)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">wsc</td><td>-1.306<sup>***</sup></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left"></td><td>(0.174)</td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">epl</td><td>0.543<sup>***</sup></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left"></td><td>(0.202)</td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">ttr</td><td>-0.137<sup>***</sup></td><td>-0.700</td><td>-0.260</td><td>0.518</td><td>0.220</td><td>0.322</td></tr>
<tr><td style="text-align:left"></td><td>(0.048)</td><td>(0.704)</td><td>(0.229)</td><td>(0.343)</td><td>(0.356)</td><td>(0.331)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">inf_gdpd</td><td>-0.207<sup>**</sup></td><td>2.078</td><td>0.526</td><td>0.156</td><td>0.019</td><td>0.123</td></tr>
<tr><td style="text-align:left"></td><td>(0.102)</td><td>(2.461)</td><td>(0.649)</td><td>(0.279)</td><td>(0.198)</td><td>(0.304)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Constant</td><td>11.818<sup>***</sup></td><td>53.930</td><td></td><td></td><td></td><td>2.135</td></tr>
<tr><td style="text-align:left"></td><td>(1.309)</td><td>(50.013)</td><td></td><td></td><td></td><td>(4.595)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Observations</td><td>357</td><td>357</td><td>357</td><td>357</td><td>357</td><td>357</td></tr>
<tr><td style="text-align:left">R<sup>2</sup></td><td>0.367</td><td>0.049</td><td>0.103</td><td>0.189</td><td>0.264</td><td>0.202</td></tr>
<tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.350</td><td>0.030</td><td>0.035</td><td>0.092</td><td>0.127</td><td>0.186</td></tr>
<tr><td style="text-align:left">Residual Std. Error</td><td>2.881 (df = 347)</td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">F Statistic</td><td>22.338<sup>***</sup> (df = 9; 347)</td><td>5.469</td><td>43.285<sup>***</sup></td><td>96.725<sup>***</sup></td><td>144.909<sup>***</sup></td><td>113.648<sup>***</sup></td></tr>
<tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="6" style="text-align:right"><sup><span>&#42;</span></sup>p<0.1; <sup><span>&#42;</span><span>&#42;</span></sup>p<0.05; <sup><span>&#42;</span><span>&#42;</span><span>&#42;</span></sup>p<0.01</td></tr>
</table>