First Steps

Overall Project Stuff

  • Data File: master.xlsx
  • Gold: #EED311
  • Blue: #112CEE
  • Green: #11EE65
  • Red: #EE119A

Read in required libraries

Loading required package: readxl
Loading required package: tidyverse
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
-- Attaching packages ---------------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.1 --
v ggplot2 3.3.5     v purrr   0.3.4
v tibble  3.1.6     v dplyr   1.0.7
v tidyr   1.1.4     v stringr 1.4.0
v readr   2.1.1     v forcats 0.5.1
-- Conflicts ------------------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
Loading required package: janitor

Attaching package: ‘janitor’

The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test

Loading required package: kableExtra

Attaching package: ‘kableExtra’

The following object is masked from ‘package:dplyr’:

    group_rows

Loading required package: stargazer

Please cite as: 

 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer 

Loading required package: plm

Attaching package: ‘plm’

The following objects are masked from ‘package:dplyr’:

    between, lag, lead

Loading required package: lmtest
Loading required package: zoo

Attaching package: ‘zoo’

The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric

Read in Data

  • original df was cleaned wholly in Excel and produced “X1.csv”
  • we recreated X1 as X2 with thru data manipulation through R
  • the df “DirtyD” was an attempt to recreate a dataset ran by Prof K from master.xlsx, R don’t like it
# attempting 540 observations
#DirtyD <- read_excel("C:\\Users\\hisjo\\OneDrive\\Documents\\Sac State\\ECON 145 Economic Research Methods\\Data\\master.xlsx") %>% 
#  clean_names() %>% #piping prevents an umlaut over the i
#  select(-c(l1_inf_gdpd)) %>% #Extranious data, just in case         
#  #transform(cbc = as.numeric(cbc)) %>% #cbc had some char, convert and coerce NAs
#  filter(time1!=2020) %>%  #Remove 2020 Entries
#  drop_na()
#N = 546
  
X2 <- read_excel("C:\\Users\\hisjo\\OneDrive\\Documents\\Sac State\\ECON 145 Economic Research Methods\\Data\\master.xlsx") %>% 
  clean_names() %>% 
  transform(cbc = as.numeric(cbc)) %>% #cbc had some char, convert and coerce NAs
  filter(time1 != 2020) %>% # Remove 2020 from our dataset
  select(-c(l1_inf_gdpd)) %>% #Extraneous data dropped (lagged gdpd)
  drop_na() # remove NA
Warning in eval(substitute(list(...)), `_data`, parent.frame()) :
  NAs introduced by coercion
#N=357, nCol=12 

#Tell R and PLM that X2 is panel data with time1=time and location1=id
#creates a new data frame with index column, moving forward I think this is
#the correct way to handle this with PLM
P_data <- pdata.frame(X2, index = c("location1", "time1"))
# display to show transformation
X2
P_data

Variables Descriptions and Source

VarTable <- data.frame(
  Variable = c("hur", "taxcorp","ud","cbc","brr1y","pslm","epl","wsc","ttr","inf_gdpd"),
  Description = c("Annual Harmonized Unemployment Rate, %",
                  "Tax on Corporate Profits, % GDP",
                  "Union Density Ratio",
                  "Collective Bargaining Coverage, %",
                  "Benefit Replacement Rate at 1 year, %",
                  "Public Spending on Labor Markets, %GDP",
                  "Employment Protection Legislation, Instrumented",
                  "Wage Setting Coordination, Instrumented",
                  "Total Tax Revenue, %GDP",
                  "Change in Inflation with GDP Deflator, %"),
  Source = c("OECD 2022a","OECD 2022b","OECD 2022f","OECD 2022g","OECD 2022d",
             "OECD 2022e","OECD 2022c","OECD 2022h","OECD 2022i","World Bank 2022")
)
# create the object to output in plots 
VarTable_V <- kbl(VarTable, caption = "Table 1: Variables") %>% 
  kable_styling(bootstrap_options = c("striped", "hover","condensed"))
VarTable_V
Table 1: Variables
Variable Description Source
hur Annual Harmonized Unemployment Rate, % OECD 2022a
taxcorp Tax on Corporate Profits, % GDP OECD 2022b
ud Union Density Ratio OECD 2022f
cbc Collective Bargaining Coverage, % OECD 2022g
brr1y Benefit Replacement Rate at 1 year, % OECD 2022d
pslm Public Spending on Labor Markets, %GDP OECD 2022e
epl Employment Protection Legislation, Instrumented OECD 2022c
wsc Wage Setting Coordination, Instrumented OECD 2022h
ttr Total Tax Revenue, %GDP OECD 2022i
inf_gdpd Change in Inflation with GDP Deflator, % World Bank 2022

Overall Summary Statistics

#stargazer wont make a table if there are NA or type char
# I don't like the way it outputs, even in html. Using Kable
#stargazer(X2, title = "Table 2: Summary Statistics", type = "html", omit = c("time1") )

Descriptive1 <- matrix( 
  c(
      c("hur"),nrow(X2), round(mean(X2$hur),3),round(sd(X2$hur),3),round(min(X2$hur),3),
      round(max(X2$hur),3),
      
      c("taxcorp"),nrow(X2),round(mean(X2$taxcorp),3),  round(sd(X2$taxcorp),3), 
      round(min(X2$taxcorp),3),round(max(X2$taxcorp),3),
      
      c("ud"),nrow(X2),round(mean(X2$ud),3),round(sd(X2$ud),3), round(min(X2$ud),3),
      round(max(X2$ud),3),
      
      c("pslm"),nrow(X2),round(mean(X2$pslm),3),round(sd(X2$pslm),3),round(min(X2$pslm),3),
      round(max(X2$pslm),3),
      
      c("epl"),nrow(X2),round(mean(X2$epl),3), round(sd(X2$epl),3),
      round(min(X2$epl),3),round(max(X2$epl),3),
      
      c("wsc"),nrow(X2),round(mean(X2$wsc),3), round(sd(X2$wsc),3),
      round(min(X2$wsc),3),round(max(X2$wsc),3),
      
      c("cbc"),nrow(X2),round(mean(X2$cbc),3),
      round(sd(X2$cbc),3),round(min(X2$cbc),3),round(max(X2$cbc),3),
      
      c("brr1y"),nrow(X2), round(mean(X2$brr1y),3), round(sd(X2$brr1y),3),
      round(min(X2$brr1y),3),round(max(X2$brr1y),3),
      
      c("ttr"),nrow(X2), round(mean(X2$ttr),3), round(sd(X2$ttr),3), round(min(X2$ttr),3),
      round(max(X2$ttr),3),
      
      c("inf_gdpd"),nrow(X2),round(mean(X2$inf_gdpd),3),round(sd(X2$inf_gdpd),3),
      round(min(X2$inf_gdpd),3),round(max(X2$inf_gdpd),3)
    ),
  ncol = 6, byrow = T)

colnames(Descriptive1) <- c("Statistic","N", "Mean", "SD","Min","Max")
#rownames(Descriptive1) <- c("hur","taxcorp","ud","pslm","epl","wsc","cbc","brr1y","ttr","inf_gdpd")

Descriptive1_V <- kbl(Descriptive1, caption = "Table 2: Variable Summary Statistics, All Observations") %>% 
  kable_styling(bootstrap_options = c("striped", "hover","condensed")) 
Descriptive1_V
Table 2: Variable Summary Statistics, All Observations
Statistic N Mean SD Min Max
hur 357 7.348 3.575 2.267 26.117
taxcorp 357 3.022 1.347 0.583 11.992
ud 357 28.066 18.819 4.5 78
pslm 357 1.488 0.986 0.24 4.3
epl 357 2.117 0.901 0.093 4.583
wsc 357 2.672 1.366 1 5
cbc 357 55.636 31.007 6.1 100
brr1y 357 46.947 24.117 0 86
ttr 357 34.776 6.54 19.613 48.531
inf_gdpd 357 1.834 1.679 -4.62 11.047

Figures 3 and 4, HUR vs. Taxcorp, by year

HVTAM <- X2 %>% select(time1, hur, taxcorp) %>% 
  group_by(time1) %>% 
  drop_na() %>% 
  summarize(hur_mean=mean(hur), taxcorp_mean = mean(taxcorp))

colors <- c("hur" = "#EE119A", "taxcorp" = "#11EE65")

HVTAMV <- ggplot(HVTAM, aes(x=time1))+
  geom_line(aes(y=hur_mean, color = "hur"), show.legend = T)+
  geom_line(aes(y=taxcorp_mean, color = "taxcorp"), show.legend = T)+
  labs(x="Year", y="Mean, %", 
       title = "Figure 2: hur and taxcorp, means by year",
       color = "Legend", caption = "32 OECD Countries from 2001-2019 N = 357")+
  ylim(0,10)
HVTAMV # Fix Legend and colors

X3 <- X2
X3$time1 <- as.factor(X3$time1)
HVTAMV1 <- ggplot(data = X3, aes(x=time1))+
  geom_boxplot(aes(y=hur, color="hur"), show.legend = T)+
  geom_boxplot(aes(y=taxcorp, color="taxcorp"), show.legend = T)+
  theme(axis.text.x = element_text(angle = 90, size = 7))+
  labs(title = "Figure 3: Time Factor hur and taxcorp boxplots", x="Year", y="Percent" )
HVTAMV1

Figures 5 and 6: HUR vs. taxcorp, by country

HVTAM <- X2 %>% select(time1, hur, taxcorp) %>% 
  group_by(time1) %>% 
  drop_na() %>% 
  summarize(hur_mean=mean(hur), taxcorp_mean = mean(taxcorp))

colors <- c("hur" = "#EE119A", "taxcorp" = "#11EE65")

HVTAMV <- ggplot(HVTAM, aes(x=time1))+
  geom_line(aes(y=hur_mean, color = "hur"), show.legend = T)+
  geom_line(aes(y=taxcorp_mean, color = "taxcorp"), show.legend = T)+
  labs(x="Year", y="Mean, %", 
       title = "Figure 2: hur and taxcorp, means by year",
       color = "Legend", caption = "32 OECD Countries from 2001-2019 N = 357")+
  ylim(0,10)
HVTAMV # Fix Legend and colors


X3 <- X2
X3$time1 <- as.factor(X3$time1)
HVTAMV1 <- ggplot(data = X3, aes(x=time1))+
  geom_boxplot(aes(y=hur, color="hur"), show.legend = T)+
  geom_boxplot(aes(y=taxcorp, color="taxcorp"), show.legend = T)+
  theme(axis.text.x = element_text(angle = 90, size = 7))+
  labs(title = "Figure 3: Time Factor hur and taxcorp boxplots", x="Year", y="Percent" )
HVTAMV1

Figures 7 and 8

  • Histograms showing distribution of instruments
  • geom_bar counts the number of occurence of each instrumented value through the dataset.
epl_bar <- ggplot(data = X2)+
  geom_bar(aes(epl), width = 0.1, fill = "#112CEE", show.legend = F)+
  labs(x= "epl Instrument", y="Count",title = "Figure 6: Employment Protection Legislation, occurence of levels", caption = "N=357")
wsc_bar <- ggplot(data=X2)+
  geom_bar(aes(wsc), fill = "#112CEE", show.legend = F)+
  labs(x="wsc Instrument", y="Count", title = "Figure 7: Wage Setting Coordination, occurence of type", caption = "N=357")

epl_bar;wsc_bar
Warning: position_stack requires non-overlapping x intervals

Figures 8-14: Facet Wrapped Charts for paper

  • check figure numbers
#Facet charts
#hur vs taxcorp by year
FW1 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=taxcorp,y=hur), color = "#EED311" ) + 
  geom_smooth(mapping=aes(x=taxcorp,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1) + labs(x = "taxcorp", y="hur", title = "Figure 8: hur vs taxcorp time factor")
FW11 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=taxcorp,y=hur), color = "#EED311" ) + 
  geom_smooth(mapping=aes(x=taxcorp,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1) + labs(x = "taxcorp", y="hur", title = "Figure 8: hur vs taxcorp location factor")

#hur vs ud
FW2 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=ud,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=ud,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1)+ labs(x = "ud", y="hur", title = "Figure 9: hur vs ud time factor")
FW21 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=ud,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=ud,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1)+ labs(x = "ud", y="hur", title = "Figure 9: hur vs ud location factor")

#hur vs cbc
FW3 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=cbc,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=cbc,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1)+ labs(x = "cbc", y="hur", title = "Figure 10: hur vs cbc time factor")
FW31 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=cbc,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=cbc,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1)+ labs(x = "cbc", y="hur", title = "Figure 10: hur vs cbc location factor")

#hur vs brr
FW4 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=brr1y,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=brr1y,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1)+ labs(x = "brr", y="hur", title = "Figure 11: hur vs brr1y time factor")
FW41 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=brr1y,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=brr1y,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1)+ labs(x = "brr", y="hur", title = "Figure 11: hur vs brr1y location factor")

#hur vs pslm
FW5 <- ggplot(data=X2)+
  geom_point(mapping= aes(x=pslm,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=pslm, y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1)+ labs(x="pslm", y="hur", title = "Figure 12: ur vs pslm time factor")
FW51 <- ggplot(data=X2)+
  geom_point(mapping= aes(x=pslm,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=pslm, y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1)+ labs(x="pslm", y="hur", title = "Figure 12: ur vs pslm location factor")


#hur vs ttr
FW7 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=ttr,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=ttr,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1)+ labs(x = "ttr", y="hur", title = "Figure 13: hur vs ttr time factor")
FW71 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=ttr,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=ttr,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1)+ labs(x = "ttr", y="hur", title = "Figure 13: hur vs ttr location factor")

#hur vs delta_i
FW8 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=inf_gdpd,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=inf_gdpd,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1)+ labs(x = "inf_gdpd", y="hur", title = "Figure 14: hur vs inf_gdpd time factor")
FW81 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=inf_gdpd,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=inf_gdpd,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1)+ labs(x = "inf_gdpd", y="hur", title = "Figure 14: hur vs inf_gdpd location factor")

FW1; FW11;FW2;FW21;FW3;FW31;FW4;FW41;FW5;FW51;FW7;FW71;FW8;FW81
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'

Data Analysis, with experiments and tests

#P_data created in step 1
# Experiments with lm and plm
#OLS per "getting started"
#require(car)
# ols <- lm(hur~taxcorp, data = P_data)
# yhat <- ols$fitted
# Plot1 <- ggplot(data=P_data, aes(x=taxcorp))+
#   geom_point(mapping=aes(x=taxcorp,y=hur), color = "#EED311" ) + 
#   geom_smooth(mapping=aes(x=taxcorp,y=hur), method = lm, se=F, color = "#112CEE")
# fixed.dum <- lm(hur~taxcorp + factor(location1)-1, data=P_data)
# yhat1 <- fixed.dum$fitted
# Plot2 <- sp(yhat1~P_data$taxcorp|P_data$location1, xlab="taxcorp", ylab=yhat,
#            boxplot=F, smooth=F)

# Plot3 <- scatterplot(data=P_data,hur~time1|location1)
#OLS with LM, no instruments from X2
Reg01 <- lm(hur~taxcorp + ud + cbc + brr1y + pslm +ttr + inf_gdpd, data=X2)
#OLS with plm, no instruments from P_data
Reg02 <- plm(hur~taxcorp + ud + cbc + brr1y + pslm + ttr + inf_gdpd, data=P_data)
#OLS with lm, instruments, from P_data - include as baseline
Reg03 <- lm(hur~taxcorp + ud + cbc + brr1y + pslm +wsc +epl +ttr + inf_gdpd, data=P_data)
#OLS with plm, instruments from P_data
Reg04 <- plm(hur~taxcorp + ud + cbc + brr1y + pslm +wsc +epl +ttr + inf_gdpd, data=P_data)
#Pooled, preferred model (clean)
P2SLS <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                |.-taxcorp + epl + wsc, 
                data = P_data, model = "pooling")
#Pooled Experiment, verified all the same as P2SLS
P2SLS_1 <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                 |. -taxcorp + epl + wsc, 
                 data = X2, model = "pooling")
P2SLS_2 <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                 | + ud + cbc + brr1y + ttr + inf_gdpd + pslm + epl + wsc, 
                 data = P_data, model = "pooling")
#Between estimator 
B2SLS <- update(P2SLS, model="between") #this is bad
#FE or within estimator
FE2SLS <- update(P2SLS, model="within")
#Random estimator
RE2SLS <- update(P2SLS, model="random")
#Check output - looks bad lol
# stargazer(Reg01,Reg02,Reg03,Reg04,P2SLS,B2SLS, type = "text")


#Time Fixed Effects and experiments
#Original Form - best fit model
FE2SLS_T <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd +pslm
                +factor(time1)
                |. -taxcorp + epl + wsc, 
                data = P_data, model = "within")
#added "time" effect to arguments
FE2SLS_T1 <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd +pslm
                +factor(time1)
                |. -taxcorp + epl + wsc, 
                data = P_data, model = "within", effect = "time")
#Streamlined
TFE2SLS1 <- update(P2SLS, model="within", effect = "time")

#check output
#stargazer(FE2SLS_T,FE2SLS_T1,TFE2SLS1, type = "text")

#Individual FE and Experiments
#Original model - all are exact same
FE2SLS_L <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                +factor(location1)
                |. -taxcorp + epl + wsc, 
                data = P_data, model = "within")
# added "individual" effect to arguments - specifying effect prevents same results as_TL regs
FE2SLS_L1 <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                +factor(location1)
                |. -taxcorp + epl + wsc, 
                data = P_data, model = "within", effect = "individual")
# streamlined, yes same as above
FE2SLS_L2 <- update(P2SLS, model="within", effect="individual")
#check output
#stargazer(FE2SLS_L, FE2SLS_L1, FE2SLS_L2, type = "text")

#Time and Individual FE
#Original model
FE2SLS_TL <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                 +factor(time1) +factor(location1)
                 |. -taxcorp + epl + wsc, 
                 data = P_data, model = "within", effect = "twoways")
# removed twoways effect from argument - best fit
FE2SLS_TL1 <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                 +factor(time1) +factor(location1)
                 |. -taxcorp + epl + wsc, 
                 data = P_data, model = "within")
#streamlined1, added back effect
FE2SLS_TL2 <- update(P2SLS, model="within", effect="twoways")
#streamlined2, removed effect from update 
FE2SLS_TL3 <- update(P2SLS, model="within")
#check output
#stargazer(FE2SLS_TL, FE2SLS_TL1, FE2SLS_TL2, FE2SLS_TL3, type = "text")

#LM for random effects vs OLS
PX1 <- plmtest(P2SLS)
##BP LM for random effects, null is no panel effect, include
PX11 <- plmtest(P2SLS, type=c("bp"))

# Testing time-fixed effects, null is no FTE
PX12 <- plmtest(FE2SLS, c("time"), type=c("bp"))

#LM for fixed effects vs OLS, generic 
PX2 <- pFtest(FE2SLS,P2SLS)
#LM for fixed effectsw vs OLS, Time specific
PX21 <- pFtest(TFE2SLS1, FE2SLS)
Warning in pf(stat, df1, df2, lower.tail = FALSE) : NaNs produced
#hausman test for fixed vs random effects, null=random preferred
PX6 <- phtest(RE2SLS,FE2SLS)

#Cross sectional dependance, null is that residuals across entities are not
#correlated. Used to test whether residuals are correlated across entities. CS dep 
#can lead to bias in test results
#BPLM for cross sectional dependence in panels
PX7 <- pcdtest(FE2SLS, test=c("lm"))
Warning in pcdres(tres = tres, n = n, w = w, form = paste(deparse(x$formula)),  :
  Some pairs of individuals (20 percent) do not have any or just one time period in common and have been omitted from calculation
# Pesaran cross sectional dependence
PX71 <- pcdtest(FE2SLS, test=c("cd"))
Warning in pcdres(tres = tres, n = n, w = w, form = paste(deparse(x$formula)),  :
  Some pairs of individuals (20 percent) do not have any or just one time period in common and have been omitted from calculation
# Breusch Godfrey for SC in panels. Null is no SC. May not matter as a relativily 
# short study period T=19
PX8 <- pbgtest(FE2SLS)
#ADF for stochastic trends, Null- series has unit root.
require(tseries)
Loading required package: tseries
Warning: package ‘tseries’ was built under R version 4.0.5
Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo 

    ‘tseries’ version: 0.10-50

    ‘tseries’ is a package for time series analysis and computational finance.

    See ‘library(help="tseries")’ for details.
PX9 <- adf.test(P_data$hur, k=2)
Warning in adf.test(P_data$hur, k = 2) :
  p-value smaller than printed p-value
#BP null is homoskedasticity, alternative: heteroskadasticity
PX0 <- bptest(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                +factor(location1), data=P_data, studentize = F)

PX11;PX2;PX6;PX7;PX71;PX8;PX9;PX0

    Lagrange Multiplier Test - (Breusch-Pagan) for unbalanced panels

data:  hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm | . -  ...
chisq = 571.15, df = 1, p-value < 2.2e-16
alternative hypothesis: significant effects


    F test for individual effects

data:  hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm | . -  ...
F = 378.68, df1 = 31, df2 = 318, p-value < 2.2e-16
alternative hypothesis: significant effects


    Hausman Test

data:  hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm | . -  ...
chisq = 3.5086, df = 7, p-value = 0.8343
alternative hypothesis: one model is inconsistent


    Breusch-Pagan LM test for cross-sectional dependence in panels

data:  hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm | . -     taxcorp + epl + wsc
chisq = 926.79, df = 447, p-value < 2.2e-16
alternative hypothesis: cross-sectional dependence


    Pesaran CD test for cross-sectional dependence in panels

data:  hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm | . -     taxcorp + epl + wsc
z = 4.1856, p-value = 2.844e-05
alternative hypothesis: cross-sectional dependence


    Breusch-Godfrey/Wooldridge test for serial correlation in panel models

data:  hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm | . -  ...
chisq = 188.9, df = 1, p-value < 2.2e-16
alternative hypothesis: serial correlation in idiosyncratic errors


    Augmented Dickey-Fuller Test

data:  P_data$hur
Dickey-Fuller = -4.7542, Lag order = 2, p-value = 0.01
alternative hypothesis: stationary


    Breusch-Pagan test

data:  hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm + factor(location1)
BP = 474.14, df = 38, p-value < 2.2e-16

Final Output Table 3

*NOTE: the p values at the bottom of the table have to be fixed in the output document, with code (to copypasta): * to replace asterisks that cause bolding

FOutput <- stargazer(Reg03, P2SLS,  FE2SLS_T1, FE2SLS_L1,  FE2SLS_TL, RE2SLS,
          type = "html", title = "Table 3: Regressions to Explain the Unemployment Rate", 
          model.names = F,
          keep = c("taxcorp", "ud","cbc","brr1y", "ttr","inf_gdpd", "hur", "pslm", "epl","wsc", "Constant"),
          column.labels = c("OLS", "2SLS Pooled", 
                            "2SLS Time Fixed Effects", "2SLS Individual Fixed Effects", "2SLS Time and Individual Fixed Effects", "2SLS Random Effects")
          )
# run in console, it's messy
Table 3: Regressions to Explain the Unemployment Rate
Dependent variable:
hur
OLS 2SLS Pooled 2SLS Time Fixed Effects 2SLS Individual Fixed Effects 2SLS Time and Individual Fixed Effects 2SLS Random Effects
(1) (2) (3) (4) (5) (6)
taxcorp -0.501*** -12.904 -3.267 -3.752* -1.992 -3.208
(0.134) (14.516) (4.088) (2.026) (1.930) (2.238)
ud -0.004 0.263 0.033 0.011 0.119 -0.001
(0.014) (0.351) (0.102) (0.068) (0.129) (0.052)
cbc 0.042*** 0.158 0.054 0.057 0.075** -0.002
(0.011) (0.160) (0.044) (0.049) (0.037) (0.038)
brr1y -0.006 0.246 0.038 0.039** 0.036*** 0.039***
(0.010) (0.321) (0.091) (0.016) (0.013) (0.014)
pslm 1.727*** -9.934 -0.885 1.457* 2.180*** 1.637**
(0.320) (13.666) (3.725) (0.779) (0.558) (0.825)
wsc -1.306***
(0.174)
epl 0.543***
(0.202)
ttr -0.137*** -0.700 -0.260 0.518 0.220 0.322
(0.048) (0.704) (0.229) (0.343) (0.356) (0.331)
inf_gdpd -0.207** 2.078 0.526 0.156 0.019 0.123
(0.102) (2.461) (0.649) (0.279) (0.198) (0.304)
Constant 11.818*** 53.930 2.135
(1.309) (50.013) (4.595)
Observations 357 357 357 357 357 357
R2 0.367 0.049 0.103 0.189 0.264 0.202
Adjusted R2 0.350 0.030 0.035 0.092 0.127 0.186
Residual Std. Error 2.881 (df = 347)
F Statistic 22.338*** (df = 9; 347) 5.469 43.285*** 96.725*** 144.909*** 113.648***
Note: *p<0.1; **p<0.05; ***p<0.01
---
title: "ECON 145 Data Project Final"
output: 
  html_notebook: default
  always_allow_html: true
---

## First Steps
### Overall Project Stuff
* Data File: master.xlsx
* Gold: #EED311
* Blue: #112CEE
* Green: #11EE65
* Red: #EE119A

### Read in required libraries
```{r packages, echo=F, warning=F}
require(readxl)
require(tidyverse) # i think this gives me dplyr and tidyr
require(janitor) # clean names
require(ggplot2) # data viz
require(kableExtra) #Stat tables
require(stargazer) #Regression tables
# require(ivreg) #Maybe in the future
require(plm) #panel linear models
require(lmtest)


```

### Read in Data
* original df was cleaned wholly in Excel and produced "X1.csv"
* we recreated X1 as X2 with thru data manipulation through R
* the df "DirtyD" was an attempt to recreate a dataset ran by Prof K from master.xlsx, R don't like it
```{r data read in}
# attempting 540 observations
#DirtyD <- read_excel("C:\\Users\\hisjo\\OneDrive\\Documents\\Sac State\\ECON 145 Economic Research Methods\\Data\\master.xlsx") %>% 
#  clean_names() %>% #piping prevents an umlaut over the i
#  select(-c(l1_inf_gdpd)) %>% #Extranious data, just in case         
#  #transform(cbc = as.numeric(cbc)) %>% #cbc had some char, convert and coerce NAs
#  filter(time1!=2020) %>%  #Remove 2020 Entries
#  drop_na()
#N = 546
  
X2 <- read_excel("C:\\Users\\hisjo\\OneDrive\\Documents\\Sac State\\ECON 145 Economic Research Methods\\Data\\master.xlsx") %>% 
  clean_names() %>% 
  transform(cbc = as.numeric(cbc)) %>% #cbc had some char, convert and coerce NAs
  filter(time1 != 2020) %>% # Remove 2020 from our dataset
  select(-c(l1_inf_gdpd)) %>% #Extraneous data dropped (lagged gdpd)
  drop_na() # remove NA
#N=357, nCol=12 

#Tell R and PLM that X2 is panel data with time1=time and location1=id
#creates a new data frame with index column, moving forward I think this is
#the correct way to handle this with PLM
P_data <- pdata.frame(X2, index = c("location1", "time1"))
# display to show transformation
X2
P_data
```

### Variables Descriptions and Source
```{r Variables Table}
VarTable <- data.frame(
  Variable = c("hur", "taxcorp","ud","cbc","brr1y","pslm","epl","wsc","ttr","inf_gdpd"),
  Description = c("Annual Harmonized Unemployment Rate, %",
                  "Tax on Corporate Profits, % GDP",
                  "Union Density Ratio",
                  "Collective Bargaining Coverage, %",
                  "Benefit Replacement Rate at 1 year, %",
                  "Public Spending on Labor Markets, %GDP",
                  "Employment Protection Legislation, Instrumented",
                  "Wage Setting Coordination, Instrumented",
                  "Total Tax Revenue, %GDP",
                  "Change in Inflation with GDP Deflator, %"),
  Source = c("OECD 2022a","OECD 2022b","OECD 2022f","OECD 2022g","OECD 2022d",
             "OECD 2022e","OECD 2022c","OECD 2022h","OECD 2022i","World Bank 2022")
)
# create the object to output in plots 
VarTable_V <- kbl(VarTable, caption = "Table 1: Variables") %>% 
  kable_styling(bootstrap_options = c("striped", "hover","condensed"))
VarTable_V
```


### Overall Summary Statistics
```{r summary stats }
#stargazer wont make a table if there are NA or type char
# I don't like the way it outputs, even in html. Using Kable
#stargazer(X2, title = "Table 2: Summary Statistics", type = "html", omit = c("time1") )

Descriptive1 <- matrix( 
  c(
      c("hur"),nrow(X2), round(mean(X2$hur),3),round(sd(X2$hur),3),round(min(X2$hur),3),
      round(max(X2$hur),3),
      
      c("taxcorp"),nrow(X2),round(mean(X2$taxcorp),3),  round(sd(X2$taxcorp),3), 
      round(min(X2$taxcorp),3),round(max(X2$taxcorp),3),
      
      c("ud"),nrow(X2),round(mean(X2$ud),3),round(sd(X2$ud),3), round(min(X2$ud),3),
      round(max(X2$ud),3),
      
      c("pslm"),nrow(X2),round(mean(X2$pslm),3),round(sd(X2$pslm),3),round(min(X2$pslm),3),
      round(max(X2$pslm),3),
      
      c("epl"),nrow(X2),round(mean(X2$epl),3), round(sd(X2$epl),3),
      round(min(X2$epl),3),round(max(X2$epl),3),
      
      c("wsc"),nrow(X2),round(mean(X2$wsc),3), round(sd(X2$wsc),3),
      round(min(X2$wsc),3),round(max(X2$wsc),3),
      
      c("cbc"),nrow(X2),round(mean(X2$cbc),3),
      round(sd(X2$cbc),3),round(min(X2$cbc),3),round(max(X2$cbc),3),
      
      c("brr1y"),nrow(X2), round(mean(X2$brr1y),3), round(sd(X2$brr1y),3),
      round(min(X2$brr1y),3),round(max(X2$brr1y),3),
      
      c("ttr"),nrow(X2), round(mean(X2$ttr),3), round(sd(X2$ttr),3), round(min(X2$ttr),3),
      round(max(X2$ttr),3),
      
      c("inf_gdpd"),nrow(X2),round(mean(X2$inf_gdpd),3),round(sd(X2$inf_gdpd),3),
      round(min(X2$inf_gdpd),3),round(max(X2$inf_gdpd),3)
    ),
  ncol = 6, byrow = T)

colnames(Descriptive1) <- c("Statistic","N", "Mean", "SD","Min","Max")
#rownames(Descriptive1) <- c("hur","taxcorp","ud","pslm","epl","wsc","cbc","brr1y","ttr","inf_gdpd")

Descriptive1_V <- kbl(Descriptive1, caption = "Table 2: Variable Summary Statistics, All Observations") %>% 
  kable_styling(bootstrap_options = c("striped", "hover","condensed")) 
Descriptive1_V
```

###  Figures 3 and 4, HUR vs. Taxcorp, by year
```{r, HUR v taxcorp annual Figure 2}
HVTAM <- X2 %>% select(time1, hur, taxcorp) %>% 
  group_by(time1) %>% 
  drop_na() %>% 
  summarize(hur_mean=mean(hur), taxcorp_mean = mean(taxcorp))

colors <- c("hur" = "#EE119A", "taxcorp" = "#11EE65")

HVTAMV <- ggplot(HVTAM, aes(x=time1))+
  geom_line(aes(y=hur_mean, color = "hur"), show.legend = T)+
  geom_line(aes(y=taxcorp_mean, color = "taxcorp"), show.legend = T)+
  labs(x="Year", y="Mean, %", 
       title = "Figure 2: hur and taxcorp, means by year",
       color = "Legend", caption = "32 OECD Countries from 2001-2019 N = 357")+
  ylim(0,10)
HVTAMV # Fix Legend and colors

X3 <- X2
X3$time1 <- as.factor(X3$time1)
HVTAMV1 <- ggplot(data = X3, aes(x=time1))+
  geom_boxplot(aes(y=hur, color="hur"), show.legend = T)+
  geom_boxplot(aes(y=taxcorp, color="taxcorp"), show.legend = T)+
  theme(axis.text.x = element_text(angle = 90, size = 7))+
  labs(title = "Figure 3: Time Factor hur and taxcorp boxplots", x="Year", y="Percent" )
HVTAMV1
```

### Figures 5 and 6: HUR vs. taxcorp, by country
```{r}
HVTIM <- X2 %>% select(location1, hur, taxcorp, wsc, epl) %>% 
  group_by(location1) %>% 
  drop_na() %>% 
  summarise(hur_mean1=mean(hur), taxcorp_mean1= mean(taxcorp), wsc_mean=mean(wsc),
            epl_mean=mean(epl))

HVTIMV <- ggplot(HVTIM, aes(x=location1, group=1))+
  geom_line(aes(y=hur_mean1, color = "hur"), show.legend = T)+
  geom_line(aes(y=taxcorp_mean1, color= "taxcorp"), show.legend = T)+
  #geom_col(aes(y=wsc_mean, color = "wsc"), alpha = 0.2, position = position_dodge2(preserve = "single"), show.legend = T)+
  geom_col(aes(y=epl_mean, color = "epl"), color= "#2CEE11", alpha = 0.5)+
  labs(x="Location / Individual", y="Mean, %", 
       title = "Figure 4: hur and taxcorp, means by location",
       subtitle = "with mean of wsc instrument for location",
       color = "Legend",caption = "32 OECD Countries from 2001-2019 N = 357")+
  theme(axis.text.x = element_text(angle = 90, size = 7))
HVTIMV

X3$location1 <- as.factor(X3$location1)
HVTIMV1 <- ggplot(data = X3, aes(x=location1))+
  geom_boxplot(aes(y=hur, color="hur"), show.legend = T)+
  geom_boxplot(aes(y=taxcorp, color="taxcorp"), show.legend = T)+
  theme(axis.text.x = element_text(angle = 90, size = 7))+
  labs(title = "Figure 5: Location Factor hur and taxcorp", x="Location", y="Percent" )
HVTIMV1
```
### Figures 7 and 8
* Histograms showing distribution of instruments
* geom_bar counts the number of occurence of each instrumented value through the dataset.
```{r WSC bar chart}
epl_bar <- ggplot(data = X2)+
  geom_bar(aes(epl), width = 0.1, fill = "#112CEE", show.legend = F)+
  labs(x= "epl Instrument", y="Count",title = "Figure 6: Employment Protection Legislation, occurence of levels", caption = "N=357")
wsc_bar <- ggplot(data=X2)+
  geom_bar(aes(wsc), fill = "#112CEE", show.legend = F)+
  labs(x="wsc Instrument", y="Count", title = "Figure 7: Wage Setting Coordination, occurence of type", caption = "N=357")

epl_bar;wsc_bar

```


### Figures 8-14: Facet Wrapped Charts for paper
* check figure numbers
```{r Facet wrapped charts}
#Facet charts
#hur vs taxcorp by year
FW1 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=taxcorp,y=hur), color = "#EED311" ) + 
  geom_smooth(mapping=aes(x=taxcorp,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1) + labs(x = "taxcorp", y="hur", title = "Figure 8: hur vs taxcorp time factor")
FW11 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=taxcorp,y=hur), color = "#EED311" ) + 
  geom_smooth(mapping=aes(x=taxcorp,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1) + labs(x = "taxcorp", y="hur", title = "Figure 8: hur vs taxcorp location factor")

#hur vs ud
FW2 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=ud,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=ud,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1)+ labs(x = "ud", y="hur", title = "Figure 9: hur vs ud time factor")
FW21 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=ud,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=ud,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1)+ labs(x = "ud", y="hur", title = "Figure 9: hur vs ud location factor")

#hur vs cbc
FW3 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=cbc,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=cbc,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1)+ labs(x = "cbc", y="hur", title = "Figure 10: hur vs cbc time factor")
FW31 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=cbc,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=cbc,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1)+ labs(x = "cbc", y="hur", title = "Figure 10: hur vs cbc location factor")

#hur vs brr
FW4 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=brr1y,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=brr1y,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1)+ labs(x = "brr", y="hur", title = "Figure 11: hur vs brr1y time factor")
FW41 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=brr1y,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=brr1y,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1)+ labs(x = "brr", y="hur", title = "Figure 11: hur vs brr1y location factor")

#hur vs pslm
FW5 <- ggplot(data=X2)+
  geom_point(mapping= aes(x=pslm,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=pslm, y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1)+ labs(x="pslm", y="hur", title = "Figure 12: ur vs pslm time factor")
FW51 <- ggplot(data=X2)+
  geom_point(mapping= aes(x=pslm,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=pslm, y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1)+ labs(x="pslm", y="hur", title = "Figure 12: ur vs pslm location factor")


#hur vs ttr
FW7 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=ttr,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=ttr,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1)+ labs(x = "ttr", y="hur", title = "Figure 13: hur vs ttr time factor")
FW71 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=ttr,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=ttr,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1)+ labs(x = "ttr", y="hur", title = "Figure 13: hur vs ttr location factor")

#hur vs delta_i
FW8 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=inf_gdpd,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=inf_gdpd,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~time1)+ labs(x = "inf_gdpd", y="hur", title = "Figure 14: hur vs inf_gdpd time factor")
FW81 <- ggplot(data=X2)+
  geom_point(mapping=aes(x=inf_gdpd,y=hur), color = "#EED311")+
  geom_smooth(mapping=aes(x=inf_gdpd,y=hur), method = lm, se=F, color = "#112CEE")+
  facet_wrap(~location1)+ labs(x = "inf_gdpd", y="hur", title = "Figure 14: hur vs inf_gdpd location factor")

FW1; FW11;FW2;FW21;FW3;FW31;FW4;FW41;FW5;FW51;FW7;FW71;FW8;FW81
```
## Data Analysis, with experiments and tests

* details of use of the plm package are found here <https://cran.r-project.org/web/packages/plm/plm.pdf>
* "getting started in Fixed/Random effects models using R"
<https://rstudio-pubs-static.s3.amazonaws.com/372492_3e05f38dd3f248e89cdedd317d603b9a.html>

```{r Data Analysis with tests and Experiments}
#P_data created in step 1
# Experiments with lm and plm
#OLS per "getting started"
#require(car)
# ols <- lm(hur~taxcorp, data = P_data)
# yhat <- ols$fitted
# Plot1 <- ggplot(data=P_data, aes(x=taxcorp))+
#   geom_point(mapping=aes(x=taxcorp,y=hur), color = "#EED311" ) + 
#   geom_smooth(mapping=aes(x=taxcorp,y=hur), method = lm, se=F, color = "#112CEE")
# fixed.dum <- lm(hur~taxcorp + factor(location1)-1, data=P_data)
# yhat1 <- fixed.dum$fitted
# Plot2 <- sp(yhat1~P_data$taxcorp|P_data$location1, xlab="taxcorp", ylab=yhat,
#            boxplot=F, smooth=F)

# Plot3 <- scatterplot(data=P_data,hur~time1|location1)
#OLS with LM, no instruments from X2
Reg01 <- lm(hur~taxcorp + ud + cbc + brr1y + pslm +ttr + inf_gdpd, data=X2)
#OLS with plm, no instruments from P_data
Reg02 <- plm(hur~taxcorp + ud + cbc + brr1y + pslm + ttr + inf_gdpd, data=P_data)
#OLS with lm, instruments, from P_data - include as baseline
Reg03 <- lm(hur~taxcorp + ud + cbc + brr1y + pslm +wsc +epl +ttr + inf_gdpd, data=P_data)
#OLS with plm, instruments from P_data
Reg04 <- plm(hur~taxcorp + ud + cbc + brr1y + pslm +wsc +epl +ttr + inf_gdpd, data=P_data)
#Pooled, preferred model (clean)
P2SLS <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                |.-taxcorp + epl + wsc, 
                data = P_data, model = "pooling")
#Pooled Experiment, verified all the same as P2SLS
P2SLS_1 <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                 |. -taxcorp + epl + wsc, 
                 data = X2, model = "pooling")
P2SLS_2 <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                 | + ud + cbc + brr1y + ttr + inf_gdpd + pslm + epl + wsc, 
                 data = P_data, model = "pooling")
#Between estimator 
B2SLS <- update(P2SLS, model="between") #this is bad
#FE or within estimator
FE2SLS <- update(P2SLS, model="within")
#Random estimator
RE2SLS <- update(P2SLS, model="random")
#Check output - looks bad lol
# stargazer(Reg01,Reg02,Reg03,Reg04,P2SLS,B2SLS, type = "text")


#Time Fixed Effects and experiments
#Original Form - best fit model
FE2SLS_T <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd +pslm
                +factor(time1)
                |. -taxcorp + epl + wsc, 
                data = P_data, model = "within")
#added "time" effect to arguments
FE2SLS_T1 <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd +pslm
                +factor(time1)
                |. -taxcorp + epl + wsc, 
                data = P_data, model = "within", effect = "time")
#Streamlined
TFE2SLS1 <- update(P2SLS, model="within", effect = "time")

#check output
#stargazer(FE2SLS_T,FE2SLS_T1,TFE2SLS1, type = "text")

#Individual FE and Experiments
#Original model - all are exact same
FE2SLS_L <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                +factor(location1)
                |. -taxcorp + epl + wsc, 
                data = P_data, model = "within")
# added "individual" effect to arguments - specifying effect prevents same results as_TL regs
FE2SLS_L1 <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                +factor(location1)
                |. -taxcorp + epl + wsc, 
                data = P_data, model = "within", effect = "individual")
# streamlined, yes same as above
FE2SLS_L2 <- update(P2SLS, model="within", effect="individual")
#check output
#stargazer(FE2SLS_L, FE2SLS_L1, FE2SLS_L2, type = "text")

#Time and Individual FE
#Original model
FE2SLS_TL <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                 +factor(time1) +factor(location1)
                 |. -taxcorp + epl + wsc, 
                 data = P_data, model = "within", effect = "twoways")
# removed twoways effect from argument - best fit
FE2SLS_TL1 <- plm(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                 +factor(time1) +factor(location1)
                 |. -taxcorp + epl + wsc, 
                 data = P_data, model = "within")
#streamlined1, added back effect
FE2SLS_TL2 <- update(P2SLS, model="within", effect="twoways")
#streamlined2, removed effect from update 
FE2SLS_TL3 <- update(P2SLS, model="within")
#check output
#stargazer(FE2SLS_TL, FE2SLS_TL1, FE2SLS_TL2, FE2SLS_TL3, type = "text")

#LM for random effects vs OLS
PX1 <- plmtest(P2SLS)
##BP LM for random effects, null is no panel effect, include
PX11 <- plmtest(P2SLS, type=c("bp"))

# Testing time-fixed effects, null is no FTE
PX12 <- plmtest(FE2SLS, c("time"), type=c("bp"))

#LM for fixed effects vs OLS, generic 
PX2 <- pFtest(FE2SLS,P2SLS)
#LM for fixed effectsw vs OLS, Time specific
PX21 <- pFtest(TFE2SLS1, FE2SLS)
#hausman test for fixed vs random effects, null=random preferred
PX6 <- phtest(RE2SLS,FE2SLS)

#Cross sectional dependance, null is that residuals across entities are not
#correlated. Used to test whether residuals are correlated across entities. CS dep 
#can lead to bias in test results
#BPLM for cross sectional dependence in panels
PX7 <- pcdtest(FE2SLS, test=c("lm"))
# Pesaran cross sectional dependence
PX71 <- pcdtest(FE2SLS, test=c("cd"))

# Breusch Godfrey for SC in panels. Null is no SC. May not matter as a relativily 
# short study period T=19
PX8 <- pbgtest(FE2SLS)
#ADF for stochastic trends, Null- series has unit root.
require(tseries)
PX9 <- adf.test(P_data$hur, k=2)
#BP null is homoskedasticity, alternative: heteroskadasticity
PX0 <- bptest(hur ~ taxcorp + ud + cbc + brr1y + ttr + inf_gdpd + pslm
                +factor(location1), data=P_data, studentize = F)

PX11;PX2;PX6;PX7;PX71;PX8;PX9;PX0
```


### Final Output Table 3
*NOTE: the p values at the bottom of the table have to be fixed in the output document, with code (to copypasta): 
<span>&#42;</span> to replace asterisks that cause bolding
```{r Regression Output}
FOutput <- stargazer(Reg03, P2SLS,  FE2SLS_T1, FE2SLS_L1,  FE2SLS_TL, RE2SLS,
          type = "html", title = "Table 3: Regressions to Explain the Unemployment Rate", 
          model.names = F,
          keep = c("taxcorp", "ud","cbc","brr1y", "ttr","inf_gdpd", "hur", "pslm", "epl","wsc", "Constant"),
          column.labels = c("OLS", "2SLS Pooled", 
                            "2SLS Time Fixed Effects", "2SLS Individual Fixed Effects", "2SLS Time and Individual Fixed Effects", "2SLS Random Effects")
          )
# run in console, it's messy
```
<table style="text-align:center"><caption><strong>Table 3: Regressions to Explain the Unemployment Rate</strong></caption>
<tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="6"><em>Dependent variable:</em></td></tr>
<tr><td></td><td colspan="6" style="border-bottom: 1px solid black"></td></tr>
<tr><td style="text-align:left"></td><td colspan="6">hur</td></tr>
<tr><td style="text-align:left"></td><td>OLS</td><td>2SLS Pooled</td><td>2SLS Time Fixed Effects</td><td>2SLS Individual Fixed Effects</td><td>2SLS Time and Individual Fixed Effects</td><td>2SLS Random Effects</td></tr>
<tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td><td>(4)</td><td>(5)</td><td>(6)</td></tr>
<tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">taxcorp</td><td>-0.501<sup>***</sup></td><td>-12.904</td><td>-3.267</td><td>-3.752<sup>*</sup></td><td>-1.992</td><td>-3.208</td></tr>
<tr><td style="text-align:left"></td><td>(0.134)</td><td>(14.516)</td><td>(4.088)</td><td>(2.026)</td><td>(1.930)</td><td>(2.238)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">ud</td><td>-0.004</td><td>0.263</td><td>0.033</td><td>0.011</td><td>0.119</td><td>-0.001</td></tr>
<tr><td style="text-align:left"></td><td>(0.014)</td><td>(0.351)</td><td>(0.102)</td><td>(0.068)</td><td>(0.129)</td><td>(0.052)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">cbc</td><td>0.042<sup>***</sup></td><td>0.158</td><td>0.054</td><td>0.057</td><td>0.075<sup>**</sup></td><td>-0.002</td></tr>
<tr><td style="text-align:left"></td><td>(0.011)</td><td>(0.160)</td><td>(0.044)</td><td>(0.049)</td><td>(0.037)</td><td>(0.038)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">brr1y</td><td>-0.006</td><td>0.246</td><td>0.038</td><td>0.039<sup>**</sup></td><td>0.036<sup>***</sup></td><td>0.039<sup>***</sup></td></tr>
<tr><td style="text-align:left"></td><td>(0.010)</td><td>(0.321)</td><td>(0.091)</td><td>(0.016)</td><td>(0.013)</td><td>(0.014)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">pslm</td><td>1.727<sup>***</sup></td><td>-9.934</td><td>-0.885</td><td>1.457<sup>*</sup></td><td>2.180<sup>***</sup></td><td>1.637<sup>**</sup></td></tr>
<tr><td style="text-align:left"></td><td>(0.320)</td><td>(13.666)</td><td>(3.725)</td><td>(0.779)</td><td>(0.558)</td><td>(0.825)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">wsc</td><td>-1.306<sup>***</sup></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left"></td><td>(0.174)</td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">epl</td><td>0.543<sup>***</sup></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left"></td><td>(0.202)</td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">ttr</td><td>-0.137<sup>***</sup></td><td>-0.700</td><td>-0.260</td><td>0.518</td><td>0.220</td><td>0.322</td></tr>
<tr><td style="text-align:left"></td><td>(0.048)</td><td>(0.704)</td><td>(0.229)</td><td>(0.343)</td><td>(0.356)</td><td>(0.331)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">inf_gdpd</td><td>-0.207<sup>**</sup></td><td>2.078</td><td>0.526</td><td>0.156</td><td>0.019</td><td>0.123</td></tr>
<tr><td style="text-align:left"></td><td>(0.102)</td><td>(2.461)</td><td>(0.649)</td><td>(0.279)</td><td>(0.198)</td><td>(0.304)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Constant</td><td>11.818<sup>***</sup></td><td>53.930</td><td></td><td></td><td></td><td>2.135</td></tr>
<tr><td style="text-align:left"></td><td>(1.309)</td><td>(50.013)</td><td></td><td></td><td></td><td>(4.595)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Observations</td><td>357</td><td>357</td><td>357</td><td>357</td><td>357</td><td>357</td></tr>
<tr><td style="text-align:left">R<sup>2</sup></td><td>0.367</td><td>0.049</td><td>0.103</td><td>0.189</td><td>0.264</td><td>0.202</td></tr>
<tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.350</td><td>0.030</td><td>0.035</td><td>0.092</td><td>0.127</td><td>0.186</td></tr>
<tr><td style="text-align:left">Residual Std. Error</td><td>2.881 (df = 347)</td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">F Statistic</td><td>22.338<sup>***</sup> (df = 9; 347)</td><td>5.469</td><td>43.285<sup>***</sup></td><td>96.725<sup>***</sup></td><td>144.909<sup>***</sup></td><td>113.648<sup>***</sup></td></tr>
<tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="6" style="text-align:right"><sup><span>&#42;</span></sup>p<0.1; <sup><span>&#42;</span><span>&#42;</span></sup>p<0.05; <sup><span>&#42;</span><span>&#42;</span><span>&#42;</span></sup>p<0.01</td></tr>
</table>