Question 1
I donโt think we need to include the main effect term for Import Shock because if the goal of the regression is to see if RD tax credit can absorb import shocks, we have already controlled for import shocks in the interaction terms. Including the main effect term will only change the constant terms or the interpretation of the other coefficients.
Question 2
library(haven)
library(tidyverse)
library(stringr)
library(formattable)
library(plm)
compustat <- read_dta('compustat.dta')
patent <- read_dta('pdpcohdr.dta')
inventor <- read_dta('inventors.dta')
tax_credit <- read_dta('rd_tax_credit_state.dta')
patassg <- read_dta('patassg.dta')
dynass <- read_dta('dynass.dta')
2.1
gvkey <- compustat$gvkey
gvkey <- str_remove(gvkey, "^0+") #remove the leading 0s to merge
compustat$gvkey <- gvkey
compustat_patent <- merge(compustat, patent, by = 'gvkey')
compustat_patent$gvkey <- as.numeric(compustat_patent$gvkey)
compustat_patent <- compustat_patent %>% select(-2,-12:-14,-18:-20,-23:-25,-33) #drop some cols
formattable(head(compustat_patent,10))
|
gvkey
|
fyear
|
indfmt
|
conm
|
fyr
|
at
|
capx
|
ebitda
|
ppent
|
sale
|
sic
|
state
|
sic4
|
year
|
tag
|
cusip
|
firstyr
|
lastyr
|
pdpco
|
pdpseq
|
begyr
|
endyr
|
|
10000
|
2011
|
INDL
|
STANDARD MOTOR PRODS
|
12
|
550.722
|
11.037
|
75.847
|
64.039
|
874.625
|
3690
|
NY
|
3690
|
2011
|
0
|
853666
|
1960
|
2006
|
10000
|
1
|
1960
|
2006
|
|
10000
|
2014
|
INDL
|
STANDARD MOTOR PRODS
|
12
|
673.551
|
13.904
|
113.400
|
64.611
|
980.392
|
3690
|
NY
|
3690
|
2014
|
0
|
853666
|
1960
|
2006
|
10000
|
1
|
1960
|
2006
|
|
10000
|
2018
|
INDL
|
STANDARD MOTOR PRODS
|
12
|
843.132
|
20.141
|
105.555
|
90.754
|
1092.051
|
3690
|
NY
|
3690
|
2018
|
0
|
853666
|
1960
|
2006
|
10000
|
1
|
1960
|
2006
|
|
10000
|
2010
|
INDL
|
STANDARD MOTOR PRODS
|
12
|
492.801
|
10.806
|
62.873
|
60.666
|
810.910
|
3690
|
NY
|
3690
|
2010
|
0
|
853666
|
1960
|
2006
|
10000
|
1
|
1960
|
2006
|
|
10000
|
2009
|
INDL
|
STANDARD MOTOR PRODS
|
12
|
484.459
|
7.174
|
46.082
|
61.478
|
735.424
|
3690
|
NY
|
3690
|
2009
|
0
|
853666
|
1960
|
2006
|
10000
|
1
|
1960
|
2006
|
|
10000
|
1997
|
INDL
|
STANDARD MOTOR PRODS
|
12
|
577.137
|
15.597
|
28.435
|
126.024
|
559.823
|
3690
|
NY
|
3690
|
1997
|
0
|
853666
|
1960
|
2006
|
10000
|
1
|
1960
|
2006
|
|
10000
|
2004
|
INDL
|
STANDARD MOTOR PRODS
|
12
|
656.569
|
9.774
|
35.154
|
97.425
|
824.283
|
3690
|
NY
|
3690
|
2004
|
0
|
853666
|
1960
|
2006
|
10000
|
1
|
1960
|
2006
|
|
10000
|
2008
|
INDL
|
STANDARD MOTOR PRODS
|
12
|
575.027
|
10.500
|
32.657
|
66.901
|
775.241
|
3690
|
NY
|
3690
|
2008
|
0
|
853666
|
1960
|
2006
|
10000
|
1
|
1960
|
2006
|
|
10000
|
1991
|
INDL
|
STANDARD MOTOR PRODS
|
12
|
392.755
|
12.049
|
33.407
|
96.203
|
534.808
|
3690
|
NY
|
3690
|
1991
|
0
|
853666
|
1960
|
2006
|
10000
|
1
|
1960
|
2006
|
|
10000
|
1999
|
INDL
|
STANDARD MOTOR PRODS
|
12
|
556.021
|
14.423
|
46.774
|
106.578
|
658.241
|
3690
|
NY
|
3690
|
1999
|
0
|
853666
|
1960
|
2006
|
10000
|
1
|
1960
|
2006
|
2.2
#merge inventors to patents to firms
inventor <- inventor %>% rename(patnum = patent) %>% filter(country == 'US')
patent2 <- inner_join(inventor, patassg, by = 'patnum')
patent3 <- inner_join(patent2, dynass, by = 'pdpass')
patent3 <- patent3 %>% select(-6,-8,-10,-19:-34) #drop unnecessary columns
patent3 <- patent3 %>% rename(gvkey = pdpco1)
compustat_pat_inv <- inner_join(compustat_patent, patent3, by = 'gvkey')
compustat_pat_inv <- compustat_pat_inv %>% rename(state = state.y)
compustat_pat_inv <- compustat_pat_inv %>% select(-3,-5:-6,-15:-18,-21:-22,-28:-36)
formattable(head(compustat_pat_inv, 10))
|
gvkey
|
fyear
|
conm
|
capx
|
ebitda
|
ppent
|
sale
|
sic
|
state.x
|
sic4
|
year
|
pdpco
|
pdpseq
|
patnum
|
firstname
|
lastname
|
state
|
country
|
|
10004
|
1999
|
STANDARD PRODUCTS CO
|
67.214
|
122.344
|
359.624
|
1083.785
|
3714
|
MI
|
3714
|
1999
|
10004
|
1
|
3934385
|
AARON J
|
UNGERER
|
NY
|
US
|
|
10004
|
1999
|
STANDARD PRODUCTS CO
|
67.214
|
122.344
|
359.624
|
1083.785
|
3714
|
MI
|
3714
|
1999
|
10004
|
1
|
3934385
|
PETER V
|
PAULUS
|
MI
|
US
|
|
10004
|
1999
|
STANDARD PRODUCTS CO
|
67.214
|
122.344
|
359.624
|
1083.785
|
3714
|
MI
|
3714
|
1999
|
10004
|
1
|
3944693
|
AARON J
|
UNGERER
|
NY
|
US
|
|
10004
|
1999
|
STANDARD PRODUCTS CO
|
67.214
|
122.344
|
359.624
|
1083.785
|
3714
|
MI
|
3714
|
1999
|
10004
|
1
|
3959538
|
THEODORE
|
LOEW
|
NY
|
US
|
|
10004
|
1999
|
STANDARD PRODUCTS CO
|
67.214
|
122.344
|
359.624
|
1083.785
|
3714
|
MI
|
3714
|
1999
|
10004
|
1
|
3959537
|
THEODORE
|
LOEW
|
NY
|
US
|
|
10004
|
1999
|
STANDARD PRODUCTS CO
|
67.214
|
122.344
|
359.624
|
1083.785
|
3714
|
MI
|
3714
|
1999
|
10004
|
1
|
3958383
|
JOHN A
|
DALLEN
|
OH
|
US
|
|
10004
|
1999
|
STANDARD PRODUCTS CO
|
67.214
|
122.344
|
359.624
|
1083.785
|
3714
|
MI
|
3714
|
1999
|
10004
|
1
|
3959941
|
LEONARD W
|
SMITH
|
OH
|
US
|
|
10004
|
1999
|
STANDARD PRODUCTS CO
|
67.214
|
122.344
|
359.624
|
1083.785
|
3714
|
MI
|
3714
|
1999
|
10004
|
1
|
3970498
|
THEODORE
|
LOEW
|
NY
|
US
|
|
10004
|
1999
|
STANDARD PRODUCTS CO
|
67.214
|
122.344
|
359.624
|
1083.785
|
3714
|
MI
|
3714
|
1999
|
10004
|
1
|
4044177
|
PETER V
|
PAULUS
|
MI
|
US
|
|
10004
|
1999
|
STANDARD PRODUCTS CO
|
67.214
|
122.344
|
359.624
|
1083.785
|
3714
|
MI
|
3714
|
1999
|
10004
|
1
|
4044177
|
THEODORE
|
LOEW
|
NY
|
US
|
2.3
#count the number of inventors for at firm, state, year level
inventor_count <- compustat_pat_inv %>%
group_by(conm, year, state, sic) %>%
summarise(count_inv = sum(pdpseq)) #use pdpseq to count because this is a column of all 1s
#count the number of state-total inventors at firm, year level
inventor_count_sum <- inventor_count %>% group_by(conm, year) %>% summarise(sum_count = sum(count_inv))
inventor_share <- inner_join(inventor_count, inventor_count_sum, by = c('conm','year'))
#caculate the share of inventors across states for each firm. Here, I cannot calculate the 10-year rolling window though
inventor_share <- inventor_share %>% mutate(share = round(count_inv/sum_count,2))
#merge back to the dataset
inventor_share <- inner_join(inventor_share, compustat_patent, by = c('year','conm'))
inventor_share <- inventor_share %>% select(1:8,13,15) %>% rename(state = state.x, sic = sic.x) %>% ungroup()
formattable(head(inventor_share, 10))
|
conm
|
year
|
state
|
sic
|
count_inv
|
sum_count
|
share
|
gvkey
|
capx
|
ppent
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2003
|
AR
|
6200
|
1
|
13
|
0.08
|
65525
|
0.000
|
0.009
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2003
|
MA
|
6200
|
3
|
13
|
0.23
|
65525
|
0.000
|
0.009
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2003
|
NV
|
6200
|
1
|
13
|
0.08
|
65525
|
0.000
|
0.009
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2003
|
NY
|
6200
|
8
|
13
|
0.62
|
65525
|
0.000
|
0.009
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2007
|
AR
|
6200
|
1
|
13
|
0.08
|
65525
|
0.156
|
0.143
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2007
|
MA
|
6200
|
3
|
13
|
0.23
|
65525
|
0.156
|
0.143
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2007
|
NV
|
6200
|
1
|
13
|
0.08
|
65525
|
0.156
|
0.143
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2007
|
NY
|
6200
|
8
|
13
|
0.62
|
65525
|
0.156
|
0.143
|
|
24/7 REAL MEDIA INC
|
1996
|
VA
|
7370
|
2
|
2
|
1.00
|
113498
|
1.537
|
1.679
|
|
24/7 REAL MEDIA INC
|
1997
|
VA
|
7370
|
2
|
2
|
1.00
|
113498
|
0.042
|
0.591
|
2.4
#change state full name to abbreviations to merge
state_name <- tax_credit$statename
state <- state.abb[match(state_name,state.name)]
tax_credit$state <- state
compustat_pat_tax <- inner_join(inventor_share, tax_credit, by = c('state','year'))
formattable(head(compustat_pat_tax, 10))
|
conm
|
year
|
state
|
sic
|
count_inv
|
sum_count
|
share
|
gvkey
|
capx
|
ppent
|
statename
|
rd_taxcredit
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2003
|
AR
|
6200
|
1
|
13
|
0.08
|
65525
|
0.000
|
0.009
|
Arkansas
|
1.199619
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2003
|
MA
|
6200
|
3
|
13
|
0.23
|
65525
|
0.000
|
0.009
|
Massachusetts
|
1.136985
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2003
|
NV
|
6200
|
1
|
13
|
0.08
|
65525
|
0.000
|
0.009
|
Nevada
|
1.155769
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2003
|
NY
|
6200
|
8
|
13
|
0.62
|
65525
|
0.000
|
0.009
|
New York
|
1.206913
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2007
|
AR
|
6200
|
1
|
13
|
0.08
|
65525
|
0.156
|
0.143
|
Arkansas
|
1.199619
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2007
|
MA
|
6200
|
3
|
13
|
0.23
|
65525
|
0.156
|
0.143
|
Massachusetts
|
1.136985
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2007
|
NV
|
6200
|
1
|
13
|
0.08
|
65525
|
0.156
|
0.143
|
Nevada
|
1.155769
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2007
|
NY
|
6200
|
8
|
13
|
0.62
|
65525
|
0.156
|
0.143
|
New York
|
1.203977
|
|
24/7 REAL MEDIA INC
|
1996
|
VA
|
7370
|
2
|
2
|
1.00
|
113498
|
1.537
|
1.679
|
Virginia
|
1.196031
|
|
24/7 REAL MEDIA INC
|
1997
|
VA
|
7370
|
2
|
2
|
1.00
|
113498
|
0.042
|
0.591
|
Virginia
|
1.196031
|
2.5
compustat_weighted_tax <- compustat_pat_tax %>% group_by(conm, year) %>%
summarize(weighted_tax = sum(share*rd_taxcredit)) %>% ungroup() #calculate the weighted tax rate
#merge back to the dataset
compustat_weighted_tax <- inner_join(compustat_weighted_tax, compustat_pat_tax, by = c('year','conm'))
compustat_weighted_tax <- compustat_weighted_tax %>% select(1:5,10,11)
formattable(head(compustat_weighted_tax,10))
|
conm
|
year
|
weighted_tax
|
state
|
sic
|
capx
|
ppent
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2003
|
1.198223
|
AR
|
6200
|
0.000
|
0.009
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2003
|
1.198223
|
MA
|
6200
|
0.000
|
0.009
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2003
|
1.198223
|
NV
|
6200
|
0.000
|
0.009
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2003
|
1.198223
|
NY
|
6200
|
0.000
|
0.009
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2007
|
1.196403
|
AR
|
6200
|
0.156
|
0.143
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2007
|
1.196403
|
MA
|
6200
|
0.156
|
0.143
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2007
|
1.196403
|
NV
|
6200
|
0.156
|
0.143
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2007
|
1.196403
|
NY
|
6200
|
0.156
|
0.143
|
|
24/7 REAL MEDIA INC
|
1996
|
1.196031
|
VA
|
7370
|
1.537
|
1.679
|
|
24/7 REAL MEDIA INC
|
1997
|
1.196031
|
VA
|
7370
|
0.042
|
0.591
|
Question 3
3.1
cw <- read_dta('cw_hs6_sic87dd.dta')
trade <- read_dta('import_china_HS_classification.dta')
cw <- cw %>% rename(productcode = commoditycode)
merge_trade_cw <- merge(cw, trade, by = 'productcode')
formattable(head(merge_trade_cw,10))
|
productcode
|
sic87dd
|
share
|
weights_method
|
tradevaluein1000usd
|
quantity
|
|
100190
|
111
|
1
|
0
|
23.928
|
4400
|
|
100190
|
111
|
1
|
0
|
38.820
|
40735
|
|
100190
|
111
|
1
|
0
|
12.348
|
2520
|
|
100190
|
111
|
1
|
0
|
323.681
|
362698
|
|
100190
|
111
|
1
|
0
|
1.705
|
1812
|
|
100190
|
111
|
1
|
0
|
3.059
|
1600
|
|
100190
|
111
|
1
|
0
|
5.720
|
1749
|
|
100190
|
111
|
1
|
0
|
69.281
|
199426
|
|
100190
|
111
|
1
|
0
|
341.591
|
445091
|
|
100300
|
119
|
1
|
0
|
29.897
|
18238
|
3.2
#remove the last 0 digit if there are 4 digits
vec <- compustat_weighted_tax$sic #store the column values in a vector
new_sic <- str_remove(vec, "0$") #remove last 0 digit
#longer way: for loop to remove the last 0 digit
#new_sic <- c()
#counter = 1
#for (i in vec){
#if (str_sub(i, str_length(i), str_length(i)) == '0'){
#new_sic[counter] <- str_sub(i, 1, str_length(i)-1)
#counter = counter + 1
#} else {
#new_sic[counter] <- i
#counter = counter + 1
#}
#}
#create a new column after having removed the 0 digit
compustat_weighted_tax$sic87 <- as.numeric(new_sic)
write_dta(compustat_weighted_tax, 'merged_data_1.dta') #save the data file to use the dofile in Stata
formattable(head(compustat_weighted_tax,10))
|
conm
|
year
|
weighted_tax
|
state
|
sic
|
capx
|
ppent
|
sic87
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2003
|
1.198223
|
AR
|
6200
|
0.000
|
0.009
|
620
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2003
|
1.198223
|
MA
|
6200
|
0.000
|
0.009
|
620
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2003
|
1.198223
|
NV
|
6200
|
0.000
|
0.009
|
620
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2003
|
1.198223
|
NY
|
6200
|
0.000
|
0.009
|
620
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2007
|
1.196403
|
AR
|
6200
|
0.156
|
0.143
|
620
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2007
|
1.196403
|
MA
|
6200
|
0.156
|
0.143
|
620
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2007
|
1.196403
|
NV
|
6200
|
0.156
|
0.143
|
620
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2007
|
1.196403
|
NY
|
6200
|
0.156
|
0.143
|
620
|
|
24/7 REAL MEDIA INC
|
1996
|
1.196031
|
VA
|
7370
|
1.537
|
1.679
|
737
|
|
24/7 REAL MEDIA INC
|
1997
|
1.196031
|
VA
|
7370
|
0.042
|
0.591
|
737
|
#Data after converted by Do-file
data <- read_dta('compustat_sicmod.dta')
#final_data <- inner_join(data, merge_trade_cw, by = 'sic87dd') : takes too long to run
#Here, I use Stata to merge the file instead because it takes a long time to merge in R
import <- read_dta('import_data.dta') #file after m:m merged in Stata
formattable(head(import,10))
|
conm
|
year
|
weighted_tax
|
state
|
sic
|
capx
|
ppent
|
sic87
|
sic87dd
|
productcode
|
share
|
weights_method
|
tradevaluein1000usd
|
quantity
|
|
HILLSHIRE BRANDS CO
|
2008
|
1.192283
|
MS
|
2013
|
454.000
|
2519.000
|
2013
|
2011
|
410130
|
1.00000000
|
0
|
44.607
|
3432
|
|
HILLSHIRE BRANDS CO
|
1996
|
1.203889
|
AL
|
2013
|
542.000
|
3007.000
|
2013
|
2011
|
410121
|
1.00000000
|
0
|
0.480
|
195
|
|
HILLSHIRE BRANDS CO
|
2009
|
1.192191
|
VA
|
2013
|
357.000
|
2356.000
|
2013
|
2011
|
410121
|
1.00000000
|
0
|
4.598
|
2026
|
|
BRIDGFORD FOODS CORP
|
1998
|
1.132321
|
CA
|
2013
|
2.285
|
16.197
|
2013
|
2011
|
410129
|
1.00000000
|
0
|
159.901
|
31605
|
|
TYSON FOODS INC -CL A
|
2002
|
1.189733
|
FL
|
2011
|
506.000
|
4038.000
|
2011
|
2011
|
160100
|
1.00000000
|
0
|
8.626
|
990
|
|
HILLSHIRE BRANDS CO
|
2006
|
1.193954
|
WI
|
2013
|
625.000
|
2942.000
|
2013
|
2011
|
410110
|
1.00000000
|
0
|
214.301
|
73027
|
|
HILLSHIRE BRANDS CO
|
2002
|
1.185731
|
SC
|
2013
|
669.000
|
3155.000
|
2013
|
2011
|
391710
|
0.95735645
|
1
|
208.218
|
15014
|
|
HORMEL FOODS CORP
|
1990
|
1.202108
|
CT
|
2011
|
34.988
|
235.026
|
2011
|
2011
|
410121
|
1.00000000
|
0
|
12.339
|
2875
|
|
HILLSHIRE BRANDS CO
|
2011
|
1.190193
|
GA
|
2013
|
337.000
|
1648.000
|
2013
|
2011
|
410129
|
1.00000000
|
0
|
93.065
|
27906
|
|
HORMEL FOODS CORP
|
2008
|
1.212460
|
IA
|
2011
|
125.890
|
977.657
|
2011
|
2011
|
510119
|
0.09350958
|
1
|
4.061
|
133
|
3.3
import_state_year <- import %>%
group_by(state, year) %>% #sum by state and year
summarise(sum_trade = sum(tradevaluein1000usd)) %>% drop_na()
final_data <- inner_join(compustat_weighted_tax, import_state_year, by = c('year','state'))
formattable(head(final_data,10))
|
conm
|
year
|
weighted_tax
|
state
|
sic
|
capx
|
ppent
|
sic87
|
sum_trade
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2003
|
1.198223
|
AR
|
6200
|
0.000
|
0.009
|
620
|
25890618
|
|
1ST PRESTIGE WEALTH MGMT INC
|
2007
|
1.196403
|
AR
|
6200
|
0.156
|
0.143
|
620
|
25285322
|
|
3-DIMENSIONAL PHARMACEUTICAL
|
2001
|
1.151313
|
DE
|
2834
|
9.090
|
11.735
|
2834
|
35417462
|
|
3M CO
|
1990
|
1.162646
|
AL
|
2670
|
1337.000
|
4389.000
|
267
|
57134235
|
|
3M CO
|
1990
|
1.162646
|
AR
|
2670
|
1337.000
|
4389.000
|
267
|
68336776
|
|
3M CO
|
1990
|
1.162646
|
DE
|
2670
|
1337.000
|
4389.000
|
267
|
26059905
|
|
3M CO
|
1990
|
1.162646
|
IA
|
2670
|
1337.000
|
4389.000
|
267
|
15579231
|
|
3M CO
|
1990
|
1.162646
|
LA
|
2670
|
1337.000
|
4389.000
|
267
|
23275225
|
|
3M CO
|
1990
|
1.162646
|
MS
|
2670
|
1337.000
|
4389.000
|
267
|
5561636
|
|
3M CO
|
1990
|
1.162646
|
MT
|
2670
|
1337.000
|
4389.000
|
267
|
11325592
|
Question 4
I cannot do this part, both because some of the above parts may be incorrect and because I am very inexperienced with panel data regression. I have tried the below code but it gives error output for the regression.
#demean the variable
#sample_mean_data <- final_data %>% group_by(conm) %>% summarize(sample_mean = mean(sum_trade))
#data_before_demean <- inner_join(final_data, sample_mean_data, by = 'conm')
#demeaned_data <- data_before_demean %>% mutate(demeaned_trade = sum_trade - sample_mean)
#create the dependent variable
#final_data_reg <- demeaned_data %>% mutate(investment = capx/ppent)
#model <- plm(investment ~ weighted_tax*demeaned_trade + weighted_tax + as.factor(year), data = final_data_reg, index=c('year', 'state'), model="with", effect="twoways")