Question 1

I donโ€™t think we need to include the main effect term for Import Shock because if the goal of the regression is to see if RD tax credit can absorb import shocks, we have already controlled for import shocks in the interaction terms. Including the main effect term will only change the constant terms or the interpretation of the other coefficients.

Question 2

library(haven)
library(tidyverse)
library(stringr)
library(formattable)
library(plm)

compustat <- read_dta('compustat.dta')
patent <- read_dta('pdpcohdr.dta')
inventor <- read_dta('inventors.dta')
tax_credit <- read_dta('rd_tax_credit_state.dta')
patassg <- read_dta('patassg.dta')
dynass <- read_dta('dynass.dta')

2.1

gvkey <- compustat$gvkey
gvkey <- str_remove(gvkey, "^0+") #remove the leading 0s to merge
compustat$gvkey <- gvkey
compustat_patent <- merge(compustat, patent, by = 'gvkey')
compustat_patent$gvkey <- as.numeric(compustat_patent$gvkey)
compustat_patent <- compustat_patent %>% select(-2,-12:-14,-18:-20,-23:-25,-33) #drop some cols
formattable(head(compustat_patent,10))
gvkey fyear indfmt conm fyr at capx ebitda ppent sale sic state sic4 year tag cusip firstyr lastyr pdpco pdpseq begyr endyr
10000 2011 INDL STANDARD MOTOR PRODS 12 550.722 11.037 75.847 64.039 874.625 3690 NY 3690 2011 0 853666 1960 2006 10000 1 1960 2006
10000 2014 INDL STANDARD MOTOR PRODS 12 673.551 13.904 113.400 64.611 980.392 3690 NY 3690 2014 0 853666 1960 2006 10000 1 1960 2006
10000 2018 INDL STANDARD MOTOR PRODS 12 843.132 20.141 105.555 90.754 1092.051 3690 NY 3690 2018 0 853666 1960 2006 10000 1 1960 2006
10000 2010 INDL STANDARD MOTOR PRODS 12 492.801 10.806 62.873 60.666 810.910 3690 NY 3690 2010 0 853666 1960 2006 10000 1 1960 2006
10000 2009 INDL STANDARD MOTOR PRODS 12 484.459 7.174 46.082 61.478 735.424 3690 NY 3690 2009 0 853666 1960 2006 10000 1 1960 2006
10000 1997 INDL STANDARD MOTOR PRODS 12 577.137 15.597 28.435 126.024 559.823 3690 NY 3690 1997 0 853666 1960 2006 10000 1 1960 2006
10000 2004 INDL STANDARD MOTOR PRODS 12 656.569 9.774 35.154 97.425 824.283 3690 NY 3690 2004 0 853666 1960 2006 10000 1 1960 2006
10000 2008 INDL STANDARD MOTOR PRODS 12 575.027 10.500 32.657 66.901 775.241 3690 NY 3690 2008 0 853666 1960 2006 10000 1 1960 2006
10000 1991 INDL STANDARD MOTOR PRODS 12 392.755 12.049 33.407 96.203 534.808 3690 NY 3690 1991 0 853666 1960 2006 10000 1 1960 2006
10000 1999 INDL STANDARD MOTOR PRODS 12 556.021 14.423 46.774 106.578 658.241 3690 NY 3690 1999 0 853666 1960 2006 10000 1 1960 2006

2.2

#merge inventors to patents to firms
inventor <- inventor %>% rename(patnum = patent) %>% filter(country == 'US')
patent2 <- inner_join(inventor, patassg, by = 'patnum')
patent3 <- inner_join(patent2, dynass, by = 'pdpass')
patent3 <- patent3 %>% select(-6,-8,-10,-19:-34) #drop unnecessary columns
patent3 <- patent3 %>% rename(gvkey = pdpco1)
compustat_pat_inv <- inner_join(compustat_patent, patent3, by = 'gvkey')
compustat_pat_inv <- compustat_pat_inv %>% rename(state = state.y)
compustat_pat_inv <- compustat_pat_inv %>% select(-3,-5:-6,-15:-18,-21:-22,-28:-36)
formattable(head(compustat_pat_inv, 10))
gvkey fyear conm capx ebitda ppent sale sic state.x sic4 year pdpco pdpseq patnum firstname lastname state country
10004 1999 STANDARD PRODUCTS CO 67.214 122.344 359.624 1083.785 3714 MI 3714 1999 10004 1 3934385 AARON J UNGERER NY US
10004 1999 STANDARD PRODUCTS CO 67.214 122.344 359.624 1083.785 3714 MI 3714 1999 10004 1 3934385 PETER V PAULUS MI US
10004 1999 STANDARD PRODUCTS CO 67.214 122.344 359.624 1083.785 3714 MI 3714 1999 10004 1 3944693 AARON J UNGERER NY US
10004 1999 STANDARD PRODUCTS CO 67.214 122.344 359.624 1083.785 3714 MI 3714 1999 10004 1 3959538 THEODORE LOEW NY US
10004 1999 STANDARD PRODUCTS CO 67.214 122.344 359.624 1083.785 3714 MI 3714 1999 10004 1 3959537 THEODORE LOEW NY US
10004 1999 STANDARD PRODUCTS CO 67.214 122.344 359.624 1083.785 3714 MI 3714 1999 10004 1 3958383 JOHN A DALLEN OH US
10004 1999 STANDARD PRODUCTS CO 67.214 122.344 359.624 1083.785 3714 MI 3714 1999 10004 1 3959941 LEONARD W SMITH OH US
10004 1999 STANDARD PRODUCTS CO 67.214 122.344 359.624 1083.785 3714 MI 3714 1999 10004 1 3970498 THEODORE LOEW NY US
10004 1999 STANDARD PRODUCTS CO 67.214 122.344 359.624 1083.785 3714 MI 3714 1999 10004 1 4044177 PETER V PAULUS MI US
10004 1999 STANDARD PRODUCTS CO 67.214 122.344 359.624 1083.785 3714 MI 3714 1999 10004 1 4044177 THEODORE LOEW NY US

2.3

#count the number of inventors for at firm, state, year level
inventor_count <- compustat_pat_inv %>% 
  group_by(conm, year, state, sic) %>%                        
  summarise(count_inv = sum(pdpseq)) #use pdpseq to count because this is a column of all 1s

#count the number of state-total inventors at firm, year level
inventor_count_sum <- inventor_count %>% group_by(conm, year) %>% summarise(sum_count = sum(count_inv))
inventor_share <- inner_join(inventor_count, inventor_count_sum, by = c('conm','year'))

#caculate the share of inventors across states for each firm. Here, I cannot calculate the 10-year rolling window though
inventor_share <- inventor_share %>% mutate(share = round(count_inv/sum_count,2))

#merge back to the dataset
inventor_share <- inner_join(inventor_share, compustat_patent, by = c('year','conm'))
inventor_share <- inventor_share %>% select(1:8,13,15) %>% rename(state = state.x, sic = sic.x) %>% ungroup()
formattable(head(inventor_share, 10))
conm year state sic count_inv sum_count share gvkey capx ppent
1ST PRESTIGE WEALTH MGMT INC 2003 AR 6200 1 13 0.08 65525 0.000 0.009
1ST PRESTIGE WEALTH MGMT INC 2003 MA 6200 3 13 0.23 65525 0.000 0.009
1ST PRESTIGE WEALTH MGMT INC 2003 NV 6200 1 13 0.08 65525 0.000 0.009
1ST PRESTIGE WEALTH MGMT INC 2003 NY 6200 8 13 0.62 65525 0.000 0.009
1ST PRESTIGE WEALTH MGMT INC 2007 AR 6200 1 13 0.08 65525 0.156 0.143
1ST PRESTIGE WEALTH MGMT INC 2007 MA 6200 3 13 0.23 65525 0.156 0.143
1ST PRESTIGE WEALTH MGMT INC 2007 NV 6200 1 13 0.08 65525 0.156 0.143
1ST PRESTIGE WEALTH MGMT INC 2007 NY 6200 8 13 0.62 65525 0.156 0.143
24/7 REAL MEDIA INC 1996 VA 7370 2 2 1.00 113498 1.537 1.679
24/7 REAL MEDIA INC 1997 VA 7370 2 2 1.00 113498 0.042 0.591

2.4

#change state full name to abbreviations to merge
state_name <- tax_credit$statename
state <- state.abb[match(state_name,state.name)]
tax_credit$state <- state
compustat_pat_tax <- inner_join(inventor_share, tax_credit, by = c('state','year'))
formattable(head(compustat_pat_tax, 10))
conm year state sic count_inv sum_count share gvkey capx ppent statename rd_taxcredit
1ST PRESTIGE WEALTH MGMT INC 2003 AR 6200 1 13 0.08 65525 0.000 0.009 Arkansas 1.199619
1ST PRESTIGE WEALTH MGMT INC 2003 MA 6200 3 13 0.23 65525 0.000 0.009 Massachusetts 1.136985
1ST PRESTIGE WEALTH MGMT INC 2003 NV 6200 1 13 0.08 65525 0.000 0.009 Nevada 1.155769
1ST PRESTIGE WEALTH MGMT INC 2003 NY 6200 8 13 0.62 65525 0.000 0.009 New York 1.206913
1ST PRESTIGE WEALTH MGMT INC 2007 AR 6200 1 13 0.08 65525 0.156 0.143 Arkansas 1.199619
1ST PRESTIGE WEALTH MGMT INC 2007 MA 6200 3 13 0.23 65525 0.156 0.143 Massachusetts 1.136985
1ST PRESTIGE WEALTH MGMT INC 2007 NV 6200 1 13 0.08 65525 0.156 0.143 Nevada 1.155769
1ST PRESTIGE WEALTH MGMT INC 2007 NY 6200 8 13 0.62 65525 0.156 0.143 New York 1.203977
24/7 REAL MEDIA INC 1996 VA 7370 2 2 1.00 113498 1.537 1.679 Virginia 1.196031
24/7 REAL MEDIA INC 1997 VA 7370 2 2 1.00 113498 0.042 0.591 Virginia 1.196031

2.5

compustat_weighted_tax <- compustat_pat_tax %>% group_by(conm, year) %>% 
  summarize(weighted_tax = sum(share*rd_taxcredit)) %>% ungroup() #calculate the weighted tax rate
#merge back to the dataset
compustat_weighted_tax <- inner_join(compustat_weighted_tax, compustat_pat_tax, by = c('year','conm'))
compustat_weighted_tax <- compustat_weighted_tax %>% select(1:5,10,11)
formattable(head(compustat_weighted_tax,10))
conm year weighted_tax state sic capx ppent
1ST PRESTIGE WEALTH MGMT INC 2003 1.198223 AR 6200 0.000 0.009
1ST PRESTIGE WEALTH MGMT INC 2003 1.198223 MA 6200 0.000 0.009
1ST PRESTIGE WEALTH MGMT INC 2003 1.198223 NV 6200 0.000 0.009
1ST PRESTIGE WEALTH MGMT INC 2003 1.198223 NY 6200 0.000 0.009
1ST PRESTIGE WEALTH MGMT INC 2007 1.196403 AR 6200 0.156 0.143
1ST PRESTIGE WEALTH MGMT INC 2007 1.196403 MA 6200 0.156 0.143
1ST PRESTIGE WEALTH MGMT INC 2007 1.196403 NV 6200 0.156 0.143
1ST PRESTIGE WEALTH MGMT INC 2007 1.196403 NY 6200 0.156 0.143
24/7 REAL MEDIA INC 1996 1.196031 VA 7370 1.537 1.679
24/7 REAL MEDIA INC 1997 1.196031 VA 7370 0.042 0.591

Question 3

3.1

cw <- read_dta('cw_hs6_sic87dd.dta')
trade <- read_dta('import_china_HS_classification.dta')
cw <- cw %>% rename(productcode = commoditycode)
merge_trade_cw <- merge(cw, trade, by = 'productcode')
formattable(head(merge_trade_cw,10))
productcode sic87dd share weights_method tradevaluein1000usd quantity
100190 111 1 0 23.928 4400
100190 111 1 0 38.820 40735
100190 111 1 0 12.348 2520
100190 111 1 0 323.681 362698
100190 111 1 0 1.705 1812
100190 111 1 0 3.059 1600
100190 111 1 0 5.720 1749
100190 111 1 0 69.281 199426
100190 111 1 0 341.591 445091
100300 119 1 0 29.897 18238

3.2

#remove the last 0 digit if there are 4 digits 
vec <- compustat_weighted_tax$sic #store the column values in a vector
new_sic <- str_remove(vec, "0$") #remove last 0 digit

#longer way: for loop to remove the last 0 digit
#new_sic <- c()
#counter = 1
#for (i in vec){
  #if (str_sub(i, str_length(i), str_length(i)) == '0'){
    #new_sic[counter] <- str_sub(i, 1, str_length(i)-1)
    #counter = counter + 1
  #} else {
    #new_sic[counter] <- i
    #counter = counter + 1
  #}
#}

#create a new column after having removed the 0 digit
compustat_weighted_tax$sic87 <- as.numeric(new_sic)
write_dta(compustat_weighted_tax, 'merged_data_1.dta') #save the data file to use the dofile in Stata
formattable(head(compustat_weighted_tax,10))
conm year weighted_tax state sic capx ppent sic87
1ST PRESTIGE WEALTH MGMT INC 2003 1.198223 AR 6200 0.000 0.009 620
1ST PRESTIGE WEALTH MGMT INC 2003 1.198223 MA 6200 0.000 0.009 620
1ST PRESTIGE WEALTH MGMT INC 2003 1.198223 NV 6200 0.000 0.009 620
1ST PRESTIGE WEALTH MGMT INC 2003 1.198223 NY 6200 0.000 0.009 620
1ST PRESTIGE WEALTH MGMT INC 2007 1.196403 AR 6200 0.156 0.143 620
1ST PRESTIGE WEALTH MGMT INC 2007 1.196403 MA 6200 0.156 0.143 620
1ST PRESTIGE WEALTH MGMT INC 2007 1.196403 NV 6200 0.156 0.143 620
1ST PRESTIGE WEALTH MGMT INC 2007 1.196403 NY 6200 0.156 0.143 620
24/7 REAL MEDIA INC 1996 1.196031 VA 7370 1.537 1.679 737
24/7 REAL MEDIA INC 1997 1.196031 VA 7370 0.042 0.591 737
#Data after converted by Do-file
data <- read_dta('compustat_sicmod.dta')
#final_data <- inner_join(data, merge_trade_cw, by = 'sic87dd') : takes too long to run
#Here, I use Stata to merge the file instead because it takes a long time to merge in R
import <- read_dta('import_data.dta') #file after m:m merged in Stata
formattable(head(import,10))
conm year weighted_tax state sic capx ppent sic87 sic87dd productcode share weights_method tradevaluein1000usd quantity
HILLSHIRE BRANDS CO 2008 1.192283 MS 2013 454.000 2519.000 2013 2011 410130 1.00000000 0 44.607 3432
HILLSHIRE BRANDS CO 1996 1.203889 AL 2013 542.000 3007.000 2013 2011 410121 1.00000000 0 0.480 195
HILLSHIRE BRANDS CO 2009 1.192191 VA 2013 357.000 2356.000 2013 2011 410121 1.00000000 0 4.598 2026
BRIDGFORD FOODS CORP 1998 1.132321 CA 2013 2.285 16.197 2013 2011 410129 1.00000000 0 159.901 31605
TYSON FOODS INC -CL A 2002 1.189733 FL 2011 506.000 4038.000 2011 2011 160100 1.00000000 0 8.626 990
HILLSHIRE BRANDS CO 2006 1.193954 WI 2013 625.000 2942.000 2013 2011 410110 1.00000000 0 214.301 73027
HILLSHIRE BRANDS CO 2002 1.185731 SC 2013 669.000 3155.000 2013 2011 391710 0.95735645 1 208.218 15014
HORMEL FOODS CORP 1990 1.202108 CT 2011 34.988 235.026 2011 2011 410121 1.00000000 0 12.339 2875
HILLSHIRE BRANDS CO 2011 1.190193 GA 2013 337.000 1648.000 2013 2011 410129 1.00000000 0 93.065 27906
HORMEL FOODS CORP 2008 1.212460 IA 2011 125.890 977.657 2011 2011 510119 0.09350958 1 4.061 133

3.3

import_state_year <- import %>% 
  group_by(state, year) %>%   #sum by state and year                     
  summarise(sum_trade = sum(tradevaluein1000usd)) %>% drop_na()

final_data <- inner_join(compustat_weighted_tax, import_state_year, by = c('year','state'))
formattable(head(final_data,10))
conm year weighted_tax state sic capx ppent sic87 sum_trade
1ST PRESTIGE WEALTH MGMT INC 2003 1.198223 AR 6200 0.000 0.009 620 25890618
1ST PRESTIGE WEALTH MGMT INC 2007 1.196403 AR 6200 0.156 0.143 620 25285322
3-DIMENSIONAL PHARMACEUTICAL 2001 1.151313 DE 2834 9.090 11.735 2834 35417462
3M CO 1990 1.162646 AL 2670 1337.000 4389.000 267 57134235
3M CO 1990 1.162646 AR 2670 1337.000 4389.000 267 68336776
3M CO 1990 1.162646 DE 2670 1337.000 4389.000 267 26059905
3M CO 1990 1.162646 IA 2670 1337.000 4389.000 267 15579231
3M CO 1990 1.162646 LA 2670 1337.000 4389.000 267 23275225
3M CO 1990 1.162646 MS 2670 1337.000 4389.000 267 5561636
3M CO 1990 1.162646 MT 2670 1337.000 4389.000 267 11325592

Question 4

I cannot do this part, both because some of the above parts may be incorrect and because I am very inexperienced with panel data regression. I have tried the below code but it gives error output for the regression.

#demean the variable
#sample_mean_data <- final_data %>% group_by(conm) %>% summarize(sample_mean = mean(sum_trade))
#data_before_demean <- inner_join(final_data, sample_mean_data, by = 'conm')
#demeaned_data <- data_before_demean %>% mutate(demeaned_trade = sum_trade - sample_mean)

#create the dependent variable
#final_data_reg <- demeaned_data %>% mutate(investment = capx/ppent)

#model <- plm(investment ~ weighted_tax*demeaned_trade + weighted_tax + as.factor(year), data = final_data_reg, index=c('year', 'state'), model="with", effect="twoways")