library(haven)
library(sandwich)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ lubridate 1.9.5 ✔ tibble 3.3.1
## ✔ purrr 1.2.1 ✔ tidyr 1.3.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(marginaleffects)
library(dplyr)
library(fixest)
library(modelsummary)
library(kableExtra)
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(tinytable)
# Read the county-level IRS SOI extract (Stata format) and convert the
# labelled `incomebin` column to a factor so its value labels become the
# factor levels used as column headers in later tables.
irs_county <- read_dta("/Users/cartercrowley/Documents/Economics 970/ps3/rawdata/irssoi_county.dta") %>%
  mutate(incomebin = as_factor(incomebin))
# Table 1: means and standard deviations of each income measure by AGI bin,
# weighted by the number of returns.
#
# datasummary_balance() has no `weights` argument: anything passed that way
# falls into `...` and is forwarded to the table-drawing backend rather than
# used for the statistics. Per the modelsummary documentation, weighted
# statistics are computed only when `data` itself contains a column named
# "weights", so that column is added inline here (irs_county is not modified).
# NOTE(review): re-knit after this change; the rendered table should be
# regenerated so it matches the "return-weighted" note.
datasummary_balance(
  capital_share + salary_share + business_share + partnership_share +
    agi_per_return + contrib_per_return + cg_per_return + wages_per_return +
    capital_per_return + business_per_return + partnership_per_return +
    anycg + anywages + anybusiness + anypartnership + anycharitable ~ incomebin,
  data = irs_county %>% mutate(weights = returns),
  dinm = FALSE,
  fmt = 1,
  title = "Table 1: Composition of Income by Size of Adjusted Gross Income",
  notes = "NOTE--Table reports return-weighted means and standard deviations by income bin."
)
| All (N=21714) | $1 under $25,000 (N=21714) | $25,000 under $50,000 (N=21714) | $50,000 under $75,000 (N=21696) | $75,000 under $100,000 (N=21632) | $100,000 under $200,000 (N=21517) | $200,000 or more (N=18299) | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Mean | Std. Dev. | Mean | Std. Dev. | Mean | Std. Dev. | Mean | Std. Dev. | Mean | Std. Dev. | Mean | Std. Dev. | Mean | Std. Dev. | |
| NOTE--Table reports return-weighted means and standard deviations by income bin. | ||||||||||||||
| capital_share | 5.8 | 3.8 | 4.6 | 5.3 | 3.2 | 3.4 | 3.8 | 2.8 | 4.5 | 5.2 | 7.9 | 6.8 | 17.8 | 8.9 |
| salary_share | 69.4 | 7.9 | 74.3 | 6.3 | 78.4 | 6.2 | 75.6 | 6.8 | 74.0 | 8.3 | 66.8 | 12.0 | 43.5 | 12.7 |
| business_share | 3.1 | 1.2 | 7.2 | 3.1 | 3.1 | 1.8 | 2.6 | 1.7 | 2.8 | 2.5 | 4.0 | 3.3 | 5.2 | 3.3 |
| partnership_share | 3.7 | 2.4 | 0.8 | 3.4 | 1.0 | 2.4 | 1.5 | 2.6 | 2.2 | 4.7 | 5.1 | 5.5 | 20.4 | 8.6 |
| agi_per_return | 49.8 | 14.8 | 12.3 | 0.7 | 36.2 | 5.7 | 62.1 | 2.7 | 87.6 | 5.2 | 140.1 | 19.0 | 431.6 | 191.6 |
| contrib_per_return | 0.8 | 0.6 | 0.1 | 0.1 | 0.4 | 0.3 | 1.0 | 0.5 | 1.7 | 2.4 | 3.4 | 1.8 | 12.9 | 8.9 |
| cg_per_return | 1.6 | 3.4 | 0.1 | 0.4 | 0.4 | 3.8 | 0.9 | 2.2 | 1.8 | 4.0 | 5.5 | 8.3 | 47.8 | 58.8 |
| wages_per_return | 33.9 | 8.7 | 9.2 | 1.1 | 28.3 | 2.1 | 46.8 | 4.1 | 64.3 | 7.1 | 91.3 | 13.6 | 179.1 | 55.0 |
| capital_per_return | 3.4 | 5.6 | 0.6 | 0.6 | 1.2 | 5.7 | 2.3 | 1.9 | 3.9 | 4.6 | 11.2 | 11.1 | 84.4 | 83.1 |
| business_per_return | 1.6 | 0.8 | 0.9 | 0.4 | 1.1 | 0.7 | 1.6 | 1.1 | 2.4 | 2.2 | 5.4 | 4.5 | 22.1 | 13.8 |
| partnership_per_return | 2.3 | 2.2 | 0.1 | 0.4 | 0.4 | 0.9 | 0.9 | 1.6 | 1.9 | 4.1 | 7.1 | 7.9 | 88.8 | 47.6 |
| anycg | 13.6 | 5.8 | 8.0 | 4.6 | 12.6 | 8.7 | 19.5 | 10.3 | 24.8 | 12.2 | 35.2 | 10.2 | 63.5 | 9.1 |
| anywages | 82.5 | 4.2 | 77.1 | 5.2 | 85.9 | 4.3 | 87.1 | 5.2 | 88.2 | 6.2 | 88.0 | 6.5 | 84.3 | 8.7 |
| anybusiness | 15.8 | 3.5 | 15.1 | 5.1 | 15.7 | 6.7 | 19.7 | 7.3 | 21.9 | 8.9 | 23.8 | 6.6 | 30.2 | 8.2 |
| anypartnership | 5.1 | 2.8 | 1.9 | 2.1 | 4.3 | 4.3 | 7.8 | 7.3 | 9.3 | 10.7 | 16.8 | 9.4 | 47.7 | 12.8 |
| anycharitable | 17.9 | 8.5 | 4.3 | 2.9 | 14.8 | 6.8 | 28.8 | 12.0 | 41.5 | 15.9 | 61.4 | 15.9 | 83.7 | 11.5 |
Partnership income per return is far higher for high earners than for
low earners: it rises nearly 900-fold between the lowest income bin
(0.1) and the highest (88.8). This makes intuitive sense, as
partnerships are investment and business structures typically
accessible only to individuals who have at least a modest amount of
capital on hand, or access to it via secondary sources, meaning
lower-income households are almost entirely excluded from this income
source.
# Read the county health file, which stores one `healthy_days<YEAR>` column
# per year, and reshape it to one row per county-year.
health <- read_dta("/Users/cartercrowley/Documents/Economics 970/ps3/rawdata/health_county_2026.dta")
health <- pivot_longer(
  data = health,
  # Each year column listed exactly once (the original repeated 2012).
  cols = c(
    "healthy_days2009", "healthy_days2010", "healthy_days2011",
    "healthy_days2012", "healthy_days2013", "healthy_days2014",
    "healthy_days2015"
  ),
  names_to = "year",
  names_prefix = "healthy_days", # strip the prefix so `year` holds "2009", ...
  values_to = "healthy_days"
)

# Put `year` on a common numeric type in both tables before joining on it.
irs_county$year <- as.numeric(irs_county$year)
health$year <- as.numeric(health$year)

# Keep every IRS row; attach healthy_days where a county-year match exists.
irs_health <- left_join(irs_county, health, by = c("CountyFIPS", "year"))
# Rows whose (CountyFIPS, year) key appears in both the IRS and CDC data
both_irs_CDC <- irs_county %>%
  inner_join(health, by = join_by(CountyFIPS, year))
nrow(both_irs_CDC)
## [1] 148188
# IRS rows whose (CountyFIPS, year) key has no match in the CDC data
just_irs <- irs_county %>%
  anti_join(health, by = join_by(CountyFIPS, year))
nrow(just_irs)
## [1] 98
# CDC rows whose (CountyFIPS, year) key has no match in the IRS data
just_CDC <- health %>%
  anti_join(irs_county, by = join_by(CountyFIPS, year))
nrow(just_CDC)
## [1] 266
# Restrict the merged panel to the "All" income bin rows.
irs_health <- irs_health %>%
  filter(incomebin == "All")

# Model 1: healthy days on wages and capital income per return, no fixed
# effects. Weights and clusters are given as one-sided formulas so fixest
# takes them from `data` itself; passing `irs_health$...` vectors would
# silently misalign if `data` were ever subset or reordered.
mod1 <- feols(
  healthy_days ~ wages_per_return + capital_per_return,
  data = irs_health,
  weights = ~returns,
  cluster = ~CountyFIPS
)
## NOTE: 1,458 observations removed because of NA values (LHS: 1,458).
# Model 2: same regressors with year fixed effects. Weights and clusters are
# given as one-sided formulas so fixest takes them from `data` itself rather
# than from free-standing vectors that could fall out of alignment with it.
mod2 <- feols(
  healthy_days ~ wages_per_return + capital_per_return | year,
  data = irs_health,
  weights = ~returns,
  cluster = ~CountyFIPS
)
## NOTE: 1,458 observations removed because of NA values (LHS: 1,458).
# Model 3: same regressors with year and county fixed effects. Weights and
# clusters are given as one-sided formulas so fixest takes them from `data`
# itself rather than from free-standing vectors that could fall out of
# alignment with it.
mod3 <- feols(
  healthy_days ~ wages_per_return + capital_per_return | year + CountyFIPS,
  data = irs_health,
  weights = ~returns,
  cluster = ~CountyFIPS
)
## NOTES: 1,458 observations removed because of NA values (LHS: 1,458).
## 0/145 fixed-effect singletons were removed (145 observations).
# Table 2: the three healthy-days regressions side by side. The intercept is
# dropped, the two regressors get display labels, and fit statistics matching
# the gof_omit pattern are hidden.
modelsummary(
  list(mod1, mod2, mod3),
  coef_rename = c(
    "wages_per_return" = "Wages ($1000s)",
    "capital_per_return" = "Capital Income ($1000s)"
  ),
  coef_omit = "Intercept",
  gof_omit = "R2|R2 Adj.|R2 Within|R2 Within Adj.|AIC|BIC|RMSE|Std",
  title = "Table 2: The Effect of Wealth on Health",
  notes = "NOTE--Table reports regression coefficients with standard errors clustered by county in parentheses. The dependent variable
in each regression is the average number of healthy days per month. Columns 2 and 3 include year fixed effects. Column 3
includes year and county fixed effects. All regressions are weighted by the number of tax returns in each county. Data cover
2009-15. Capital income includes interest income, capital gains, and dividends. Wage and capital income are measured in
$1000s and taken from IRS form 1040."
)
| (1) | (2) | (3) | |
|---|---|---|---|
| NOTE--Table reports regression coefficients with standard errors clustered by county in parentheses. The dependent variable in each regression is the average number of healthy days per month. Columns 2 and 3 include year fixed effects. Column 3 includes year and county fixed effects. All regressions are weighted by the number of tax returns in each county. Data cover 2009-15. Capital income includes interest income, capital gains, and dividends. Wage and capital income are measured in $1000s and taken from IRS form 1040. | |||
| Wages ($1000s) | 0.029 | 0.030 | 0.005 |
| (0.002) | (0.002) | (0.003) | |
| Capital Income ($1000s) | -0.011 | -0.011 | -0.007 |
| (0.008) | (0.009) | (0.003) | |
| Num.Obs. | 20256 | 20256 | 20111 |
| FE: year | X | X | |
| FE: CountyFIPS | X | ||
Adding county fixed effects absorbs time-invariant, county-level
characteristics that might affect health outcomes and would otherwise
obscure the causal effect of wages and capital income that we are trying
to estimate. For example, county fixed effects account for persistent
differences in wealth between counties, which might lead one county to
have better hospitals than another and would overstate the true causal
effect of income on health were they not accounted for. In this data
specifically, the addition of county fixed effects decreases the wage
coefficient from 0.030 to 0.005 and moves the capital income coefficient
toward zero, from -0.011 to -0.007 (both relative to Model 2). The fact
that including county fixed effects shrinks the magnitude of both
coefficients is consistent with our prediction that persistent
differences in wealth across counties would inflate our coefficient
estimates, and that accounting for them helps address omitted variable
bias.