library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1.9000 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
## Loading required package: robustbase
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
flextable,
rms
)
##
## Attaching package: 'flextable'
##
## The following object is masked from 'package:purrr':
##
## compose
##
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
##
## The following object is masked from 'package:psych':
##
## describe
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
## The following objects are masked from 'package:base':
##
## format.pval, units
theme_set(theme_bw())
options(
digits = 3
)
#multithreading
#library(future)
#plan(multisession(workers = 8))
#pet stuff
d_study = read_csv("data/study1_counties.csv") %>%
df_legalize_names() %>%
mutate(
FIPS = STCOUNTYFP,
state = name
)
## New names:
## Rows: 563 Columns: 25
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (11): Location, Geographic.Area.Name..NAME., Meaning.of.NAICS.code..NAIC... dbl
## (14): ...1, X2017.NAICS.code..NAICS2017., Year..YEAR., Number.of.establi...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
fips_included = d_study$STCOUNTYFP
#covars
d_pop = read_csv("data/PopulationEstimates.csv")
## Rows: 208225 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): State, Area_Name, Attribute
## dbl (2): FIPStxt, Value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#race props
d_race = read_csv("data/Population-by-Race/Population by Race - US, States, Counties.csv") %>%
df_legalize_names() %>%
filter(Year == 2020) %>%
filter(IBRC_Geo_ID %in% fips_included) %>%
mutate(
FIPS = IBRC_Geo_ID
) %>%
mutate(
white_prop = as.numeric(White_Alone) / Total_Population,
black_prop = as.numeric(Black_Alone) / Total_Population,
amer_prop = as.numeric(American_Indian_or_Alaskan_Native) / Total_Population,
asian_prop = as.numeric(Asian_Alone) / Total_Population,
pacific_prop = as.numeric(Hawaiian_or_Pacific_Islander_Alone) / Total_Population,
hispanic_prop = as.numeric(Hispanic) / Total_Population,
multi_prop = as.numeric(Two_or_More_Races) / Total_Population
)
## Rows: 111815 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): Statefips, Countyfips, Description, White Alone, Black Alone, Amer...
## dbl (3): IBRC_Geo_ID, Year, Total Population
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#momey
d_income = read_csv("data/Unemployment2023.csv") %>%
df_legalize_names() %>%
filter(FIPS_Code %in% fips_included) %>%
mutate(
FIPS = FIPS_Code
) %>%
pivot_wider(values_from = Value, names_from = Attribute)
## Rows: 329726 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): State, Area_Name, Attribute
## dbl (2): FIPS_Code, Value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#join
d = smart_join(
d_study %>% select(FIPS, Location, state, Number_of_establishments_ESTAB:Number_of_employees_EMP, Births:Fertility_Rate),
d_race %>% select(FIPS, white_prop:multi_prop),
id_col = "FIPS"
) %>%
smart_join(
d_income %>% select(FIPS, Unemployment_rate_2020, Median_Household_Income_2022),
id_col = "FIPS"
)
#per pet stuff per capita
d$pet_establishments_pc = d$Number_of_establishments_ESTAB / d$Total_Population
d$pet_earnings_pc = d$payroll / d$Total_Population
birth_rate_mods = list(
ols(Birth_Rate ~ pet_establishments_pc, data = d),
ols(Birth_Rate ~ pet_establishments_pc + white_prop + black_prop + hispanic_prop, data = d),
ols(Birth_Rate ~ pet_establishments_pc + white_prop + black_prop + hispanic_prop + Unemployment_rate_2020 + Median_Household_Income_2022, data = d),
ols(Birth_Rate ~ pet_establishments_pc + white_prop + black_prop + hispanic_prop + Unemployment_rate_2020 + Median_Household_Income_2022 + state, data = d)
)
birth_rate_mods %>% summarize_models(collapse_factors = T) %>% flextable::flextable()
Predictor/Model | 1 | 2 | 3 | 4 |
|---|---|---|---|---|
Intercept | 12.58 (0.186***) | 9.36 (0.870***) | 13.11 (1.200***) | 11.07 (1.631***) |
pet_establishments_pc | -23148.58 (2450.417***) | -13561.16 (2548.465***) | -14381.03 (2749.690***) | -10037.87 (2729.415***) |
white_prop | 1.70 (0.918) | 0.01 (1.023) | 0.26 (1.423) | |
black_prop | 5.66 (1.033***) | 4.31 (1.120***) | 4.70 (1.697*) | |
hispanic_prop | 3.51 (0.495***) | 4.64 (0.512***) | 5.13 (0.656***) | |
Unemployment_rate_2020 | -0.22 (0.036***) | -0.07 (0.047) | ||
Median_Household_Income_2022 | 0.00 (0.000) | 0.00 (0.000) | ||
state | (yes) | |||
R2 adj. | 0.136 | 0.249 | 0.298 | 0.480 |
N | 563 | 555 | 555 | 555 |
#fert rate
fert_rate_mods = list(
ols(Fertility_Rate ~ pet_establishments_pc, data = d),
ols(Fertility_Rate ~ pet_establishments_pc + white_prop + black_prop + hispanic_prop, data = d),
ols(Fertility_Rate ~ pet_establishments_pc + white_prop + black_prop + hispanic_prop + Unemployment_rate_2020 + Median_Household_Income_2022, data = d),
ols(Fertility_Rate ~ pet_establishments_pc + white_prop + black_prop + hispanic_prop + Unemployment_rate_2020 + Median_Household_Income_2022 + state, data = d)
)
fert_rate_mods %>% summarize_models(collapse_factors = T)
#versions
write_sessioninfo()
## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0 LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Brussels
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] rms_8.0-0 Hmisc_5.2-4 flextable_0.9.10
## [4] kirkegaard_2025-11-19 robustbase_0.99-6 psych_2.5.6
## [7] assertthat_0.2.1 weights_1.1.2 magrittr_2.0.4
## [10] lubridate_1.9.4 forcats_1.0.1 stringr_1.6.0
## [13] dplyr_1.1.4 purrr_1.2.0 readr_2.1.5
## [16] tidyr_1.3.1 tibble_3.3.0 ggplot2_4.0.1.9000
## [19] tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] Rdpack_2.6.4 mnormt_2.1.1 gridExtra_2.3
## [4] sandwich_3.1-1 rlang_1.1.6.9000 multcomp_1.4-28
## [7] polspline_1.1.25 compiler_4.5.2 gdata_3.0.1
## [10] systemfonts_1.3.1 vctrs_0.6.5 quantreg_6.1
## [13] crayon_1.5.3 pkgconfig_2.0.3 shape_1.4.6.1
## [16] fastmap_1.2.0 backports_1.5.0 rmarkdown_2.30
## [19] tzdb_0.5.0 nloptr_2.2.1 ragg_1.5.0
## [22] bit_4.6.0 MatrixModels_0.5-4 xfun_0.53
## [25] glmnet_4.1-10 jomo_2.7-6 cachem_1.1.0
## [28] jsonlite_2.0.0 uuid_1.2-1 pan_1.9
## [31] broom_1.0.11 parallel_4.5.2 cluster_2.1.8.1
## [34] R6_2.6.1 bslib_0.9.0 stringi_1.8.7
## [37] RColorBrewer_1.1-3 boot_1.3-32 rpart_4.1.24
## [40] jquerylib_0.1.4 Rcpp_1.1.0 iterators_1.0.14
## [43] knitr_1.50 zoo_1.8-14 base64enc_0.1-3
## [46] Matrix_1.7-4 splines_4.5.2 nnet_7.3-20
## [49] timechange_0.3.0 tidyselect_1.2.1 rstudioapi_0.17.1
## [52] yaml_2.3.10 codetools_0.2-20 plyr_1.8.9
## [55] lattice_0.22-7 withr_3.0.2 S7_0.2.1
## [58] askpass_1.2.1 evaluate_1.0.5 foreign_0.8-90
## [61] archive_1.1.12 survival_3.8-3 zip_2.3.3
## [64] xml2_1.4.0 pillar_1.11.1 mice_3.18.0
## [67] checkmate_2.3.3 foreach_1.5.2 reformulas_0.4.1
## [70] generics_0.1.4 vroom_1.6.6 hms_1.1.3
## [73] scales_1.4.0 minqa_1.2.8 gtools_3.9.5
## [76] glue_1.8.0 gdtools_0.4.4 tools_4.5.2
## [79] data.table_1.17.8 SparseM_1.84-2 lme4_1.1-37
## [82] mvtnorm_1.3-3 grid_4.5.2 rbibutils_2.3
## [85] colorspace_2.1-2 nlme_3.1-168 htmlTable_2.4.3
## [88] Formula_1.2-5 cli_3.6.5 textshaping_1.0.4
## [91] officer_0.7.0 fontBitstreamVera_0.1.1 gtable_0.3.6
## [94] DEoptimR_1.1-4 sass_0.4.10 digest_0.6.39
## [97] fontquiver_0.2.1 TH.data_1.1-4 htmlwidgets_1.6.4
## [100] farver_2.1.2 htmltools_0.5.8.1 lifecycle_1.0.4
## [103] mitml_0.4-5 bit64_4.6.0-1 fontLiberation_0.1.0
## [106] openssl_2.3.4 MASS_7.3-65
#write data to file for reuse
d %>% write_rds("data/data_for_reuse.rds")
#OSF
if (F) {
library(osfr)
#login
osf_auth(readr::read_lines("~/.config/osf_token"))
#the project we will use
osf_proj = osf_retrieve_node("https://osf.io/XXX/")
#upload all files in project
#overwrite existing (versioning)
osf_upload(
osf_proj,
path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"),
conflicts = "overwrite"
)
}