Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4          ✔ readr     2.1.5     
## ✔ forcats   1.0.1          ✔ stringr   1.6.0     
## ✔ ggplot2   4.0.1.9000     ✔ tibble    3.3.0     
## ✔ lubridate 1.9.4          ✔ tidyr     1.3.1     
## ✔ purrr     1.2.0          
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## Loading required package: robustbase
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  flextable,
  rms
)
## 
## Attaching package: 'flextable'
## 
## The following object is masked from 'package:purrr':
## 
##     compose
## 
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## 
## The following object is masked from 'package:psych':
## 
##     describe
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
theme_set(theme_bw())

options(
    digits = 3
)

#multithreading
#library(future)
#plan(multisession(workers = 8))

Functions

Data

#pet stuff
d_study = read_csv("data/study1_counties.csv") %>% 
  df_legalize_names() %>% 
  mutate(
    FIPS = STCOUNTYFP,
    state = name
  )
## New names:
## Rows: 563 Columns: 25
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (11): Location, Geographic.Area.Name..NAME., Meaning.of.NAICS.code..NAIC... dbl
## (14): ...1, X2017.NAICS.code..NAICS2017., Year..YEAR., Number.of.establi...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
fips_included = d_study$STCOUNTYFP

#covars
d_pop = read_csv("data/PopulationEstimates.csv")
## Rows: 208225 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): State, Area_Name, Attribute
## dbl (2): FIPStxt, Value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#race props
d_race = read_csv("data/Population-by-Race/Population by Race - US, States, Counties.csv") %>% 
  df_legalize_names() %>% 
  filter(Year == 2020) %>% 
  filter(IBRC_Geo_ID %in% fips_included) %>% 
  mutate(
    FIPS = IBRC_Geo_ID
  ) %>% 
  mutate(
    white_prop = as.numeric(White_Alone) / Total_Population,
    black_prop = as.numeric(Black_Alone) / Total_Population,
    amer_prop = as.numeric(American_Indian_or_Alaskan_Native) / Total_Population,
    asian_prop = as.numeric(Asian_Alone) / Total_Population,
    pacific_prop = as.numeric(Hawaiian_or_Pacific_Islander_Alone) / Total_Population,
    hispanic_prop = as.numeric(Hispanic) / Total_Population,
    multi_prop = as.numeric(Two_or_More_Races) / Total_Population
  )
## Rows: 111815 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): Statefips, Countyfips, Description, White Alone, Black Alone, Amer...
## dbl  (3): IBRC_Geo_ID, Year, Total Population
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#momey
d_income = read_csv("data/Unemployment2023.csv") %>% 
  df_legalize_names() %>% 
  filter(FIPS_Code %in% fips_included) %>% 
  mutate(
    FIPS = FIPS_Code
  ) %>% 
  pivot_wider(values_from = Value, names_from = Attribute)
## Rows: 329726 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): State, Area_Name, Attribute
## dbl (2): FIPS_Code, Value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#join
d = smart_join(
  d_study %>% select(FIPS, Location, state, Number_of_establishments_ESTAB:Number_of_employees_EMP, Births:Fertility_Rate),
  d_race %>% select(FIPS, white_prop:multi_prop),
  id_col = "FIPS"
) %>% 
  smart_join(
    d_income %>% select(FIPS, Unemployment_rate_2020, Median_Household_Income_2022),
    id_col = "FIPS"
  )

#per pet stuff per capita
d$pet_establishments_pc = d$Number_of_establishments_ESTAB / d$Total_Population
d$pet_earnings_pc = d$payroll / d$Total_Population

Analysis

birth_rate_mods = list(
  ols(Birth_Rate ~ pet_establishments_pc, data = d),
  ols(Birth_Rate ~ pet_establishments_pc + white_prop + black_prop + hispanic_prop, data = d),
  ols(Birth_Rate ~ pet_establishments_pc + white_prop + black_prop + hispanic_prop + Unemployment_rate_2020 + Median_Household_Income_2022, data = d),
  ols(Birth_Rate ~ pet_establishments_pc + white_prop + black_prop + hispanic_prop + Unemployment_rate_2020 + Median_Household_Income_2022 + state, data = d)
)

birth_rate_mods %>% summarize_models(collapse_factors = T) %>% flextable::flextable()

Predictor/Model

1

2

3

4

Intercept

12.58 (0.186***)

9.36 (0.870***)

13.11 (1.200***)

11.07 (1.631***)

pet_establishments_pc

-23148.58 (2450.417***)

-13561.16 (2548.465***)

-14381.03 (2749.690***)

-10037.87 (2729.415***)

white_prop

1.70 (0.918)

0.01 (1.023)

0.26 (1.423)

black_prop

5.66 (1.033***)

4.31 (1.120***)

4.70 (1.697*)

hispanic_prop

3.51 (0.495***)

4.64 (0.512***)

5.13 (0.656***)

Unemployment_rate_2020

-0.22 (0.036***)

-0.07 (0.047)

Median_Household_Income_2022

0.00 (0.000)

0.00 (0.000)

state

(yes)

R2 adj.

0.136

0.249

0.298

0.480

N

563

555

555

555

#fert rate
fert_rate_mods = list(
  ols(Fertility_Rate ~ pet_establishments_pc, data = d),
  ols(Fertility_Rate ~ pet_establishments_pc + white_prop + black_prop + hispanic_prop, data = d),
  ols(Fertility_Rate ~ pet_establishments_pc + white_prop + black_prop + hispanic_prop + Unemployment_rate_2020 + Median_Household_Income_2022, data = d),
  ols(Fertility_Rate ~ pet_establishments_pc + white_prop + black_prop + hispanic_prop + Unemployment_rate_2020 + Median_Household_Income_2022 + state, data = d)
)

fert_rate_mods %>% summarize_models(collapse_factors = T)

Meta

#versions
write_sessioninfo()
## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Brussels
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] rms_8.0-0             Hmisc_5.2-4           flextable_0.9.10     
##  [4] kirkegaard_2025-11-19 robustbase_0.99-6     psych_2.5.6          
##  [7] assertthat_0.2.1      weights_1.1.2         magrittr_2.0.4       
## [10] lubridate_1.9.4       forcats_1.0.1         stringr_1.6.0        
## [13] dplyr_1.1.4           purrr_1.2.0           readr_2.1.5          
## [16] tidyr_1.3.1           tibble_3.3.0          ggplot2_4.0.1.9000   
## [19] tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##   [1] Rdpack_2.6.4            mnormt_2.1.1            gridExtra_2.3          
##   [4] sandwich_3.1-1          rlang_1.1.6.9000        multcomp_1.4-28        
##   [7] polspline_1.1.25        compiler_4.5.2          gdata_3.0.1            
##  [10] systemfonts_1.3.1       vctrs_0.6.5             quantreg_6.1           
##  [13] crayon_1.5.3            pkgconfig_2.0.3         shape_1.4.6.1          
##  [16] fastmap_1.2.0           backports_1.5.0         rmarkdown_2.30         
##  [19] tzdb_0.5.0              nloptr_2.2.1            ragg_1.5.0             
##  [22] bit_4.6.0               MatrixModels_0.5-4      xfun_0.53              
##  [25] glmnet_4.1-10           jomo_2.7-6              cachem_1.1.0           
##  [28] jsonlite_2.0.0          uuid_1.2-1              pan_1.9                
##  [31] broom_1.0.11            parallel_4.5.2          cluster_2.1.8.1        
##  [34] R6_2.6.1                bslib_0.9.0             stringi_1.8.7          
##  [37] RColorBrewer_1.1-3      boot_1.3-32             rpart_4.1.24           
##  [40] jquerylib_0.1.4         Rcpp_1.1.0              iterators_1.0.14       
##  [43] knitr_1.50              zoo_1.8-14              base64enc_0.1-3        
##  [46] Matrix_1.7-4            splines_4.5.2           nnet_7.3-20            
##  [49] timechange_0.3.0        tidyselect_1.2.1        rstudioapi_0.17.1      
##  [52] yaml_2.3.10             codetools_0.2-20        plyr_1.8.9             
##  [55] lattice_0.22-7          withr_3.0.2             S7_0.2.1               
##  [58] askpass_1.2.1           evaluate_1.0.5          foreign_0.8-90         
##  [61] archive_1.1.12          survival_3.8-3          zip_2.3.3              
##  [64] xml2_1.4.0              pillar_1.11.1           mice_3.18.0            
##  [67] checkmate_2.3.3         foreach_1.5.2           reformulas_0.4.1       
##  [70] generics_0.1.4          vroom_1.6.6             hms_1.1.3              
##  [73] scales_1.4.0            minqa_1.2.8             gtools_3.9.5           
##  [76] glue_1.8.0              gdtools_0.4.4           tools_4.5.2            
##  [79] data.table_1.17.8       SparseM_1.84-2          lme4_1.1-37            
##  [82] mvtnorm_1.3-3           grid_4.5.2              rbibutils_2.3          
##  [85] colorspace_2.1-2        nlme_3.1-168            htmlTable_2.4.3        
##  [88] Formula_1.2-5           cli_3.6.5               textshaping_1.0.4      
##  [91] officer_0.7.0           fontBitstreamVera_0.1.1 gtable_0.3.6           
##  [94] DEoptimR_1.1-4          sass_0.4.10             digest_0.6.39          
##  [97] fontquiver_0.2.1        TH.data_1.1-4           htmlwidgets_1.6.4      
## [100] farver_2.1.2            htmltools_0.5.8.1       lifecycle_1.0.4        
## [103] mitml_0.4-5             bit64_4.6.0-1           fontLiberation_0.1.0   
## [106] openssl_2.3.4           MASS_7.3-65
#write data to file for reuse
d %>% write_rds("data/data_for_reuse.rds")

#OSF
if (F) {
  library(osfr)
  
  #login
  osf_auth(readr::read_lines("~/.config/osf_token"))
  
  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/XXX/")
  
  #upload all files in project
  #overwrite existing (versioning)
  osf_upload(
    osf_proj,
    path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"), 
    conflicts = "overwrite"
    )
}