## Remove all Warning and Messages
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, error = FALSE)
## Packages and WD live here
library(pacman)
pacman::p_load(lubridate, tidyverse, stringi, tidycensus, tmap,
tm, sf, rio, leaflet, webshot2)Spatial and Temporal Trends in ED Visits
Spatial and Temporal Trends in ED Visits across Travis County
Aims:
To describe the spatial and temporal pattern of population-based asthma-related hospitalization rates and ED visit rates in children with asthma ages 5-17 years by week and across census tracts in Travis County.
To characterize the average annual temporal pattern of hospitalization and ED visit rates.
To understand which census tracts had a back-to-school peak in rates
To describe the distribution of the timing of the start of the back-to-school peak across census tracts (includes spatial mapping)
To describe the distribution of the amplitude of the back-to-school peak across census tracts (includes mapping)
To describe the distribution of the duration of the back-to-school peak across census tracts (includes mapping).
To evaluate how the spatial and temporal pattern of population-based asthma-related hospitalization rates and ED visit rates in children with asthma (ages 5-17) vary by census tract characteristics such as poverty, crowding, use of public transport, social vulnerability, socioeconomic deprivation, and historic redlining in Travis County.
To estimate the associations between census tract characteristics and whether there is a back-to-school peak.
To estimate the associations between census tract characteristics and initiation of the back-to-school peak
To estimate the associations between census tract characteristics and amplitude of the back-to-school peak
To estimate the associations between census-tract characteristics and duration of the back-to-school peak
To explore the extent of spatial autocorrelation (impact of neighboring census tracts) on initiation, amplitude, duration, and area under the back-to-school peak.
To explore the spatial and temporal patterns of hospitalizations and ED visits associated with bronchiolitis or wheezing in children younger than 5 years residing in Travis County.
To characterize the average annual temporal pattern of hospitalization and ED visit rates associated with bronchiolitis or wheezing.
To map the spatial pattern of hospitalization and ED visit rates associated with bronchiolitis or wheezing in time.
Data Importation and Cleaning
Create Dataset with ED Flags
The following code is drawn from ED_records_extract.Rmd by Sarah Chambliss (Oct. 21, 2023).
Extract asthma ED visit records from raw data
This follows the structure of the SAS code from Emily Hall. We have validated the ED flagging method against PUDF published totals - see ../AJRCCM update/ED_asthma_records_extract_QAQC.Rmd and ../Data/THCIC Data/ED filter validation.xlsx
Extract patient IDs with ED charges from raw charges data
Charge codes citation from DSHS: https://www.dshs.texas.gov/sites/default/files/thcic/hospitals/Data-Summary-for-IP_OP_ED.pdf
## Move WD to where ED_records_extract.Rmd is based
setwd("//corral-secure/utdmc/Biomedical-Data-Scie/THCIC/ED flagged data and documentation")
# Define ED charge codes
ED_charge_codes <- c('0450','0451','0452','0456','0459')
# Define function to isolate ED patient records
ED_charge_find <- function(charge_table){
charge_table %>%
dplyr::filter(REVENUE_CODE %in% ED_charge_codes) %>% #Filter all charge data by ED charges defined above
dplyr::select(RECORD_ID) %>% #select Record ID list for de-duplication
unique(.) #reduce records down to unique patient IDs
}
# Define function to read in annual charges, apply ED_charge_find, then clear charge file; necessary to manage RAM usage
# I'm using two functions in case someone else doesn't want the file cleared
extract_ED_IDs <- function(charge_file_path) #I'm using the file path for the argument so it still works if file naming conventions differ by year
{
raw_charges <- read_tsv(charge_file_path)
ED_patient_ids <- ED_charge_find(raw_charges)
rm(raw_charges)
gc()
return(ED_patient_ids)
}
# Run the nested functions for each OP and IP charges file
OP_2016_ED_ids <- extract_ED_IDs("../Outpatient THCIC data 2015Q4-2017/OP Decrypted/OP2016_Charges.txt") #2016 OP charges file has 9188959 unique IDs (patient IDs with ED charges)
OP_2017_ED_ids <- extract_ED_IDs("../Outpatient THCIC data 2015Q4-2017/OP Decrypted/OP2017_Charges.txt") #2017 OP charges file has 9404026 unique IDs
IP_2016_ED_ids <- extract_ED_IDs("../Outpatient THCIC data 2015Q4-2017/OP Decrypted/IP_2016_charges.txt") #2016 IP charges file has 1471871 unique IDs
#THIS MATCHES EMILY'S NOTES, HOORAY
IP_2017_ED_ids <- extract_ED_IDs("../Outpatient THCIC data 2015Q4-2017/OP Decrypted/IP_2017_charges.txt") #2017 IP charges file has 1522665 unique IDs
OP_2018_ED_ids <- extract_ED_IDs("../Outpatient THCIC data 2018-2020/OP_2018_Charges.txt") #2018 OP charges file has 9639368 unique IDs
OP_2019_ED_ids <- extract_ED_IDs("../Outpatient THCIC data 2018-2020/OP_2019_Charges.txt") #2019 OP charges file has 10093185 unique IDs
OP_2020_ED_ids <- extract_ED_IDs("../Outpatient THCIC data 2018-2020/OP_2020_Charges.txt") #2020 OP charges file has 8375065 unique IDs
## Note that we do not have IP charge data for 2018-2020
## Below is a placeholder if we do get it one day
#IP_2018_ED_ids <- extract_ED_IDs("../Inpatient THCIC data 2018-2020/Create patient files filtered by (1) ED, (2) Provider type, and (3) asthma
Create filter function
## Move WD to where ED_records_extract.Rmd is based
setwd("//corral-secure/utdmc/Biomedical-Data-Scie/THCIC/ED flagged data and documentation")
# provider table from: https://www.dshs.texas.gov/texas-health-care-information-collection/health-data-researcher-information/texas-emergency-department-data
provider_type_table <- read_csv("//corral-secure/utdmc/Biomedical-Data-Scie/THCIC/ED flagged data and documentation/FacilityList.csv")
harmonize_and_flag <- function(base_file_path, ED_ids,
ICD10_codes, year, type = c("OP","IP"),
provider_filepath, THCIC_filepath, stmtdate_filepath){ #These are only needed for select years or base data types
if(year<2018){
if(type == "OP"){
base_records <- read_tsv(base_file_path) %>%
dplyr::left_join(., provider_type_table,
by = c("PROVIDER_NAME" = "Facility")) %>%
dplyr::mutate(ADMIT_START_OF_CARE = NA)
}
if(type == "IP"){
provider_data <- read_tsv(provider_filepath)
base_records <- read_tsv(base_file_path) %>%
dplyr::left_join(., provider_data, by = "THCIC_ID") %>%
dplyr::left_join(., provider_type_table, by = "THCIC_ID") %>%
dplyr::mutate(STMT_PERIOD_FROM = NA)
}
}
if(year>=2018){
if(type == "OP"){
THCIC_ID_data <- read_tsv(THCIC_filepath)
STMTdate_data <- read_tsv(stmtdate_filepath)
base_records <- read_tsv(base_file_path) %>%
dplyr::left_join(., THCIC_ID_data, by = "RECORD_ID") %>%
dplyr::left_join(., STMTdate_data, by = "RECORD_ID") %>%
dplyr::left_join(., provider_type_table, by = "THCIC_ID") %>%
dplyr::mutate(PROVIDER_NAME = NA,
ADMIT_START_OF_CARE = NA)
}
if(type == "IP"){
THCIC_ID_data <- read_tsv(THCIC_filepath)
base_records <- read_tsv(base_file_path) %>%
dplyr::left_join(., THCIC_ID_data, by = "RECORD_ID") %>%
dplyr::left_join(., provider_type_table, by = "THCIC_ID") %>%
dplyr::mutate(PROVIDER_NAME = NA,
STMT_PERIOD_FROM = NA)
}
}
asthma_ED_records <- base_records %>%
dplyr::filter(str_detect(PRINC_DIAG_CODE, ICD10_codes)) %>%
dplyr::left_join(., ED_ids %>% dplyr::mutate(ED_Flag_internal = T),
by = "RECORD_ID") %>%
dplyr::mutate(ED_Flag_internal = ifelse(is.na(ED_Flag_internal),
F,ED_Flag_internal),
year = year,
filetype = type) %>%
dplyr::select(c("RECORD_ID","PAT_UNIQUE_INDEX", "PRINC_DIAG_CODE",
"SEX_CODE","PAT_AGE_YEARS","RACE","ETHNICITY",
"PAT_ZIP","PAT_COUNTY","PAT_ADDR_CENSUS_BLOCK_GROUP",
"PROVIDER_NAME","THCIC_ID","TYPE_OF_ADMISSION",
"STMT_PERIOD_FROM","ADMIT_START_OF_CARE",
"Facility_Type","ED_Flag_internal","year","filetype"))
return(asthma_ED_records)
rm(base_records)
gc()
}
## PWD Addition - Add second helper for data w/o ED Flags
harmonize_and_flag_noED <- function(base_file_path,
ICD10_codes, year,
provider_filepath, THCIC_filepath, stmtdate_filepath){
THCIC_ID_data <- read_tsv(THCIC_filepath)
base_records <- read_tsv(base_file_path) %>%
dplyr::left_join(., THCIC_ID_data, by = "RECORD_ID") %>%
dplyr::left_join(., provider_type_table, by = "THCIC_ID") %>%
dplyr::mutate(PROVIDER_NAME = NA,
STMT_PERIOD_FROM = NA)
asthma_noED_records <- base_records %>%
dplyr::filter(str_detect(PRINC_DIAG_CODE, ICD10_codes)) %>%
dplyr::mutate(ED_Flag_internal = NA,
year = year,
filetype = "IP") %>%
dplyr::select(c("RECORD_ID","PAT_UNIQUE_INDEX", "PRINC_DIAG_CODE",
"SEX_CODE","PAT_AGE_YEARS","RACE","ETHNICITY",
"PAT_ZIP","PAT_COUNTY","PAT_ADDR_CENSUS_BLOCK_GROUP",
"PROVIDER_NAME","THCIC_ID","TYPE_OF_ADMISSION",
"STMT_PERIOD_FROM","ADMIT_START_OF_CARE",
"Facility_Type","ED_Flag_internal","year","filetype"))
return(asthma_noED_records)
rm(base_records)
gc()
}Apply asthma ED filter function
## Move WD to where ED_records_extract.Rmd is based
setwd("//corral-secure/utdmc/Biomedical-Data-Scie/THCIC/ED flagged data and documentation")
asthma_codes <- paste(c("^J45.","J46."),collapse = "|^")
## We can look
## OP files from 2016-2017 don't need any auxiliary files
#
OP_2016_asthma <- harmonize_and_flag(
base_file_path = "../Outpatient THCIC data 2015Q4-2017/OP Decrypted/OP2016_Base.txt",
ED_ids = OP_2016_ED_ids,
ICD10_codes = asthma_codes,
year = 2016,
type = "OP")
OP_2017_asthma <- harmonize_and_flag(
base_file_path = "../Outpatient THCIC data 2015Q4-2017/OP Decrypted/OP2017_Base.txt",
ED_ids = OP_2017_ED_ids,
ICD10_codes = asthma_codes,
year = 2017,
type = "OP")
## IP files from 2016-2017 need the provider file
#
IP_2016_asthma <- harmonize_and_flag(
base_file_path = "../Inpatient THCIC data 2004-2017/Raw Data/IP_2016_base.txt",
ED_ids = IP_2016_ED_ids,
ICD10_codes = asthma_codes,
year = 2016,
type = "IP",
provider_filepath = "../Inpatient THCIC data 2004-2017/Raw Data/IP2016_Provider.txt")
IP_2017_asthma <- harmonize_and_flag(
base_file_path = "../Inpatient THCIC data 2004-2017/Raw Data/IP_2017_base.txt",
ED_ids = IP_2017_ED_ids,
ICD10_codes = asthma_codes,
year = 2017,
type = "IP",
provider_filepath = "../Inpatient THCIC data 2004-2017/Raw Data/IP2017_Provider.txt")
## OP files from 2018-2020 need THCIC file and stmtdate file
#
OP_2018_asthma <- harmonize_and_flag(
base_file_path = "../Outpatient THCIC data 2018-2020/OP_2018_base.txt",
ED_ids = OP_2018_ED_ids,
ICD10_codes = asthma_codes,
year = 2018,
type = "OP",
THCIC_filepath = "../Outpatient THCIC data 2018-2020/op_2018_THCIC_ID.txt",
stmtdate_filepath = "../Outpatient THCIC data 2018-2020/op_2018_enc.txt"
)
OP_2019_asthma <- harmonize_and_flag(
base_file_path = "../Outpatient THCIC data 2018-2020/OP_2019_base.txt",
ED_ids = OP_2019_ED_ids,
ICD10_codes = asthma_codes,
year = 2019,
type = "OP",
THCIC_filepath = "../Outpatient THCIC data 2018-2020/op_2019_THCIC_ID.txt",
stmtdate_filepath = "../Outpatient THCIC data 2018-2020/op_2019_enc.txt"
)
## Change from Sarah's - remove 2020 Records
#OP_2020_asthma <- harmonize_and_flag(
# base_file_path = "../Outpatient THCIC data 2018-2020/OP_2020_base.txt",
# ED_ids = OP_2020_ED_ids,
# ICD10_codes = asthma_codes,
# year = 2020,
# type = "OP",
# THCIC_filepath = "../Outpatient THCIC data 2018-2020/op_2020_THCIC_ID.txt",
# stmtdate_filepath = "../Outpatient THCIC data 2018-2020/op_2020_enc.txt"
#)
## IP files from 2018-2020 need THCIC file
## We will have to separately figure
IP_2018_asthma <-harmonize_and_flag_noED(
base_file_path = "../Inpatient THCIC data 2018-2020/IP_2018_base.txt",
ICD10_codes = asthma_codes,
year = 2018,
THCIC_filepath = "../Inpatient THCIC data 2018-2020/IP_2018_enc.txt",
provider_filepath = "../Inpatient THCIC data 2018-2020/IP_2018_Provider.txt"
)
IP_2019_asthma <-harmonize_and_flag_noED(
base_file_path = "../Inpatient THCIC data 2018-2020/IP_2019_base.txt",
ICD10_codes = asthma_codes,
year = 2019,
THCIC_filepath = "../Inpatient THCIC data 2018-2020/IP_2019_enc.txt",
provider_filepath = "../Inpatient THCIC data 2018-2020/IP_2019_Provider.txt"
)Combine and clean up records
library(stringi)
get_ED_records <- function(data_with_flags){
data_with_flags %>%
filter(ED_Flag_internal) %>%
filter(Facility_Type != "ASC" | is.na(Facility_Type))
}
code_addl_vars <- function(patient_records){
patient_records %>%
dplyr::mutate(
ethnicity_fac = factor(ifelse(ETHNICITY %in% 1:2,ETHNICITY,NA),
labels = c("Hispanic","Non-Hispanic")),
pat_addr_cbg = stri_pad_right(
str_replace_all(PAT_ADDR_CENSUS_BLOCK_GROUP,
"[^[:alnum:]]", ""),
11, 0),
GEOID = substr(pat_addr_cbg,1,11),
county = substr(.$PAT_ADDR_CENSUS_BLOCK_GROUP, start = 3, stop = 5),
age_group = factor(ifelse(as.numeric(PAT_AGE_YEARS) %in% 0:17, "1", "2"),
levels = c("1", "2"),
labels = c("Pediatric", "Adult"))
)
}
## Create three general datasets
## Dataset 1: All Records
all_records <- rbind(OP_2016_asthma, OP_2017_asthma,
OP_2018_asthma, OP_2019_asthma,
IP_2016_asthma, IP_2017_asthma,
IP_2018_asthma, IP_2019_asthma) %>%
code_addl_vars(.)
## Dataset 2: ED Records for OP 2016-2019, IP 2016-2017
## Plus all records from 2018-2019
asthma_2016_2019_ED <- rbind(OP_2016_asthma, OP_2017_asthma,
OP_2018_asthma, OP_2019_asthma,
IP_2016_asthma, IP_2017_asthma) %>%
code_addl_vars(.) %>%
get_ED_records(.)
ip_2018_2019 <- rbind(IP_2018_asthma, IP_2019_asthma) %>%
code_addl_vars(.)
asthma_2016_2019_ED <- rbind(asthma_2016_2019_ED, ip_2018_2019)
## Assumption: almost all asthma IP visits come thru ED
## Dataset 3: Only ED Records that come from OP Files
## Plus all records from IP Files
## This will be our primary dataset
asthma_2016_2019_OP_ED_allIP <- rbind(OP_2016_asthma, OP_2017_asthma,
OP_2018_asthma, OP_2019_asthma) %>%
code_addl_vars(.) %>%
get_ED_records(.)
ip <- rbind(IP_2016_asthma, IP_2017_asthma,
IP_2018_asthma, IP_2019_asthma) %>%
code_addl_vars(.)
asthma_2016_2019_OP_ED_allIP <- rbind(asthma_2016_2019_OP_ED_allIP, ip)Sanity Check
Now we will write out data
write_rds(asthma_2016_2019_ED,
"//corral-secure/utdmc/Biomedical-Data-Scie/dunphy/darlenedunphy/Raw Data/asthma_2016_2019_ED.rds")
write_rds(asthma_2016_2019_OP_ED_allIP,
"//corral-secure/utdmc/Biomedical-Data-Scie/dunphy/darlenedunphy/Raw Data/asthma_2016_2019_OP_ED_allIP.rds")Subsetting and Final Cleaning
To deal with sparseness, we will deal with two geographic populations: (1) looking solely at Travis County and (2) looking at the counties in the five largest Texas metropolitian statistical areas (MSAs). MSA definitions can be seen here: https://sph.uth.edu/research/centers/dell/webinars/Texas%20Metropolitan%20Statistical%20Areas(1)(1).pdf.
create_analysis_df <- function(raw_data, all_metros){
analysis_df <- subset(raw_data,
as.numeric(PAT_AGE_YEARS) %in% c(5:17))
## If STMT_PERIOD_FROM is missing (as is the case with all IP records),
## we'll substitute the ADMIT_START_OF_CARE
analysis_df$STMT_PERIOD_FROM <- ifelse(is.na(analysis_df$STMT_PERIOD_FROM),
analysis_df$ADMIT_START_OF_CARE,
analysis_df$STMT_PERIOD_FROM)
## Then we will convert this to a date class variable
analysis_df$STMT_PERIOD_FROM <-
as.character(analysis_df$STMT_PERIOD_FROM)
analysis_df$STMT_PERIOD_FROM <- ymd(analysis_df$STMT_PERIOD_FROM)
# Create a new variable for the week starting on Monday
analysis_df$STMT_WEEK <-
floor_date(analysis_df$STMT_PERIOD_FROM,
unit = "week") + days(1)
# Exclude observations where STMT_WEEK is from 2015 or 2020
analysis_df <- subset(analysis_df, year(STMT_WEEK) %in% c(2016, 2017, 2018, 2019))
if(all_metros == FALSE){
## Subset to Travis County only
analysis_df <- subset(analysis_df, county == "453")
analysis_df$msa <- "austin"
}
if(all_metros == TRUE){
## DFW MSA Counties
## Collin, Dallas, Denton, Ellis, Hunt, Johnson, Kaufman,
## Parker, Rockwall, Tarrant, and Wise counties
dfw <- c("085", "113", "121","139","231","251", "257",
"367","397","439","497")
## Houston MSA Counties
## Austin, Brazoria, Chambers, Fort Bend, Galveston,
## Harris, Montgomery, Waller
htx <- c("015","039", "071","157","167",
"201","339", "473")
## Austin MSA Counties
## Bastrop, Caldwell, Hays, Travis, Williamson
atx <- c("021", "055","209","453", "491")
## San Antonio MSA Counties
## Atascosa, Bandera, Bexar, Comal, Guadalupe,
## Kendall, Medina, Wilson
sa <- c("013", "019","029","091","187",
"259", "325", "493")
## McAllen-Edinburg-Mission MSA Counties
## Hidalgo County
mcallen <- c("215")
## El Paso MSA
## El Paso, Hudspeth
elp <- c("141","229")
## Subset to be in one of these 5 metroplexes
analysis_df <- subset(analysis_df, county %in% c(dfw, htx,atx,sa,elp, mcallen))
analysis_df$msa <- NA
# Create the 'msa' column without subsetting columns directly
analysis_df <- analysis_df %>%
mutate(
msa = case_when(
county %in% dfw ~ "dfw",
county %in% htx ~ "houston",
county %in% atx ~ "austin",
county %in% sa ~ "san_antonio",
county %in% mcallen ~ "mcallen",
county %in% elp ~ "el_paso",
TRUE ~ NA_character_
))
}
return(analysis_df)
gc()
}Now we will break down into four datasets.
## Only Austin
asthma_atx_ED <- create_analysis_df(asthma_2016_2019_ED, all_metros = FALSE)
asthma_atx_OP_ED_allIP<- create_analysis_df(asthma_2016_2019_OP_ED_allIP, all_metros = FALSE)
## All five MSAs
asthma_allMSA_ED <- create_analysis_df(asthma_2016_2019_ED, all_metros = TRUE)
asthma_allMSA_OP_ED_allIP<- create_analysis_df(asthma_2016_2019_OP_ED_allIP, all_metros = TRUE)analysis_df_atx <- asthma_atx_OP_ED_allIP
analysis_df_allmsa <- asthma_allMSA_OP_ED_allIP
export(analysis_df_atx, "analysis_df_atx.csv")
export(analysis_df_allmsa, "analysis_df_allmsa.csv")analysis_df_atx <- import("//corral-secure/utdmc/Biomedical-Data-Scie/dunphy/analysis_df_atx.csv")
analysis_df_allmsa <- import("//corral-secure/utdmc/Biomedical-Data-Scie/dunphy/analysis_df_allmsa.csv")Descriptive Analysis
Count Tables
For Austin solely
table(analysis_df_atx$year, analysis_df_atx$filetype)
IP OP
2016 290 1341
2017 190 1359
2018 117 1170
2019 41 1136
For all five MSAs:
table(analysis_df_allmsa$year, analysis_df_allmsa$filetype)
IP OP
2016 2762 26939
2017 2643 27088
2018 2362 23425
2019 1980 22480
Number of Yearly Visits Grouped by MSA
table(analysis_df_allmsa$year, analysis_df_allmsa$msa, analysis_df_allmsa$filetype), , = IP
austin dfw el_paso houston mcallen san_antonio
2016 446 688 191 698 133 606
2017 290 702 211 749 109 582
2018 193 644 215 669 104 537
2019 83 586 175 581 92 463
, , = OP
austin dfw el_paso houston mcallen san_antonio
2016 2285 11969 1026 6253 588 4818
2017 2399 12339 927 6260 620 4543
2018 2069 10265 881 5722 537 3951
2019 1979 9972 823 5420 431 3855
Temporal Count Graphs
Austin Solely
ed_atx <- subset(analysis_df_atx, filetype == "OP") # Only OP
ip_atx <- subset(analysis_df_atx, filetype == "IP") # Only IP
## Aggregate by week for ED
atx_weekly_count_ed <- ed_atx %>%
group_by(STMT_WEEK) %>%
summarise(ED = n()) %>%
pivot_wider(names_from = c(STMT_WEEK), values_from = ED)
## Aggregate by week for IP
atx_weekly_count_ip <- ip_atx %>%
group_by(STMT_WEEK) %>%
summarise(IP = n()) %>%
pivot_wider(names_from = c(STMT_WEEK), values_from = IP)
## Combine ED and IP counts
combined_counts_atx <- merge(atx_weekly_count_ed, atx_weekly_count_ip, all=TRUE)
combined_counts_atx <- as.data.frame(t(combined_counts_atx))
combined_counts_atx[is.na(combined_counts_atx)] <- 0
colnames(combined_counts_atx) <- c("IP","ED")
combined_counts_atx$Total <- combined_counts_atx$IP + combined_counts_atx$ED
combined_counts_atx$weeks <- rownames(combined_counts_atx)
combined_counts_atx$weeks <- ymd(combined_counts_atx$weeks)
## Create Graph of ED Counts
ed_counts_atx <- ggplot(combined_counts_atx, aes(x=weeks, y=ED, group = 1)) +
geom_point() +
geom_line() + scale_x_date(date_breaks = "2 month", date_labels = "%b/%Y")+
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(
title = "ED Counts per Week \n (Travis County Only, Outpatient ED Records)",
x = "Week",
y = "Number of ER Visits"
)
print(ed_counts_atx)
## Create Graph of Hosp Counts
hosp_counts_atx <- ggplot(combined_counts_atx, aes(x=weeks, y=IP, group = 1)) +
geom_point() +
geom_line() + scale_x_date(date_breaks = "2 month", date_labels = "%b/%Y")+theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(
title = "Hospitalization Counts per Week \n (Travis County Only)",
x = "Week",
y = "Number of Hospitalizations"
)
print(hosp_counts_atx)
# Reshape data to long format
long_combined_counts_atx <- gather(combined_counts_atx, key = "Type", value = "Count", -weeks)
# Create the combined graph
combined_graph <- ggplot(long_combined_counts_atx, aes(x = weeks, y = Count, group = Type, color = Type)) +
geom_point() +
geom_line() +
scale_x_date(date_breaks = "2 month", date_labels = "%b/%Y") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(
title = "ED, Hospitalization, and Total Counts per Week \n (Travis County Only)",
x = "Week",
y = "Count"
)
print(combined_graph)
All 5 MSA
ed_allmsa <- subset(analysis_df_allmsa, filetype == "OP") # Only OP
ip_allmsa <- subset(analysis_df_allmsa, filetype == "IP") # Only IP
## Aggregate by week for ED
allmsa_weekly_count_ed <- ed_allmsa %>%
group_by(STMT_WEEK) %>%
summarise(ED = n()) %>%
pivot_wider(names_from = c(STMT_WEEK), values_from = ED)
## Aggregate by week for IP
allmsa_weekly_count_ip <- ip_allmsa %>%
group_by(STMT_WEEK) %>%
summarise(IP = n()) %>%
pivot_wider(names_from = c(STMT_WEEK), values_from = IP)
## Combine ED and IP counts
combined_counts_allmsa <- merge(allmsa_weekly_count_ed, allmsa_weekly_count_ip, all=TRUE)
combined_counts_allmsa <- as.data.frame(t(combined_counts_allmsa))
combined_counts_allmsa[is.na(combined_counts_allmsa)] <- 0
colnames(combined_counts_allmsa) <- c("IP","ED")
combined_counts_allmsa$Total <- combined_counts_allmsa$IP + combined_counts_allmsa$ED
combined_counts_allmsa$weeks <- rownames(combined_counts_allmsa)
combined_counts_allmsa$weeks <- ymd(combined_counts_allmsa$weeks)
## Create Graph of ED Counts
ed_counts_allmsa <- ggplot(combined_counts_allmsa, aes(x=weeks, y=ED, group = 1)) +
geom_point() +
geom_line() + scale_x_date(date_breaks = "2 month", date_labels = "%b/%Y")+
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(
title = "ED Counts per Week \n (All 5 MSA, Outpatient ED Records)",
x = "Week",
y = "Number of ER Visits"
)
print(ed_counts_allmsa)
## Create Graph of Hosp Counts
hosp_counts_allmsa <- ggplot(combined_counts_allmsa, aes(x=weeks, y=IP, group = 1)) +
geom_point() +
geom_line() + scale_x_date(date_breaks = "2 month", date_labels = "%b/%Y")+
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(
title = "Hospitalization Counts per Week \n (All 5 MSA)",
x = "Week",
y = "Number of Hospitalizations"
)
print(hosp_counts_allmsa)
# Reshape data to long format
long_combined_counts_allmsa <- gather(combined_counts_allmsa, key = "Type", value = "Count", -weeks)
# Create the combined graph
combined_graph <- ggplot(long_combined_counts_allmsa, aes(x = weeks, y = Count, group = Type, color = Type)) +
geom_point() +
geom_line() +
scale_x_date(date_breaks = "2 month", date_labels = "%b/%Y") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(
title = "ED, Hospitalization, and Total Counts per Week \n (All 5 MSAs)",
x = "Week",
y = "Count"
)
print(combined_graph)
PBIR Tables
library(tidycensus)
clean_tidycensus <- function(acs_vars, var_name){
df <- get_acs(
geography = "tract",
variables = acs_vars,
state = "TX",
survey = "acs5",
year = 2019,
geometry = TRUE
)
## If we have multiple variables we want to aggregate (e.g. we have variables for 40-42
## 43-45, 45-49, and want to aggregate to a 40-49 variable), we'll sum up those independent
## variables based on Census Tract, so each dataset will be left with 218 observations (1 per tract)
df <- aggregate(estimate ~ GEOID, data = df, FUN = sum)
## In this process, we also drop the margin of error variables and are left with just 2
colnames(df) <- c("GEOID", var_name)
return(df)
}
## Texas Population from 5-17
tx_pop517 <- clean_tidycensus(c("B01001_003E","B01001_004E","B01001_005E",
"B01001_028E","B01001_029E", "B01001_030E"), "pop5_17")
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|====== | 8%
|
|======= | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|=========== | 15%
|
|============ | 16%
|
|============ | 17%
|
|============= | 18%
|
|============== | 20%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 25%
|
|=================== | 27%
|
|==================== | 28%
|
|=================================== | 50%
|
|=================================================== | 72%
|
|================================================================== | 94%
|
|======================================================================| 100%
## Median Income
median_income <- clean_tidycensus(acs_vars = "B19013_001E", var_name = "median_income")
## Non-Hispanic White Population
white_pop <- clean_tidycensus(acs_vars = "B03002_003E", var_name = "white_pop") # Non-Hispanic White
total_pop <- clean_tidycensus(acs_vars = "B01003_001E", var_name = "total_pop")
total_pop <- left_join(total_pop, white_pop, by = "GEOID")
total_pop$non_white_pop <- 1 - (total_pop$white_pop / total_pop$total_pop)
total_pop <- total_pop %>% dplyr::select(c("GEOID", "non_white_pop"))
## Create Empty DF to get Geo & Merge
tx_geo <- get_acs(
geography = "tract",
variables = "B01001_030E",
state = "TX",
survey = "acs5",
year = 2019,
geometry = TRUE
)
tx_geo <- tx_geo %>% dplyr::select(-c(variable, estimate, moe))
tx_geo <- left_join(tx_geo, total_pop, by = "GEOID")
tx_geo <- left_join(tx_geo, median_income, by = "GEOID")
tx_geo <- left_join(tx_geo, tx_pop517, by = "GEOID")
tract_df_atx <- tx_geo[grepl("^48453", tx_geo$GEOID), ]
## Recode MSA
## DFW MSA Counties
## Collin, Dallas, Denton, Ellis, Hunt, Johnson, Kaufman,
## Parker, Rockwall, Tarrant, and Wise counties
dfw <- c("085", "113", "121","139","231","251", "257",
"367","397","439","497")
## Houston MSA Counties
## Austin, Brazoria, Chambers, Fort Bend, Galveston,
## Harris, Montgomery, Waller
htx <- c("015","039", "071","157","167",
"201","339", "473")
## Austin MSA Counties
## Bastrop, Caldwell, Hays, Travis, Williamson
atx <- c("021", "055","209","453", "491")
## San Antonio MSA Counties
## Atascosa, Bandera, Bexar, Comal, Guadalupe,
## Kendall, Medina, Wilson
sa <- c("013", "019","029","091","187",
"259", "325", "493")
## McAllen-Edinburg-Mission MSA Counties
## Hidalgo County
mcallen <- c("215")
## El Paso MSA
## El Paso, Hudspeth
elp <- c("141","229")
msa_list <- c(dfw, htx, atx, sa, mcallen, elp)
msa_list <- paste0("48", msa_list)
tract_df_allmsa <- tx_geo[substr(tx_geo$GEOID, 1, 5) %in% msa_list, ]## Create Tract Level DF for Austin
total_visits_atx <- as.data.frame(table(analysis_df_atx$GEOID))
colnames(total_visits_atx) <- c("GEOID", "total_visits")
op <- as.data.frame(table(subset(analysis_df_atx$GEOID,
analysis_df_atx$filetype == "OP")))
colnames(op) <- c("GEOID", "ed_visits")
ip <- as.data.frame(table(subset(analysis_df_atx$GEOID,
analysis_df_atx$filetype == "IP")))
colnames(ip) <- c("GEOID", "ip_visits")
tract_df_atx <- left_join(tract_df_atx, total_visits_atx, by = "GEOID")
tract_df_atx <- left_join(tract_df_atx, op, by = "GEOID")
tract_df_atx <- left_join(tract_df_atx, ip, by = "GEOID")
tract_df_atx$msa <- "atx"
## Create Tract Level DF for Combined MSAs
analysis_df_allmsa <- subset(analysis_df_allmsa, GEOID %in% unique(tract_df_allmsa$GEOID))
total_visits_allmsa <- as.data.frame(table(analysis_df_allmsa$GEOID))
colnames(total_visits_allmsa) <- c("GEOID", "total_visits_allmsa")
op <- as.data.frame(table(subset(analysis_df_allmsa$GEOID,
analysis_df_allmsa$filetype == "OP")))
colnames(op) <- c("GEOID", "ed_visits")
ip <- as.data.frame(table(subset(analysis_df_allmsa$GEOID,
analysis_df_allmsa$filetype == "IP")))
colnames(ip) <- c("GEOID", "ip_visits")
tract_df_allmsa <- left_join(tract_df_allmsa, total_visits_allmsa, by = "GEOID")
tract_df_allmsa <- left_join(tract_df_allmsa, op, by = "GEOID")
tract_df_allmsa <- left_join(tract_df_allmsa, ip, by = "GEOID")
tract_df_allmsa$county <- substr(tract_df_allmsa$GEOID, start = 3, stop = 5)
tract_df_allmsa$msa <- NA
# Create the 'msa' column without subsetting columns directly
tract_df_allmsa <- tract_df_allmsa %>%
mutate(
msa = case_when(
county %in% dfw ~ "dfw",
county %in% htx ~ "houston",
county %in% atx ~ "austin",
county %in% sa ~ "san_antonio",
county %in% mcallen ~ "mcallen",
county %in% elp ~ "el_paso",
TRUE ~ NA_character_
))# Ensure NAs are zero
tract_df_allmsa$total_visits <- ifelse(is.na(tract_df_allmsa$total_visits), 0, tract_df_allmsa$total_visits)
tract_df_allmsa$ed_visits <- ifelse(is.na(tract_df_allmsa$ed_visits), 0, tract_df_allmsa$ed_visits)
tract_df_allmsa$ip_visits <- ifelse(is.na(tract_df_allmsa$ip_visits), 0, tract_df_allmsa$ip_visits)
tract_df_atx$total_visits <- ifelse(is.na(tract_df_atx$total_visits), 0, tract_df_atx$total_visits)
tract_df_atx$ed_visits <- ifelse(is.na(tract_df_atx$ed_visits), 0, tract_df_atx$ed_visits)
tract_df_atx$ip_visits <- ifelse(is.na(tract_df_atx$ip_visits), 0, tract_df_atx$ip_visits)
## Four Years of Data
tract_df_atx$total_pbir <- (tract_df_atx$total_visits / (tract_df_atx$pop5_17 * 4)) * 100000
tract_df_atx$ed_pbir <- (tract_df_atx$ed_visits / (tract_df_atx$pop5_17 * 4)) * 100000
tract_df_atx$hosp_pbir <- ((tract_df_atx$ip_visits)
/ (tract_df_atx$pop5_17 * 4)) * 100000
tract_df_allmsa$total_pbir <- (tract_df_allmsa$total_visits / (tract_df_allmsa$pop5_17 * 4)) * 100000
tract_df_allmsa$ed_pbir <- (tract_df_allmsa$ed_visits / (tract_df_allmsa$pop5_17 * 4)) * 100000
tract_df_allmsa$hosp_pbir <- ((tract_df_allmsa$ip_visits)
/ (tract_df_allmsa$pop5_17 * 4)) * 100000texas_svi <- read.csv("//corral-secure/utdmc/Biomedical-Data-Scie/dunphy/texas_svi.csv")
texas_svi <- texas_svi %>% dplyr::select(FIPS, RPL_THEMES)
texas_svi$FIPS <- as.character(texas_svi$FIPS)
colnames(texas_svi) <- c("GEOID", "svi")
tract_df_allmsa <- left_join(tract_df_allmsa, texas_svi, by = "GEOID")
tract_df_allmsa$svi[is.na(tract_df_allmsa$svi)] <- 0Weekly PBIR Time
combined_counts_atx$IP_PBIR <- (combined_counts_atx$IP / (sum(tract_df_atx$pop5_17) * (1/52))) * 100000
combined_counts_atx$ED_PBIR <- (combined_counts_atx$ED / (sum(tract_df_atx$pop5_17) * (1/52))) * 100000
combined_counts_atx$total_PBIR <- ((combined_counts_atx$ED + combined_counts_atx$IP) / (sum(tract_df_atx$pop5_17) * (1/52))) * 100000
combined_counts_atx_pbir <- combined_counts_atx %>% dplyr::select(-c(IP, ED, Total))
## Create Graph of ED Counts
ed_pbir_atx <- ggplot(combined_counts_atx_pbir, aes(x=weeks, y=ED_PBIR, group = 1)) +
geom_point() +
geom_line() + scale_x_date(date_breaks = "2 month", date_labels = "%b/%Y")+
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(
title = "ED PBIR per Week \n (Travis County Only)",
x = "Week",
y = "ER PBIR"
)
print(ed_pbir_atx)
## Create Graph of Hosp Counts
hosp_pbir_atx <- ggplot(combined_counts_atx_pbir, aes(x=weeks, y=IP_PBIR, group = 1)) +
geom_point() +
geom_line() + scale_x_date(date_breaks = "2 month", date_labels = "%b/%Y")+theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(
title = "Hospitalization PBIR per Week \n (Travis County Only)",
x = "Week",
y = "Number of Hospitalizations"
)
print(hosp_pbir_atx)
# Reshape data to long format
long_combined_counts_atx_pbir <- gather(combined_counts_atx_pbir, key = "Type", value = "PBIR", -weeks)
# Create the combined PBIR graph
combined_graph <- ggplot(long_combined_counts_atx_pbir, aes(x = weeks, y = PBIR, group = Type, color = Type)) +
geom_point() +
geom_line() +
scale_x_date(date_breaks = "2 month", date_labels = "%b/%Y") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(
title = "ED, Hospitalization, and Total PBIR per Week \n (Travis County Only)",
x = "Week",
y = "PBIR"
)
print(combined_graph)
combined_counts_allmsa$IP_PBIR <- (combined_counts_allmsa$IP / (sum(tract_df_allmsa$pop5_17) * (1/52))) * 100000
combined_counts_allmsa$ED_PBIR <- (combined_counts_allmsa$ED / (sum(tract_df_allmsa$pop5_17) * (1/52))) * 100000
combined_counts_allmsa$total_PBIR <- ((combined_counts_allmsa$ED + combined_counts_atx$IP) / (sum(tract_df_allmsa$pop5_17) * (1/52))) * 100000
combined_counts_allmsa_pbir <- combined_counts_allmsa %>% dplyr::select(-c(IP, ED, Total))
## Create Graph of ED Counts
ed_pbir_allmsa <- ggplot(combined_counts_allmsa_pbir, aes(x=weeks, y=ED_PBIR, group = 1)) +
geom_point() +
geom_line() + scale_x_date(date_breaks = "2 month", date_labels = "%b/%Y")+
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(
title = "ED PBIR per Week \n (All MSAs)",
x = "Week",
y = "ER PBIR"
)
print(ed_pbir_allmsa)
## Create Graph of Hosp Counts
hosp_pbir_allmsa <- ggplot(combined_counts_allmsa_pbir, aes(x=weeks, y=IP_PBIR, group = 1)) +
geom_point() +
geom_line() + scale_x_date(date_breaks = "2 month", date_labels = "%b/%Y")+theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(
title = "Hospitalization PBIR per Week \n (All 5 MSA)",
x = "Week",
y = "Number of Hospitalizations"
)
print(hosp_pbir_allmsa)
# Reshape data to long format
long_combined_counts_allmsa_pbir <- gather(combined_counts_allmsa_pbir, key = "Type", value = "PBIR", -weeks)
# Create the combined PBIR graph
combined_graph <- ggplot(long_combined_counts_allmsa_pbir, aes(x = weeks, y = PBIR, group = Type, color = Type)) +
geom_point() +
geom_line() +
scale_x_date(date_breaks = "2 month", date_labels = "%b/%Y") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(
title = "ED, Hospitalization, and Total PBIR per Week \n (All 5 MSAs)",
x = "Week",
y = "PBIR"
)
print(combined_graph)
Graphing PBIRs
totalpbir_atx <- tm_shape(tract_df_atx) +
tm_polygons(col = "total_pbir",
style = "quantile",
n = 5,
palette = "Purples",
title = "Per 100,000 Person-Year") +
tm_layout(title = "Asthma Total PBIR \n by Census tract",
frame = FALSE,
legend.outside = TRUE)
tmap_save(totalpbir_atx, filename = "totalpbir_atx.png",height = 5, width = 5, dpi=600)
edpbir_atx <- tm_shape(tract_df_atx) +
tm_polygons(col = "ed_pbir",
style = "quantile",
n = 5,
palette = "Purples",
title = "Per 100,000 Person-Year") +
tm_layout(title = "Asthma ED Visit PBIR \n by Census tract",
frame = FALSE,
legend.outside = TRUE)
print(edpbir_atx)
tmap_save(edpbir_atx, filename = "edpbir_atx.png",height = 5, width = 5, dpi=600)
hosppbir_atx <- tm_shape(tract_df_atx) +
tm_polygons(col = "hosp_pbir",
style = "quantile",
n = 5,
palette = "Purples",
title = "Per 100,000 Person-Year") +
tm_layout(title = "Asthma Hospitalization PBIR \n by Census tract",
frame = FALSE,
legend.outside = TRUE)
print(hosppbir_atx)
tmap_save(hosppbir_atx, filename = "hosppbir_atx.png",height = 5, width = 5, dpi=600)clean_leaflet <- function(df, var_name, clean_name){
# Change CRS
df <- st_transform(df, crs ='+proj=longlat
+datum = WGS84 + no_defs')
# Calculate 7 quantiles for the non-zero data
non_zero_data <- df[[var_name]][df[[var_name]] > 0]
quantile_values <- quantile(non_zero_data, probs = seq(0, 1, length.out = 8), na.rm = TRUE)
# Define the palette for non-zero values
pal_non_zero <- colorQuantile(palette = 'YlOrRd', domain = non_zero_data, n = 7, probs = seq(0, 1, length.out = 8))
# Define a color for zero values
zero_color <- '#fee8c8'
# Assign colors to each row in the dataframe
df$color <- ifelse(df[[var_name]] == 0, zero_color, pal_non_zero(df[[var_name]]))
# Manually define the legend colors and labels
legend_colors <- c(zero_color, pal_non_zero(quantile_values[-1]))
legend_labels <- c("0", sapply(2:length(quantile_values), function(i) {
paste(formatC(quantile_values[i - 1], format = 'f', digits = 0),
"-",
formatC(quantile_values[i], format = 'f', digits = 0))
}))
# Create the leaflet map
map <- leaflet(data = df) %>%
addTiles() %>%
addPolygons(fillColor = ~color,
color = "grey", weight = 1,
fillOpacity = 0.7) %>%
addLegend(position = "bottomright",
colors = legend_colors,
labels = legend_labels,
title = clean_name,
opacity = 1) %>%
addScaleBar(position ="bottomleft")
return(map)
}austin_osm_map_ed <- clean_leaflet(tract_df_atx, "ed_pbir", "ED PBIR")
austin_osm_map_edaustin_osm_map_hosp <- clean_leaflet(tract_df_atx, "hosp_pbir", "ED PBIR")
austin_osm_map_hosped_plot_list <- vector("list", length = length(unique(tract_df_allmsa$msa)))
hosp_plot_list <- vector("list", length = length(unique(tract_df_allmsa$msa)))
svi_plot_list <- vector("list", length = length(unique(tract_df_allmsa$msa)))
income_plot_list <- vector("list", length = length(unique(tract_df_allmsa$msa)))
nonwhite_plot_list <- vector("list", length = length(unique(tract_df_allmsa$msa)))
msa_list <- c("dfw", "san_antonio", "houston", "el_paso", "mcallen", "austin")
tract_df_allmsa <- st_transform(tract_df_allmsa, crs ='+proj=longlat
+datum = WGS84 + no_defs')
for(i in seq(msa_list)){
df <- subset(tract_df_allmsa, msa == msa_list[i])
df$svi[df$svi < 0] <- NA
df$ed_pbir[!is.finite(df$ed_pbir)] <- 0
df$hosp_pbir[!is.finite(df$hosp_pbir)] <- 0
colors_svi <- colorNumeric(palette = 'YlOrRd', domain = df$svi)
colors_income <- colorNumeric(palette = 'YlOrRd', domain = df$median_income)
colors_nonwhite <- colorNumeric(palette = 'YlOrRd', domain = df$non_white_pop)
ed_plot_list[[i]] <- clean_leaflet(df, "ed_pbir", "ED PBIR")
hosp_plot_list[[i]] <- clean_leaflet(df, "hosp_pbir", "Hospitalization PBIR")
svi_plot <-leaflet(data = df) %>%
addTiles() %>%
addPolygons(fillColor = ~colors_svi(df$svi),
color = "grey", weight = 1,
fillOpacity = 0.7) %>%
addLegend(pal = colors_svi, values = df$svi,
opacity = 1, title = "SVI") %>%
addScaleBar(position ="bottomleft")
svi_plot_list[[i]] <- svi_plot
income_plot <-leaflet(data = df) %>%
addTiles() %>%
addPolygons(fillColor = ~colors_income(df$median_income),
color = "grey", weight = 1,
fillOpacity = 0.7) %>%
addLegend(pal = colors_income, values = df$median_income,
opacity = 1, title = "Median Household Income") %>%
addScaleBar(position ="bottomleft")
income_plot_list[[i]] <- income_plot
nonwhite_plot <-leaflet(data = df) %>%
addTiles() %>%
addPolygons(fillColor = ~colors_nonwhite(df$non_white_pop),
color = "grey", weight = 1,
fillOpacity = 0.7) %>%
addLegend(pal = colors_nonwhite, values = df$non_white_pop,
opacity = 1, title = "% Non-White Population") %>%
addScaleBar(position ="bottomleft")
nonwhite_plot_list[[i]] <- nonwhite_plot
}Dallas-Forth Worth Maps
ed_plot_list[[1]]hosp_plot_list[[1]]svi_plot_list[[1]]income_plot_list[[1]]nonwhite_plot_list[[1]]San Antonio Maps
ed_plot_list[[2]]hosp_plot_list[[2]]svi_plot_list[[2]]income_plot_list[[2]]nonwhite_plot_list[[2]]Houston Maps
ed_plot_list[[3]]hosp_plot_list[[3]]svi_plot_list[[3]]income_plot_list[[3]]nonwhite_plot_list[[3]]El Paso
ed_plot_list[[4]]hosp_plot_list[[4]]svi_plot_list[[4]]income_plot_list[[4]]nonwhite_plot_list[[4]]McAllen
ed_plot_list[[5]]hosp_plot_list[[5]]svi_plot_list[[5]]income_plot_list[[5]]nonwhite_plot_list[[5]]Austin
ed_plot_list[[6]]hosp_plot_list[[6]]svi_plot_list[[6]]income_plot_list[[6]]nonwhite_plot_list[[6]]