library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(haven)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ ggplot2 3.4.1 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tmap)
library(ggplot2)
# Read in the 2010-2014 file
tx_cvd_2010_2014 <- read.table("/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/CVD 2010-2014.txt", sep="\t", header=TRUE, fill=TRUE)
# Read in the 2015-2019 file
tx_cvd_2015_2019 <- read.table("/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/CVD 2015-2019.txt", sep="\t", header=TRUE, fill=TRUE)
library(dplyr)
# Remove notes column
tx_cvd_2010_2014 <- select(tx_cvd_2010_2014, -Notes)
tx_cvd_2015_2019 <- select(tx_cvd_2015_2019, -Notes)
# Convert Deaths column to numeric in tx_cvd_2010_2014
tx_cvd_2010_2014$Deaths <- as.numeric(tx_cvd_2010_2014$Deaths)
## Warning: NAs introduced by coercion
# Convert Crude.Rate column to numeric in tx_cvd_2010_2014
tx_cvd_2010_2014$Crude.Rate <- as.numeric(tx_cvd_2010_2014$Crude.Rate)
## Warning: NAs introduced by coercion
# Convert Age.Adjusted.Rate column to numeric in tx_cvd_2010_2014
tx_cvd_2010_2014$Age.Adjusted.Rate <- as.numeric(tx_cvd_2010_2014$Age.Adjusted.Rate)
## Warning: NAs introduced by coercion
# Repeat for tx_cvd_2015_2019
# Convert Deaths column to numeric in tx_cvd_2015_2019
tx_cvd_2015_2019$Deaths <- as.numeric(tx_cvd_2015_2019$Deaths)
## Warning: NAs introduced by coercion
# Convert Crude.Rate column to numeric in tx_cvd_2015_2019
tx_cvd_2015_2019$Crude.Rate <- as.numeric(tx_cvd_2015_2019$Crude.Rate)
## Warning: NAs introduced by coercion
# Convert Age.Adjusted.Rate column to numeric in tx_cvd_2015_2019
tx_cvd_2015_2019$Age.Adjusted.Rate <- as.numeric(tx_cvd_2015_2019$Age.Adjusted.Rate)
## Warning: NAs introduced by coercion
# Replace dots with NA in tx_cvd_2010_2014
tx_cvd_2010_2014[tx_cvd_2010_2014 == "."] <- NA
# Replace dots with NA in tx_cvd_2015_2019
tx_cvd_2015_2019[tx_cvd_2015_2019 == "."] <- NA
# Replace "Suppressed" values with NA in Deaths column of tx_cvd_2010_2014
tx_cvd_2010_2014$Deaths[tx_cvd_2010_2014$Deaths == "Suppressed"] <- NA
# Convert Deaths column to numeric in tx_cvd_2010_2014
tx_cvd_2010_2014$Deaths <- as.numeric(tx_cvd_2010_2014$Deaths)
# Replace "Suppressed" values with NA in Deaths column of tx_cvd_2015_2019
tx_cvd_2015_2019$Deaths[tx_cvd_2015_2019$Deaths == "Suppressed"] <- NA
# Convert Deaths column to numeric in tx_cvd_2015_2019
tx_cvd_2015_2019$Deaths <- as.numeric(tx_cvd_2015_2019$Deaths)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
# Read Texas county shapefile
texas_counties <- st_read("/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/tl_2019_us_county/tl_2019_us_county.shp")
## Reading layer `tl_2019_us_county' from data source
## `/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/tl_2019_us_county/tl_2019_us_county.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 3233 features and 17 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
## Geodetic CRS: NAD83
# Standardize county names and rename the column for tx_cvd_2010_2014
tx_cvd_2010_2014$County <- gsub(" County, TX", "", tx_cvd_2010_2014$County)
names(tx_cvd_2010_2014)[names(tx_cvd_2010_2014) == "County"] <- "NAME"
# Standardize county names and rename the column for tx_cvd_2015_2019
tx_cvd_2015_2019$County <- gsub(" County, TX", "", tx_cvd_2015_2019$County)
names(tx_cvd_2015_2019)[names(tx_cvd_2015_2019) == "County"] <- "NAME"
library(ggplot2)
library(sf)
library(sf)
# Read Texas county shapefile
texas_counties <- st_read("/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/tl_2019_us_county/tl_2019_us_county.shp")
## Reading layer `tl_2019_us_county' from data source
## `/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/tl_2019_us_county/tl_2019_us_county.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 3233 features and 17 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
## Geodetic CRS: NAD83
# Filter shapefile to only include Texas counties
texas_counties <- texas_counties %>% filter(STATEFP == "48")
# Merge the CVD data with the Texas county shapefile
texas_counties_2010_2014 <- left_join(texas_counties, tx_cvd_2010_2014, by = "NAME")
texas_counties_2015_2019 <- left_join(texas_counties, tx_cvd_2015_2019, by = "NAME")
# Create a function to plot the maps
plot_map <- function(data, title) {
ggplot() +
geom_sf(data = data, aes(fill = Deaths)) +
scale_fill_viridis_c(trans = "log10", name = "Deaths") +
theme_minimal() +
theme(panel.grid.major = element_line(colour = "white"),
legend.position = "bottom",
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(hjust = 0.5),
plot.caption = element_text(hjust = 1)) +
labs(title = title,
caption = paste("Source: CDC Wonder |",
"Author: Julie Gonzalez"))
}
# Plot the maps
plot_2010_2014 <- plot_map(texas_counties_2010_2014, "CVD Deaths in Texas (2010-2014)")
plot_2015_2019 <- plot_map(texas_counties_2015_2019, "CVD Deaths in Texas (2015-2019)")
# Display the maps
plot_2010_2014

plot_2015_2019

# Descriptive statistics for tx_cvd_2010_2014
desc_stats_2010_2014 <- tx_cvd_2010_2014 %>%
summarize(
mean_deaths = mean(Deaths, na.rm = TRUE),
sd_deaths = sd(Deaths, na.rm = TRUE),
min_deaths = min(Deaths, na.rm = TRUE),
max_deaths = max(Deaths, na.rm = TRUE),
mean_crude_rate = mean(Crude.Rate, na.rm = TRUE),
sd_crude_rate = sd(Crude.Rate, na.rm = TRUE),
min_crude_rate = min(Crude.Rate, na.rm = TRUE),
max_crude_rate = max(Crude.Rate, na.rm = TRUE),
mean_age_adjusted_rate = mean(Age.Adjusted.Rate, na.rm = TRUE),
sd_age_adjusted_rate = sd(Age.Adjusted.Rate, na.rm = TRUE),
min_age_adjusted_rate = min(Age.Adjusted.Rate, na.rm = TRUE),
max_age_adjusted_rate = max(Age.Adjusted.Rate, na.rm = TRUE)
)
# Descriptive statistics for tx_cvd_2015_2019
desc_stats_2015_2019 <- tx_cvd_2015_2019 %>%
summarize(
mean_deaths = mean(Deaths, na.rm = TRUE),
sd_deaths = sd(Deaths, na.rm = TRUE),
min_deaths = min(Deaths, na.rm = TRUE),
max_deaths = max(Deaths, na.rm = TRUE),
mean_crude_rate = mean(Crude.Rate, na.rm = TRUE),
sd_crude_rate = sd(Crude.Rate, na.rm = TRUE),
min_crude_rate = min(Crude.Rate, na.rm = TRUE),
max_crude_rate = max(Crude.Rate, na.rm = TRUE),
mean_age_adjusted_rate = mean(Age.Adjusted.Rate, na.rm = TRUE),
sd_age_adjusted_rate = sd(Age.Adjusted.Rate, na.rm = TRUE),
min_age_adjusted_rate = min(Age.Adjusted.Rate, na.rm = TRUE),
max_age_adjusted_rate = max(Age.Adjusted.Rate, na.rm = TRUE)
)
# Print the descriptive statistics
desc_stats_2010_2014
## mean_deaths sd_deaths min_deaths max_deaths mean_crude_rate sd_crude_rate
## 1 2088.8 16735.5 15 261109 316.9012 108.1701
## min_crude_rate max_crude_rate mean_age_adjusted_rate sd_age_adjusted_rate
## 1 112.7 706.5 256.4433 47.09129
## min_age_adjusted_rate max_age_adjusted_rate
## 1 109.3 408
desc_stats_2015_2019
## mean_deaths sd_deaths min_deaths max_deaths mean_crude_rate sd_crude_rate
## 1 2391.276 19168.15 10 298924 328.1069 101.3764
## min_crude_rate max_crude_rate mean_age_adjusted_rate sd_age_adjusted_rate
## 1 120.2 645.6 251.7561 47.51084
## min_age_adjusted_rate max_age_adjusted_rate
## 1 125.6 368.2
# Check the summary statistics of the 'Crude.Rate' variable for both datasets
summary(tx_cvd_2010_2014$Crude.Rate)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 112.7 242.4 307.4 316.9 389.6 706.5 66
summary(tx_cvd_2015_2019$Crude.Rate)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 120.2 253.0 327.4 328.1 390.5 645.6 65