CDC_CVD 20102019

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(haven)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.4
## ✔ ggplot2   3.4.1     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(tmap)
library(ggplot2)

# Read in the 2010-2014 file
tx_cvd_2010_2014 <- read.table("/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/CVD 2010-2014.txt", sep="\t", header=TRUE, fill=TRUE)

# Read in the 2015-2019 file
tx_cvd_2015_2019 <- read.table("/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/CVD 2015-2019.txt", sep="\t", header=TRUE, fill=TRUE)

library(dplyr)

# Remove notes column
tx_cvd_2010_2014 <- select(tx_cvd_2010_2014, -Notes)
tx_cvd_2015_2019 <- select(tx_cvd_2015_2019, -Notes)

# Convert Deaths column to numeric in tx_cvd_2010_2014
tx_cvd_2010_2014$Deaths <- as.numeric(tx_cvd_2010_2014$Deaths)

## Warning: NAs introduced by coercion

# Convert Crude.Rate column to numeric in tx_cvd_2010_2014
tx_cvd_2010_2014$Crude.Rate <- as.numeric(tx_cvd_2010_2014$Crude.Rate)

## Warning: NAs introduced by coercion

# Convert Age.Adjusted.Rate column to numeric in tx_cvd_2010_2014
tx_cvd_2010_2014$Age.Adjusted.Rate <- as.numeric(tx_cvd_2010_2014$Age.Adjusted.Rate)

## Warning: NAs introduced by coercion

# Repeat for tx_cvd_2015_2019
# Convert Deaths column to numeric in tx_cvd_2015_2019
tx_cvd_2015_2019$Deaths <- as.numeric(tx_cvd_2015_2019$Deaths)

## Warning: NAs introduced by coercion

# Convert Crude.Rate column to numeric in tx_cvd_2015_2019
tx_cvd_2015_2019$Crude.Rate <- as.numeric(tx_cvd_2015_2019$Crude.Rate)

## Warning: NAs introduced by coercion

# Convert Age.Adjusted.Rate column to numeric in tx_cvd_2015_2019
tx_cvd_2015_2019$Age.Adjusted.Rate <- as.numeric(tx_cvd_2015_2019$Age.Adjusted.Rate)

## Warning: NAs introduced by coercion

# Replace dots with NA in tx_cvd_2010_2014
tx_cvd_2010_2014[tx_cvd_2010_2014 == "."] <- NA

# Replace dots with NA in tx_cvd_2015_2019
tx_cvd_2015_2019[tx_cvd_2015_2019 == "."] <- NA

# Replace "Suppressed" values with NA in Deaths column of tx_cvd_2010_2014
tx_cvd_2010_2014$Deaths[tx_cvd_2010_2014$Deaths == "Suppressed"] <- NA

# Convert Deaths column to numeric in tx_cvd_2010_2014
tx_cvd_2010_2014$Deaths <- as.numeric(tx_cvd_2010_2014$Deaths)

# Replace "Suppressed" values with NA in Deaths column of tx_cvd_2015_2019
tx_cvd_2015_2019$Deaths[tx_cvd_2015_2019$Deaths == "Suppressed"] <- NA

# Convert Deaths column to numeric in tx_cvd_2015_2019
tx_cvd_2015_2019$Deaths <- as.numeric(tx_cvd_2015_2019$Deaths)

library(sf)

## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE

# Read Texas county shapefile 
texas_counties <- st_read("/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/tl_2019_us_county/tl_2019_us_county.shp")

## Reading layer `tl_2019_us_county' from data source 
##   `/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/tl_2019_us_county/tl_2019_us_county.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 3233 features and 17 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
## Geodetic CRS:  NAD83

# Standardize county names and rename the column for tx_cvd_2010_2014
tx_cvd_2010_2014$County <- gsub(" County, TX", "", tx_cvd_2010_2014$County)
names(tx_cvd_2010_2014)[names(tx_cvd_2010_2014) == "County"] <- "NAME"

# Standardize county names and rename the column for tx_cvd_2015_2019
tx_cvd_2015_2019$County <- gsub(" County, TX", "", tx_cvd_2015_2019$County)
names(tx_cvd_2015_2019)[names(tx_cvd_2015_2019) == "County"] <- "NAME"

library(ggplot2)
library(sf)

library(sf)
# Read Texas county shapefile 
texas_counties <- st_read("/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/tl_2019_us_county/tl_2019_us_county.shp")

## Reading layer `tl_2019_us_county' from data source 
##   `/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/tl_2019_us_county/tl_2019_us_county.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 3233 features and 17 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
## Geodetic CRS:  NAD83

# Filter shapefile to only include Texas counties
texas_counties <- texas_counties %>% filter(STATEFP == "48")

# Merge the CVD data with the Texas county shapefile
texas_counties_2010_2014 <- left_join(texas_counties, tx_cvd_2010_2014, by = "NAME")
texas_counties_2015_2019 <- left_join(texas_counties, tx_cvd_2015_2019, by = "NAME")

# Create a function to plot the maps
plot_map <- function(data, title) {
  ggplot() +
    geom_sf(data = data, aes(fill = Deaths)) +
    scale_fill_viridis_c(trans = "log10", name = "Deaths") +
    theme_minimal() +
    theme(panel.grid.major = element_line(colour = "white"),
          legend.position = "bottom",
          axis.text = element_blank(),
          axis.title = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(hjust = 0.5),
          plot.caption = element_text(hjust = 1)) +
    labs(title = title,
         caption = paste("Source: CDC Wonder |",
                         "Author: Julie Gonzalez"))
}

# Plot the maps
plot_2010_2014 <- plot_map(texas_counties_2010_2014, "CVD Deaths in Texas (2010-2014)")
plot_2015_2019 <- plot_map(texas_counties_2015_2019, "CVD Deaths in Texas (2015-2019)")

# Display the maps
plot_2010_2014

plot_2015_2019

# Descriptive statistics for tx_cvd_2010_2014
desc_stats_2010_2014 <- tx_cvd_2010_2014 %>%
  summarize(
    mean_deaths = mean(Deaths, na.rm = TRUE),
    sd_deaths = sd(Deaths, na.rm = TRUE),
    min_deaths = min(Deaths, na.rm = TRUE),
    max_deaths = max(Deaths, na.rm = TRUE),
    mean_crude_rate = mean(Crude.Rate, na.rm = TRUE),
    sd_crude_rate = sd(Crude.Rate, na.rm = TRUE),
    min_crude_rate = min(Crude.Rate, na.rm = TRUE),
    max_crude_rate = max(Crude.Rate, na.rm = TRUE),
    mean_age_adjusted_rate = mean(Age.Adjusted.Rate, na.rm = TRUE),
    sd_age_adjusted_rate = sd(Age.Adjusted.Rate, na.rm = TRUE),
    min_age_adjusted_rate = min(Age.Adjusted.Rate, na.rm = TRUE),
    max_age_adjusted_rate = max(Age.Adjusted.Rate, na.rm = TRUE)
  )

# Descriptive statistics for tx_cvd_2015_2019
desc_stats_2015_2019 <- tx_cvd_2015_2019 %>%
  summarize(
    mean_deaths = mean(Deaths, na.rm = TRUE),
    sd_deaths = sd(Deaths, na.rm = TRUE),
    min_deaths = min(Deaths, na.rm = TRUE),
    max_deaths = max(Deaths, na.rm = TRUE),
    mean_crude_rate = mean(Crude.Rate, na.rm = TRUE),
    sd_crude_rate = sd(Crude.Rate, na.rm = TRUE),
    min_crude_rate = min(Crude.Rate, na.rm = TRUE),
    max_crude_rate = max(Crude.Rate, na.rm = TRUE),
    mean_age_adjusted_rate = mean(Age.Adjusted.Rate, na.rm = TRUE),
    sd_age_adjusted_rate = sd(Age.Adjusted.Rate, na.rm = TRUE),
    min_age_adjusted_rate = min(Age.Adjusted.Rate, na.rm = TRUE),
    max_age_adjusted_rate = max(Age.Adjusted.Rate, na.rm = TRUE)
  )

# Print the descriptive statistics
desc_stats_2010_2014

##   mean_deaths sd_deaths min_deaths max_deaths mean_crude_rate sd_crude_rate
## 1      2088.8   16735.5         15     261109        316.9012      108.1701
##   min_crude_rate max_crude_rate mean_age_adjusted_rate sd_age_adjusted_rate
## 1          112.7          706.5               256.4433             47.09129
##   min_age_adjusted_rate max_age_adjusted_rate
## 1                 109.3                   408

desc_stats_2015_2019

##   mean_deaths sd_deaths min_deaths max_deaths mean_crude_rate sd_crude_rate
## 1    2391.276  19168.15         10     298924        328.1069      101.3764
##   min_crude_rate max_crude_rate mean_age_adjusted_rate sd_age_adjusted_rate
## 1          120.2          645.6               251.7561             47.51084
##   min_age_adjusted_rate max_age_adjusted_rate
## 1                 125.6                 368.2

# Check the summary statistics of the 'Crude.Rate' variable for both datasets
summary(tx_cvd_2010_2014$Crude.Rate)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   112.7   242.4   307.4   316.9   389.6   706.5      66

summary(tx_cvd_2015_2019$Crude.Rate)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   120.2   253.0   327.4   328.1   390.5   645.6      65

CDC_CVD 20102019

Jules Gonzalez

2023-04-23