library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(haven)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.4
## ✔ ggplot2   3.4.1     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tmap)
library(ggplot2)
# Read in the 2010-2014 file
tx_cvd_2010_2014 <- read.table("/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/CVD 2010-2014.txt", sep="\t", header=TRUE, fill=TRUE)

# Read in the 2015-2019 file
tx_cvd_2015_2019 <- read.table("/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/CVD 2015-2019.txt", sep="\t", header=TRUE, fill=TRUE)
library(dplyr)

# Remove notes column
tx_cvd_2010_2014 <- select(tx_cvd_2010_2014, -Notes)
tx_cvd_2015_2019 <- select(tx_cvd_2015_2019, -Notes)
# Convert Deaths column to numeric in tx_cvd_2010_2014
tx_cvd_2010_2014$Deaths <- as.numeric(tx_cvd_2010_2014$Deaths)
## Warning: NAs introduced by coercion
# Convert Crude.Rate column to numeric in tx_cvd_2010_2014
tx_cvd_2010_2014$Crude.Rate <- as.numeric(tx_cvd_2010_2014$Crude.Rate)
## Warning: NAs introduced by coercion
# Convert Age.Adjusted.Rate column to numeric in tx_cvd_2010_2014
tx_cvd_2010_2014$Age.Adjusted.Rate <- as.numeric(tx_cvd_2010_2014$Age.Adjusted.Rate)
## Warning: NAs introduced by coercion
# Repeat for tx_cvd_2015_2019
# Convert Deaths column to numeric in tx_cvd_2015_2019
tx_cvd_2015_2019$Deaths <- as.numeric(tx_cvd_2015_2019$Deaths)
## Warning: NAs introduced by coercion
# Convert Crude.Rate column to numeric in tx_cvd_2015_2019
tx_cvd_2015_2019$Crude.Rate <- as.numeric(tx_cvd_2015_2019$Crude.Rate)
## Warning: NAs introduced by coercion
# Convert Age.Adjusted.Rate column to numeric in tx_cvd_2015_2019
tx_cvd_2015_2019$Age.Adjusted.Rate <- as.numeric(tx_cvd_2015_2019$Age.Adjusted.Rate)
## Warning: NAs introduced by coercion
# Replace dots with NA in tx_cvd_2010_2014
tx_cvd_2010_2014[tx_cvd_2010_2014 == "."] <- NA

# Replace dots with NA in tx_cvd_2015_2019
tx_cvd_2015_2019[tx_cvd_2015_2019 == "."] <- NA

# Replace "Suppressed" values with NA in Deaths column of tx_cvd_2010_2014
tx_cvd_2010_2014$Deaths[tx_cvd_2010_2014$Deaths == "Suppressed"] <- NA

# Convert Deaths column to numeric in tx_cvd_2010_2014
tx_cvd_2010_2014$Deaths <- as.numeric(tx_cvd_2010_2014$Deaths)

# Replace "Suppressed" values with NA in Deaths column of tx_cvd_2015_2019
tx_cvd_2015_2019$Deaths[tx_cvd_2015_2019$Deaths == "Suppressed"] <- NA

# Convert Deaths column to numeric in tx_cvd_2015_2019
tx_cvd_2015_2019$Deaths <- as.numeric(tx_cvd_2015_2019$Deaths)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
# Read Texas county shapefile 
texas_counties <- st_read("/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/tl_2019_us_county/tl_2019_us_county.shp")
## Reading layer `tl_2019_us_county' from data source 
##   `/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/tl_2019_us_county/tl_2019_us_county.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 3233 features and 17 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
## Geodetic CRS:  NAD83
# Standardize county names and rename the column for tx_cvd_2010_2014
tx_cvd_2010_2014$County <- gsub(" County, TX", "", tx_cvd_2010_2014$County)
names(tx_cvd_2010_2014)[names(tx_cvd_2010_2014) == "County"] <- "NAME"

# Standardize county names and rename the column for tx_cvd_2015_2019
tx_cvd_2015_2019$County <- gsub(" County, TX", "", tx_cvd_2015_2019$County)
names(tx_cvd_2015_2019)[names(tx_cvd_2015_2019) == "County"] <- "NAME"
library(ggplot2)
library(sf)

library(sf)
# Read Texas county shapefile 
texas_counties <- st_read("/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/tl_2019_us_county/tl_2019_us_county.shp")
## Reading layer `tl_2019_us_county' from data source 
##   `/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/tl_2019_us_county/tl_2019_us_county.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 3233 features and 17 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
## Geodetic CRS:  NAD83
# Filter shapefile to only include Texas counties
texas_counties <- texas_counties %>% filter(STATEFP == "48")

# Merge the CVD data with the Texas county shapefile
texas_counties_2010_2014 <- left_join(texas_counties, tx_cvd_2010_2014, by = "NAME")
texas_counties_2015_2019 <- left_join(texas_counties, tx_cvd_2015_2019, by = "NAME")

# Create a function to plot the maps
plot_map <- function(data, title) {
  ggplot() +
    geom_sf(data = data, aes(fill = Deaths)) +
    scale_fill_viridis_c(trans = "log10", name = "Deaths") +
    theme_minimal() +
    theme(panel.grid.major = element_line(colour = "white"),
          legend.position = "bottom",
          axis.text = element_blank(),
          axis.title = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(hjust = 0.5),
          plot.caption = element_text(hjust = 1)) +
    labs(title = title,
         caption = paste("Source: CDC Wonder |",
                         "Author: Julie Gonzalez"))
}

# Plot the maps
plot_2010_2014 <- plot_map(texas_counties_2010_2014, "CVD Deaths in Texas (2010-2014)")
plot_2015_2019 <- plot_map(texas_counties_2015_2019, "CVD Deaths in Texas (2015-2019)")

# Display the maps
plot_2010_2014

plot_2015_2019

# Descriptive statistics for tx_cvd_2010_2014
desc_stats_2010_2014 <- tx_cvd_2010_2014 %>%
  summarize(
    mean_deaths = mean(Deaths, na.rm = TRUE),
    sd_deaths = sd(Deaths, na.rm = TRUE),
    min_deaths = min(Deaths, na.rm = TRUE),
    max_deaths = max(Deaths, na.rm = TRUE),
    mean_crude_rate = mean(Crude.Rate, na.rm = TRUE),
    sd_crude_rate = sd(Crude.Rate, na.rm = TRUE),
    min_crude_rate = min(Crude.Rate, na.rm = TRUE),
    max_crude_rate = max(Crude.Rate, na.rm = TRUE),
    mean_age_adjusted_rate = mean(Age.Adjusted.Rate, na.rm = TRUE),
    sd_age_adjusted_rate = sd(Age.Adjusted.Rate, na.rm = TRUE),
    min_age_adjusted_rate = min(Age.Adjusted.Rate, na.rm = TRUE),
    max_age_adjusted_rate = max(Age.Adjusted.Rate, na.rm = TRUE)
  )

# Descriptive statistics for tx_cvd_2015_2019
desc_stats_2015_2019 <- tx_cvd_2015_2019 %>%
  summarize(
    mean_deaths = mean(Deaths, na.rm = TRUE),
    sd_deaths = sd(Deaths, na.rm = TRUE),
    min_deaths = min(Deaths, na.rm = TRUE),
    max_deaths = max(Deaths, na.rm = TRUE),
    mean_crude_rate = mean(Crude.Rate, na.rm = TRUE),
    sd_crude_rate = sd(Crude.Rate, na.rm = TRUE),
    min_crude_rate = min(Crude.Rate, na.rm = TRUE),
    max_crude_rate = max(Crude.Rate, na.rm = TRUE),
    mean_age_adjusted_rate = mean(Age.Adjusted.Rate, na.rm = TRUE),
    sd_age_adjusted_rate = sd(Age.Adjusted.Rate, na.rm = TRUE),
    min_age_adjusted_rate = min(Age.Adjusted.Rate, na.rm = TRUE),
    max_age_adjusted_rate = max(Age.Adjusted.Rate, na.rm = TRUE)
  )

# Print the descriptive statistics
desc_stats_2010_2014
##   mean_deaths sd_deaths min_deaths max_deaths mean_crude_rate sd_crude_rate
## 1      2088.8   16735.5         15     261109        316.9012      108.1701
##   min_crude_rate max_crude_rate mean_age_adjusted_rate sd_age_adjusted_rate
## 1          112.7          706.5               256.4433             47.09129
##   min_age_adjusted_rate max_age_adjusted_rate
## 1                 109.3                   408
desc_stats_2015_2019
##   mean_deaths sd_deaths min_deaths max_deaths mean_crude_rate sd_crude_rate
## 1    2391.276  19168.15         10     298924        328.1069      101.3764
##   min_crude_rate max_crude_rate mean_age_adjusted_rate sd_age_adjusted_rate
## 1          120.2          645.6               251.7561             47.51084
##   min_age_adjusted_rate max_age_adjusted_rate
## 1                 125.6                 368.2
# Check the summary statistics of the 'Crude.Rate' variable for both datasets
summary(tx_cvd_2010_2014$Crude.Rate)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   112.7   242.4   307.4   316.9   389.6   706.5      66
summary(tx_cvd_2015_2019$Crude.Rate)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   120.2   253.0   327.4   328.1   390.5   645.6      65