# Load the necessary packages
library(readr)
library(dplyr)
#Read in Air pollution data 2010-2014
airpo14 <- read_csv("/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Laptop_airpo_20102019/annual_aqi_by_county_2014.csv")
airpo13 <- read_csv("/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Laptop_airpo_20102019/annual_aqi_by_county_2013.csv")
airpo12 <- read_csv("/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Laptop_airpo_20102019/annual_aqi_by_county_2012.csv")
airpo11 <- read_csv("/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Laptop_airpo_20102019/annual_aqi_by_county_2011.csv")
airpo10 <- read_csv("/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Laptop_airpo_20102019/annual_aqi_by_county_2010.csv")
# Read in respiratory mortality data
tx_resp_2010_2014 <- read.table("/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/CDC2010-2014 Respiratory.txt", sep="\t", header=TRUE, fill=TRUE)
tx_resp_2010_2014 <- select(tx_resp_2010_2014, -Notes)
#one airpollution database
airpo10_14 <- rbind(airpo10, airpo11, airpo12, airpo13, airpo14)
#Clean some stuff
#CVD Mortality Data
# Repeat for tx_resp_2010_2014
# Convert Deaths column to numeric in tx_resp_2010_2014
tx_resp_2010_2014$Deaths <- as.numeric(tx_resp_2010_2014$Deaths)
## Warning: NAs introduced by coercion
# Convert Crude.Rate column to numeric in tx_resp_2010_2014
tx_resp_2010_2014$Crude.Rate <- as.numeric(tx_resp_2010_2014$Crude.Rate)
## Warning: NAs introduced by coercion
# Convert Age.Adjusted.Rate column to numeric in tx_resp_2010_2014
tx_resp_2010_2014$Age.Adjusted.Rate <- as.numeric(tx_resp_2010_2014$Age.Adjusted.Rate)
## Warning: NAs introduced by coercion
# Replace dots with NA in tx_resp_2010_2014
tx_resp_2010_2014[tx_resp_2010_2014 == "."] <- NA
# Replace "Suppressed" values with NA in Deaths column of tx_resp_2010_2014
tx_resp_2010_2014$Deaths[tx_resp_2010_2014$Deaths == "Suppressed"] <- NA
# Convert Deaths column to numeric in tx_resp_2010_2014
tx_resp_2010_2014$Deaths <- as.numeric(tx_resp_2010_2014$Deaths)
# remove ", TX" from County column
tx_resp_2010_2014$County <- gsub(" County, TX", "", tx_resp_2010_2014$County)
# Filter and select the data of interest
airpo10_14 <- airpo10_14 %>%
filter(State == "Texas"
) %>%
select(State, County, `Median AQI`)
# Aggregate airpo10_14 by County
airpo10_14_agg <- airpo10_14 %>%
group_by(County) %>%
summarize(Median_AQI = mean(`Median AQI`, na.rm = TRUE))
# Aggregate tx_cvd_2010_2014 by County
tx_resp_2010_2014_agg <- tx_resp_2010_2014 %>%
group_by(County) %>%
summarize(Crude_Rate = mean(`Crude.Rate`, na.rm = TRUE))
# Merge the aggregated datasets
airpo_mort1014_agg <- left_join(airpo10_14_agg, tx_resp_2010_2014_agg, by = "County")
names(airpo10_14_agg)
## [1] "County" "Median_AQI"
names(tx_resp_2010_2014_agg)
## [1] "County" "Crude_Rate"
summary(airpo_mort1014_agg )
## County Median_AQI Crude_Rate
## Length:46 Min. :16.00 Min. : 35.70
## Class :character 1st Qu.:36.10 1st Qu.: 53.70
## Mode :character Median :40.00 Median : 76.35
## Mean :38.91 Mean : 78.53
## 3rd Qu.:43.10 3rd Qu.: 95.92
## Max. :55.00 Max. :139.50
## NA's :2
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(ggplot2)
# Read Texas county shapefile
texas_counties <- st_read("/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/tl_2019_us_county/tl_2019_us_county.shp")
## Reading layer `tl_2019_us_county' from data source
## `/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/tl_2019_us_county/tl_2019_us_county.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 3233 features and 17 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
## Geodetic CRS: NAD83
# Filter shapefile to only include Texas counties
texas_counties <- texas_counties %>% filter(STATEFP == "48")
tx_counties_data <- left_join(texas_counties, airpo_mort1014_agg, by = c("NAME" = "County"))
#Map with more details, CVD mortality
crude_rate_map <- ggplot() +
geom_sf(data = tx_counties_data, aes(fill = Crude_Rate), color = "black", size = 0.25) +
scale_fill_viridis_c(option = "plasma", name = "Crude Rate", na.value = "gray") +
labs(title = "Respiratory Mortality by County in Texas (2010-2014)",
caption = "Source: CDC Wonder | Author: Julie Gonzalez") +
theme_void() +
theme(legend.position = "right",
plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
plot.caption = element_text(hjust = 1, size = 10))
# Display the Crude Rate map
crude_rate_map

#map with more details
median_aqi_map <- ggplot() +
geom_sf(data = tx_counties_data, aes(fill = Median_AQI), color = "black", size = 0.25) +
scale_fill_viridis_c(option = "plasma", name = "Median AQI", na.value = "gray") +
labs(title = "Median AQI by County in Texas (2010-2014)",
caption = "Source: EPA Annual AQI | Author: Julie Gonzalez") +
theme_void() +
theme(legend.position = "right",
plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
plot.caption = element_text(hjust = 1, size = 10))
# Display the Median AQI map
median_aqi_map

# Histogram for Median AQI
ggplot(tx_counties_data, aes(x = Median_AQI)) +
geom_histogram(binwidth = 5, fill = "blue", color = "black", alpha = 0.7) +
labs(title = "Histogram of Median AQI",
x = "Median AQI",
y = "Count")
## Warning: Removed 208 rows containing non-finite values (`stat_bin()`).

# Histogram for Crude Rate
ggplot(tx_counties_data, aes(x = Crude_Rate)) +
geom_histogram(binwidth = 5, fill = "red", color = "black", alpha = 0.7) +
labs(title = "Histogram of Crude Rate",
x = "Crude Rate",
y = "Count")
## Warning: Removed 210 rows containing non-finite values (`stat_bin()`).

ggplot(tx_counties_data, aes(x = Median_AQI, y = Crude_Rate)) +
geom_point(alpha = 0.7) +
geom_smooth(method = "lm", se = FALSE, color = "red", linetype = "solid") +
labs(title = "Scatterplot of Median AQI vs. Crude Rate with Regression Line",
x = "Median AQI",
y = "Crude Rate")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 210 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 210 rows containing missing values (`geom_point()`).

library(dplyr)
library(knitr)
median_aqi_summary <- tx_counties_data %>%
summarise(min = min(Median_AQI, na.rm = TRUE),
q1 = quantile(Median_AQI, 0.25, na.rm = TRUE),
median = median(Median_AQI, na.rm = TRUE),
mean = mean(Median_AQI, na.rm = TRUE),
q3 = quantile(Median_AQI, 0.75, na.rm = TRUE),
max = max(Median_AQI, na.rm = TRUE),
sd = sd(Median_AQI, na.rm = TRUE),
iqr = IQR(Median_AQI, na.rm = TRUE))
crude_rate_summary <- tx_counties_data %>%
summarise(min = min(Crude_Rate, na.rm = TRUE),
q1 = quantile(Crude_Rate, 0.25, na.rm = TRUE),
median = median(Crude_Rate, na.rm = TRUE),
mean = mean(Crude_Rate, na.rm = TRUE),
q3 = quantile(Crude_Rate, 0.75, na.rm = TRUE),
max = max(Crude_Rate, na.rm = TRUE),
sd = sd(Crude_Rate, na.rm = TRUE),
iqr = IQR(Crude_Rate, na.rm = TRUE))
summary_df <- data.frame(Variable = c("Median AQI", "Crude Rate"),
Min = c(median_aqi_summary$min, crude_rate_summary$min),
Q1 = c(median_aqi_summary$q1, crude_rate_summary$q1),
Median = c(median_aqi_summary$median, crude_rate_summary$median),
Mean = c(median_aqi_summary$mean, crude_rate_summary$mean),
Q3 = c(median_aqi_summary$q3, crude_rate_summary$q3),
Max = c(median_aqi_summary$max, crude_rate_summary$max),
SD = c(median_aqi_summary$sd, crude_rate_summary$sd),
IQR = c(median_aqi_summary$iqr, crude_rate_summary$iqr))
knitr::kable(summary_df, digits = 2, align = "r")
| Median AQI |
16.0 |
36.1 |
40.00 |
38.91 |
43.10 |
55.0 |
7.79 |
7.00 |
| Crude Rate |
35.7 |
53.7 |
76.35 |
78.53 |
95.92 |
139.5 |
28.15 |
42.23 |