# Load the necessary packages
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the 2019 annual AQI-by-county CSV into `airpo19`
airpo19 <- read_csv(
  file = "/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/Air Pollution Data/annual_aqi_by_county_2019 2.csv"
)
## Rows: 1020 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): State, County
## dbl (16): Year, Days with AQI, Good Days, Moderate Days, Unhealthy for Sensi...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the rows whose State is Texas
airpo19 <- filter(airpo19, State == "Texas")

# Make every column name syntactically valid: make.names() replaces spaces
# and other invalid characters with dots (e.g. "Days PM2.5" -> "Days.PM2.5")
names(airpo19) <- make.names(names(airpo19))

# Keep the state/county identifiers plus the ozone, PM2.5, and PM10 columns
airpo19 <- select(airpo19, State, County, Days.Ozone, Days.PM2.5, Days.PM10)

# Print summary statistics for each retained column
summary(airpo19)
##     State              County            Days.Ozone    Days.PM2.5   
##  Length:41          Length:41          Min.   :  0   Min.   :  0.0  
##  Class :character   Class :character   1st Qu.:155   1st Qu.:  0.0  
##  Mode  :character   Mode  :character   Median :261   Median : 97.0  
##                                        Mean   :225   Mean   :123.9  
##                                        3rd Qu.:341   3rd Qu.:193.0  
##                                        Max.   :365   Max.   :354.0  
##    Days.PM10     
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :0.0000  
##  Mean   :0.2683  
##  3rd Qu.:0.0000  
##  Max.   :3.0000
# Load the necessary packages
library(ggplot2)

# Scatterplots of the per-county pollutant day counts, one point per county.
#
# The Days.* columns of the annual AQI-by-county file count the days on which
# each pollutant was the main (AQI-defining) pollutant, not the days on which
# it exceeded an air quality standard, so the axis labels describe them that
# way.

# Days.Ozone vs. Days.PM2.5
ggplot(airpo19, aes(x = Days.Ozone, y = Days.PM2.5)) +
  geom_point() +
  labs(
    x = "Days with Ozone as the Main Pollutant",
    y = "Days with PM2.5 as the Main Pollutant"
  )

# Days.Ozone vs. Days.PM10
ggplot(airpo19, aes(x = Days.Ozone, y = Days.PM10)) +
  geom_point() +
  labs(
    x = "Days with Ozone as the Main Pollutant",
    y = "Days with PM10 as the Main Pollutant"
  )

# Load the necessary packages
library(dplyr)

# Build the Texas-only table: county name plus the three pollutant columns
# (the State column is dropped once it has been used for filtering)
airpo_tx <- select(
  filter(airpo19, State == "Texas"),
  County, Days.Ozone, Days.PM2.5, Days.PM10
)

# Per-county means of each pollutant column, ignoring missing values
county_summary <- airpo_tx %>%
  group_by(County) %>%
  summarise(
    mean_O3 = mean(Days.Ozone, na.rm = TRUE),
    mean_PM2.5 = mean(Days.PM2.5, na.rm = TRUE),
    mean_PM10 = mean(Days.PM10, na.rm = TRUE)
  )

# Display the per-county summary
county_summary
## # A tibble: 41 × 4
##    County    mean_O3 mean_PM2.5 mean_PM10
##    <chr>       <dbl>      <dbl>     <dbl>
##  1 Bell          265         99         0
##  2 Bexar         161        200         1
##  3 Bowie           0        349         0
##  4 Brazoria      360          0         0
##  5 Brewster      308         54         0
##  6 Cameron       128        237         0
##  7 Collin        365          0         0
##  8 Culberson     103         73         0
##  9 Dallas        199        146         0
## 10 Denton        287         72         0
## # … with 31 more rows
# Load the necessary packages
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ tibble  3.1.8     ✔ stringr 1.5.0
## ✔ tidyr   1.3.0     ✔ forcats 1.0.0
## ✔ purrr   1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(tmap)

# Read the 2019 US county shapefile (all states; narrowed to Texas afterwards)
texas_counties <- st_read(
  dsn = "/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/tl_2019_us_county/tl_2019_us_county.shp"
)
## Reading layer `tl_2019_us_county' from data source 
##   `/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/tl_2019_us_county/tl_2019_us_county.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 3233 features and 17 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
## Geodetic CRS:  NAD83
# Keep only the Texas county polygons (STATEFP "48")
texas_counties <- texas_counties %>% filter(STATEFP == "48")

# Attach the per-county pollution summary to the polygons by county name.
# This is a left join, so counties with no matching summary row keep their
# geometry and get NA in the summary columns.
airpo_map <- left_join(
  texas_counties,
  county_summary,
  by = c("NAME" = "County")
)

# Choropleth of mean_PM2.5 by county.
# mean_PM2.5 is an average of Days.PM2.5 (a count of days), not a measured
# concentration, so the legend is titled in days rather than µg/m³.
tm_shape(airpo_map) +
  tm_polygons(
    "mean_PM2.5",
    palette = "Blues",
    title = "Days with PM2.5 as Main Pollutant"
  )

# Spatial patterns
# Load the necessary packages
library(tidyverse)
library(sf)
library(tmap)

# Read the 2019 US county shapefile (all states; narrowed to Texas afterwards)
texas_counties <- st_read(
  dsn = "/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/tl_2019_us_county/tl_2019_us_county.shp"
)
## Reading layer `tl_2019_us_county' from data source 
##   `/Users/juliegonzalez/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/Spring 2023 GIS/tl_2019_us_county/tl_2019_us_county.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 3233 features and 17 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
## Geodetic CRS:  NAD83
# Keep only the Texas county polygons (STATEFP "48")
texas_counties <- texas_counties %>% filter(STATEFP == "48")

# Attach the per-county pollution summary to the polygons by county name.
# This is a left join, so counties with no matching summary row keep their
# geometry and get NA in the summary columns.
airpo_map <- left_join(
  texas_counties,
  county_summary,
  by = c("NAME" = "County")
)

# Choropleth of mean_PM2.5 by county, with gray borders and a main title.
#
# The border colour is passed to tm_polygons() via border.col instead of a
# separate tm_borders() layer: tm_polygons() already draws borders, and a
# second border layer under the same tm_shape() makes tmap warn that the
# duplicated layer type is omitted.
#
# mean_PM2.5 is an average of Days.PM2.5 (a count of days), not a measured
# concentration, so the legend is titled in days.
tm_shape(airpo_map) +
  tm_polygons(
    "mean_PM2.5",
    palette = "Blues",
    title = "Days with PM2.5 as Main Pollutant",
    border.col = "gray50"
  ) +
  tm_layout(main.title = "Spatial Patterns of PM2.5 Pollution in Texas")