# This script creates a map of California with stations showing the air quality
# on September 6th. It also creates a plot to identify a potential relationship
# between air quality and social vulnerability.
# Created by John King
# February 25th, 2025

#Check for sf package installation, installing if necessary, loading sf library,
#and creating a data frame for station_SVI layer in ArcGIS
if (!require('sf')) install.packages('sf')
## Loading required package: sf
## Warning: package 'sf' was built under R version 4.4.2
## Linking to GEOS 3.12.2, GDAL 3.9.3, PROJ 9.4.1; sf_use_s2() is TRUE
library(sf)
df.pm25 = sf::st_read(dsn = "Exercise5.gdb", layer = "station_SVI")
## Reading layer `station_SVI' from data source 
##   `\\samba2.engr.scu.edu\jwking\ECC\Documents\ArcGIS\Projects\Exercise5\Exercise5.gdb' 
##   using driver `OpenFileGDB'
## Simple feature collection with 107 features and 166 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -122.8183 ymin: 32.57816 xmax: -115.4831 ymax: 41.72689
## Geodetic CRS:  WGS 84
#Mapping data read from ArcGIS
plot(df.pm25["Sep6th_PM2_5_ugm3"], main="")

#Loading in basemap to show geographic locations of stations
if (!require('mapview')) install.packages('mapview')
## Loading required package: mapview
## Warning: package 'mapview' was built under R version 4.4.2
library(mapview)
mapview(df.pm25, zcol = "Sep6th_PM2_5_ugm3", cex = "Sep6th_PM2_5_ugm3",
        alpha.regions=.8, map.types = c("Esri.WorldTopoMap"),layer.name = c("PM 2.5"))
#Defining columns relating to SVI and pm2.5
y = df.pm25$Sep6th_PM2_5_ugm3
x = df.pm25$RPL_THEMES

#Linear regression of plot
regression_line = lm(y ~ x)

#Checking regression line with ArcGIS result
summary(regression_line)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -11.145  -5.494  -0.944   2.554  90.057 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 15.54953    1.04064   14.94   <2e-16 ***
## x           -0.00733    0.01077   -0.68    0.498    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.72 on 105 degrees of freedom
## Multiple R-squared:  0.004388,   Adjusted R-squared:  -0.005094 
## F-statistic: 0.4628 on 1 and 105 DF,  p-value: 0.4978
#Installing ggpubr package
if (!require('ggpubr')) install.packages('ggpubr')
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.4.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.2
if (!require('ggplot2')) install.packages('ggplot2')
library(ggpubr)
library(ggplot2)

#Creating regression plot of SVI and pm2.5 relationship
ggplot(df.pm25, aes(x = RPL_THEMES, y = Sep6th_PM2_5_ugm3)) +
  geom_smooth(method="lm") +
  geom_point() +
  labs(x="SVI", y="PM2.5") +
  ggpubr::stat_cor(aes(label = paste(after_stat(rr.label), ..p.label.., sep = "~`,`~")),
                   size = 4, r.digits=3, p.digits = 3) +
  scale_x_continuous(limits = c(0,1)) +
  theme_bw()
## Warning: The dot-dot notation (`..p.label..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(p.label)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_cor()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).