# This script creates a map of California with stations showing the air quality
# on September 6th. It also creates a plot to identify a potential relationship
# between air quality and social vulnerability.
# Created by John King
# February 25th, 2025
#Check for sf package installation, installing if necessary, loading sf library,
#and creating a data frame for station_SVI layer in ArcGIS
if (!require('sf')) install.packages('sf')
## Loading required package: sf
## Warning: package 'sf' was built under R version 4.4.2
## Linking to GEOS 3.12.2, GDAL 3.9.3, PROJ 9.4.1; sf_use_s2() is TRUE
library(sf)
df.pm25 = sf::st_read(dsn = "Exercise5.gdb", layer = "station_SVI")
## Reading layer `station_SVI' from data source
## `\\samba2.engr.scu.edu\jwking\ECC\Documents\ArcGIS\Projects\Exercise5\Exercise5.gdb'
## using driver `OpenFileGDB'
## Simple feature collection with 107 features and 166 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -122.8183 ymin: 32.57816 xmax: -115.4831 ymax: 41.72689
## Geodetic CRS: WGS 84
#Mapping data read from ArcGIS
plot(df.pm25["Sep6th_PM2_5_ugm3"], main="")

#Loading in basemap to show geographic locations of stations
if (!require('mapview')) install.packages('mapview')
## Loading required package: mapview
## Warning: package 'mapview' was built under R version 4.4.2
library(mapview)
mapview(df.pm25, zcol = "Sep6th_PM2_5_ugm3", cex = "Sep6th_PM2_5_ugm3",
alpha.regions=.8, map.types = c("Esri.WorldTopoMap"),layer.name = c("PM 2.5"))
#Defining columns relating to SVI and pm2.5
y = df.pm25$Sep6th_PM2_5_ugm3
x = df.pm25$RPL_THEMES
#Linear regression of plot
regression_line = lm(y ~ x)
#Checking regression line with ArcGIS result
summary(regression_line)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.145 -5.494 -0.944 2.554 90.057
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.54953 1.04064 14.94 <2e-16 ***
## x -0.00733 0.01077 -0.68 0.498
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.72 on 105 degrees of freedom
## Multiple R-squared: 0.004388, Adjusted R-squared: -0.005094
## F-statistic: 0.4628 on 1 and 105 DF, p-value: 0.4978
#Installing ggpubr package
if (!require('ggpubr')) install.packages('ggpubr')
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.4.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.2
if (!require('ggplot2')) install.packages('ggplot2')
library(ggpubr)
library(ggplot2)
#Creating regression plot of SVI and pm2.5 relationship
ggplot(df.pm25, aes(x = RPL_THEMES, y = Sep6th_PM2_5_ugm3)) +
geom_smooth(method="lm") +
geom_point() +
labs(x="SVI", y="PM2.5") +
ggpubr::stat_cor(aes(label = paste(after_stat(rr.label), ..p.label.., sep = "~`,`~")),
size = 4, r.digits=3, p.digits = 3) +
scale_x_continuous(limits = c(0,1)) +
theme_bw()
## Warning: The dot-dot notation (`..p.label..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(p.label)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_cor()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
