- Data activity (10 min)
- Introduction to Simple Features (30 min)
- Mapping Points and Polygons (20 min)
- Break (5 min)
- Mapping Points and Polygons (10 min)
- Introduction to Tigris (if time)
- Final project time (Remainder)
2023-08-03
print.data.frame(groups)
## group 1 group 2 group 3 ## 1 Spindler, Laine Addison Tian, Zerui ## 2 Jun, Ernest Ng Wei Cai, Qingyuan Somyurek, Ecem ## 3 Lim, Fang Jan Andrew Yu Ming Xin, Su, Barry ## 4 Ng, Michelle Alsayegh, Aisha E H M I Gupta, Umang ## group 4 group 5 ## 1 Albertini, Federico Gnanam, Akash Y ## 2 Dotson, Bianca Ciara Shah, Jainam ## 3 Premkrishna, Shrish Huynh Le Hue Tam, Vivian ## 4 Knutson, Blue C ## group 6 group 7 ## 1 Tan, Zheng Yang Widodo, Ignazio Marco ## 2 Saccone, Alexander Connor Wan Rosli, Nadia ## 3 Cortez, Hugo Alexander Ning, Zhi Yan ## 4 Ramos, Jessica Andria Potestades Leong, Wen Hou Lester
ggplot2ggplot2library(sf)
# load a simple features object
nc <- st_read(system.file("shape/nc.shp", package="sf"))
## Reading layer `nc' from data source ## `C:\Users\tyler\AppData\Local\R\win-library\4.3\sf\shape\nc.shp' ## using driver `ESRI Shapefile' ## Simple feature collection with 100 features and 14 fields ## Geometry type: MULTIPOLYGON ## Dimension: XY ## Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 ## Geodetic CRS: NAD27
# look at nc head(nc)
## Simple feature collection with 6 features and 14 fields ## Geometry type: MULTIPOLYGON ## Dimension: XY ## Bounding box: xmin: -81.74107 ymin: 36.07282 xmax: -75.77316 ymax: 36.58965 ## Geodetic CRS: NAD27 ## AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 ## 1 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 ## 2 0.061 1.231 1827 1827 Alleghany 37005 37005 3 487 0 ## 3 0.143 1.630 1828 1828 Surry 37171 37171 86 3188 5 ## 4 0.070 2.968 1831 1831 Currituck 37053 37053 27 508 1 ## 5 0.153 2.206 1832 1832 Northampton 37131 37131 66 1421 9 ## 6 0.097 1.670 1833 1833 Hertford 37091 37091 46 1452 7 ## NWBIR74 BIR79 SID79 NWBIR79 geometry ## 1 10 1364 0 19 MULTIPOLYGON (((-81.47276 3... ## 2 10 542 3 12 MULTIPOLYGON (((-81.23989 3... ## 3 208 3616 6 260 MULTIPOLYGON (((-80.45634 3... ## 4 123 830 2 145 MULTIPOLYGON (((-76.00897 3... ## 5 1066 1606 3 1197 MULTIPOLYGON (((-77.21767 3... ## 6 954 1838 5 1237 MULTIPOLYGON (((-76.74506 3...
class(nc)
## [1] "sf" "data.frame"
View(nc)
plot(nc[[15]][[1]][[1]][[1]]) lines(nc[[15]][[1]][[1]][[1]])
plot(nc[1,15])
# define index for lengths greater than 1 w <- which(sapply(nc$geometry, length) > 1) # take a look nc[w,]
## Simple feature collection with 6 features and 14 fields ## Geometry type: MULTIPOLYGON ## Dimension: XY ## Bounding box: xmin: -77.4741 ymin: 34.58783 xmax: -75.45698 ymax: 36.55716 ## Geodetic CRS: NAD27 ## AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 ## 4 0.070 2.968 1831 1831 Currituck 37053 37053 27 508 1 ## 56 0.094 3.640 2000 2000 Dare 37055 37055 28 521 0 ## 57 0.203 3.197 2004 2004 Beaufort 37013 37013 7 2692 7 ## 87 0.167 2.709 2099 2099 Hyde 37095 37095 48 338 0 ## 91 0.177 2.916 2119 2119 Craven 37049 37049 25 5868 13 ## 95 0.125 2.868 2156 2156 Carteret 37031 37031 16 2414 5 ## NWBIR74 BIR79 SID79 NWBIR79 geometry ## 4 123 830 2 145 MULTIPOLYGON (((-76.00897 3... ## 56 43 1059 1 73 MULTIPOLYGON (((-75.78317 3... ## 57 1131 2909 4 1163 MULTIPOLYGON (((-77.10377 3... ## 87 134 427 0 169 MULTIPOLYGON (((-76.51894 3... ## 91 1744 7595 18 2342 MULTIPOLYGON (((-76.89761 3... ## 95 341 3339 4 487 MULTIPOLYGON (((-77.14896 3...
library(dplyr) # get all observations with lengths > 1 nc %>% filter(sapply(geometry, length) > 1)
## Simple feature collection with 6 features and 14 fields ## Geometry type: MULTIPOLYGON ## Dimension: XY ## Bounding box: xmin: -77.4741 ymin: 34.58783 xmax: -75.45698 ymax: 36.55716 ## Geodetic CRS: NAD27 ## AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 ## 1 0.070 2.968 1831 1831 Currituck 37053 37053 27 508 1 ## 2 0.094 3.640 2000 2000 Dare 37055 37055 28 521 0 ## 3 0.203 3.197 2004 2004 Beaufort 37013 37013 7 2692 7 ## 4 0.167 2.709 2099 2099 Hyde 37095 37095 48 338 0 ## 5 0.177 2.916 2119 2119 Craven 37049 37049 25 5868 13 ## 6 0.125 2.868 2156 2156 Carteret 37031 37031 16 2414 5 ## NWBIR74 BIR79 SID79 NWBIR79 geometry ## 1 123 830 2 145 MULTIPOLYGON (((-76.00897 3... ## 2 43 1059 1 73 MULTIPOLYGON (((-75.78317 3... ## 3 1131 2909 4 1163 MULTIPOLYGON (((-77.10377 3... ## 4 134 427 0 169 MULTIPOLYGON (((-76.51894 3... ## 5 1744 7595 18 2342 MULTIPOLYGON (((-76.89761 3... ## 6 341 3339 4 487 MULTIPOLYGON (((-77.14896 3...
ggplot2geom_sf() to plot simple featureslibrary(ggplot2) ggplot(nc)+ geom_sf()
ggplot() and geom_sf()?library(readr)
# read climate hazards
cdp_hazards <- read_csv("cdp_hazards.csv")
## Rows: 2693 Columns: 17 ## ── Column specification ──────────────────────────────────────────────────────── ## Delimiter: "," ## chr (12): Questionnaire, Organization, Country, CDP Region, Parent Section, ... ## dbl (4): Year Reported to CDP, Account Number, Question Number, Column Number ## lgl (1): File Name ## ## ℹ Use `spec()` to retrieve the full column specification for this data. ## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(magrittr)
# load location data
cdp_geo <- read_csv("CDP-Cities-geographical-coordinates.csv")
# only need account number and lat/long
cdp_geo %<>%
rename(`Account Number` = Account.Number) %>%
select(`Account Number`, lat, long)
cdp_hazards has “long” and “lat” variablessf object?# join hazards with geom cdp_hazards %<>% left_join(cdp_geo)
## Joining with `by = join_by(`Account Number`)`
# convert hazards to simple features
cdp_hazards %<>%
st_as_sf(coords = c("long", "lat"))
ggplot(cdp_hazards)+ geom_sf()
# plot heat wave cities ggplot(cdp_hazards %>% filter(hazard == "Heat wave"))+ geom_sf(col = "red")
col = hazard to the following code?aes()?# restrict data to only certain types of hazards ggplot(cdp_hazards)+ geom_sf()
col = hazard to the following code?aes()?# restrict data to only certain types of hazards ggplot(cdp_hazards)+ geom_sf(aes(col = hazard), alpha = 0.5)
%in% function# restrict data to only certain types of hazards
cdp_hazards %<>%
filter(hazard %in% c("Heat wave",
"Flash / surface flood",
"Drought",
"Coastal flood"))
# restrict data to only certain types of hazards ggplot(cdp_hazards)+ geom_sf(aes(col = hazard), alpha = 0.5)
# plot different hazards
ggplot(cdp_hazards)+
geom_sf(aes(col = hazard), alpha = 0.5)+
scale_color_manual(values = c("lightblue",
"tan",
"darkblue",
"gold3"))
# plot different hazards
ggplot(cdp_hazards)+
geom_sf(aes(col = hazard), alpha = 0.5)+
scale_color_manual(values = c("lightblue",
"tan",
"darkblue",
"gold3"))
rnaturalearth and rnaturalearthdata packageslibrary(rnaturalearth)
## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package, ## which was just loaded, will retire in October 2023. ## Please refer to R-spatial evolution reports for details, especially ## https://r-spatial.org/r/2023/05/15/evolution4.html. ## It may be desirable to make the sf package available; ## package maintainers should consider adding sf to Suggests:. ## The sp package is now running under evolution status 2 ## (status 2 uses the sf package in place of rgdal)
## Support for Spatial objects (`sp`) will be deprecated in {rnaturalearth} and will be removed in a future release of the package. Please use `sf` objects with {rnaturalearth}. For example: `ne_download(returnclass = 'sf')`
library(rnaturalearthdata)
## ## Attaching package: 'rnaturalearthdata'
## The following object is masked from 'package:rnaturalearth': ## ## countries110
world <- ne_countries(scale = "medium", returnclass = "sf")
## let's try plotting the world ggplot(world)+ geom_sf()
st_crs(cdp_hazards) st_crs(world)
ggplot(cdp_hazards)+ geom_sf(world)+ geom_sf()
cdp_hazards %<>% st_set_crs(st_crs(world))
## let's try plotting the world
ggplot(cdp_hazards)+
geom_sf(data = world)+
geom_sf(aes(col = hazard), alpha = 0.5)+
scale_color_manual(values = c("lightblue",
"tan",
"darkblue",
"gold3"))+
theme_minimal()
lims() to accomplish this## let's try plotting the world
ggplot(cdp_hazards)+
geom_sf(data = world)+
geom_sf(aes(col = hazard), alpha = 0.5)+
scale_color_manual(values = c("lightblue",
"tan",
"darkblue",
"gold3"))+
theme_minimal()
print.data.frame(groups)
## group 1 group 2 group 3 ## 1 Spindler, Laine Addison Tian, Zerui ## 2 Jun, Ernest Ng Wei Cai, Qingyuan Somyurek, Ecem ## 3 Lim, Fang Jan Andrew Yu Ming Xin, Su, Barry ## 4 Ng, Michelle Alsayegh, Aisha E H M I Gupta, Umang ## group 4 group 5 ## 1 Albertini, Federico Gnanam, Akash Y ## 2 Dotson, Bianca Ciara Shah, Jainam ## 3 Premkrishna, Shrish Huynh Le Hue Tam, Vivian ## 4 Knutson, Blue C ## group 6 group 7 ## 1 Tan, Zheng Yang Widodo, Ignazio Marco ## 2 Saccone, Alexander Connor Wan Rosli, Nadia ## 3 Cortez, Hugo Alexander Ning, Zhi Yan ## 4 Ramos, Jessica Andria Potestades Leong, Wen Hou Lester
tigris to gather shapefile datatigris, we can gather lots of different shapefiles from the US Census and the National Historical Geographic Information Systemtigris:: to see some optionslibrary(tigris) # gather county shapefiles counties <- counties(state = "CA")
tigris, we can gather lots of different shapefiles from the US Census and the National Historical Geographic Information System.tigris:: to see some options# gather county shapefiles counties <- counties(state = "CA") # plot counties ggplot(counties)+ geom_sf()
# gather county shapefiles roads <- roads(state = "CA", county = "Santa Clara")
# plot counties ggplot(counties)+ geom_sf()+ geom_sf(data = roads)
# it may help to limit data to santa clara county sc_county <- counties %>% filter(COUNTYFP == "085") # plot santa clara county first ggplot(sc_county)+ geom_sf()+ geom_sf(data = roads)
school_districts <- school_districts(state = "CA")
# get state of california shape ca <- states() %>% filter(NAME == "California")
# try plotting again ggplot(school_districts)+ geom_sf(data = ca)+ geom_sf()
# read in data
aqi22 <- read_csv("aqi22_clean_ca.csv")
# make air quality data spatial
aqi22 %<>%
st_as_sf(coords = c("X", "Y"))
# set crs
aqi22 %<>%
st_set_crs(st_crs(ca))
library(viridis) # try plotting again ggplot(aqi22)+ geom_sf(data = ca)+ geom_sf(data = school_districts)+ geom_sf(aes(col = `Arithmetic Mean`))+ scale_color_viridis(option = "magma", direction = -1)+ theme_minimal()+ labs(col = "PM2.5 Average")
# join school district boundaries with AQI school_districts %<>% st_join(aqi22)
school_districts
## Simple feature collection with 390 features and 67 fields ## Geometry type: MULTIPOLYGON ## Dimension: XY ## Bounding box: xmin: -124.482 ymin: 32.52883 xmax: -114.1312 ymax: 42.00748 ## Geodetic CRS: NAD83 ## First 10 features: ## STATEFP UNSDLEA GEOID NAME LSAD LOGRADE ## 1 06 42930 0642930 Winters Joint Unified School District 00 KG ## 2 06 42710 0642710 Willows Unified School District 00 KG ## 3 06 20670 0620670 Lakeport Unified School District 00 KG ## 4 06 24750 0624750 Middletown Unified School District 00 KG ## 5 06 24230 0624230 McFarland Unified School District 00 KG ## 6 06 36800 0636800 Sierra Sands Unified School District 00 KG ## 7 06 25230 0625230 Mojave Unified School District 00 KG ## 8 06 00045 0600045 Orland Joint Unified School District 00 KG ## 9 06 22860 0622860 Los Molinos Unified School District 00 KG ## 10 06 37830 0637830 St. Helena Unified School District 00 KG ## HIGRADE MTFCC SDTYP FUNCSTAT ALAND AWATER INTPTLAT INTPTLON ## 1 12 G5420 <NA> E 330017796 973625 +38.5718817 -122.0648647 ## 2 12 G5420 <NA> E 793909668 3491342 +39.5162423 -122.2199619 ## 3 12 G5420 <NA> E 297158894 34993938 +39.0573241 -122.9685093 ## 4 12 G5420 <NA> E 387635806 2175647 +38.7737905 -122.6071650 ## 5 12 G5420 <NA> E 423289057 37601 +35.6228995 -119.1161985 ## 6 12 G5420 <NA> E 2525462727 4383274 +35.5603578 -117.7739518 ## 7 12 G5420 <NA> E 2409612734 1546620 +35.1925527 -118.0317109 ## 8 12 G5420 <NA> E 727547789 12093189 +39.7749276 -122.7301903 ## 9 12 G5420 <NA> E 814070052 3288509 +39.9886420 -121.9149365 ## 10 12 G5420 <NA> E 731019544 81154899 +38.5954891 -122.3199583 ## State Code County Code Site Num Parameter Code POC Datum ## 1 <NA> <NA> <NA> NA NA <NA> ## 2 <NA> <NA> <NA> NA NA <NA> ## 3 06 033 3002 88101 1 NAD83 ## 4 <NA> <NA> <NA> NA NA <NA> ## 5 <NA> <NA> <NA> NA NA <NA> ## 6 06 029 0018 88101 1 NAD83 ## 7 06 029 0019 88101 3 NAD83 ## 8 <NA> <NA> <NA> NA NA <NA> ## 9 <NA> <NA> <NA> NA NA <NA> ## 10 <NA> <NA> <NA> NA NA <NA> ## Parameter Name Sample Duration Pollutant Standard Metric Used ## 1 <NA> <NA> <NA> <NA> ## 2 <NA> <NA> <NA> <NA> ## 3 PM2.5 - Local Conditions 24 HOUR PM25 24-hour 2012 Daily Mean ## 4 <NA> <NA> <NA> <NA> ## 5 <NA> <NA> <NA> <NA> ## 6 PM2.5 - Local Conditions 24-HR BLK AVG PM25 24-hour 2012 Daily Mean ## 7 PM2.5 - Local Conditions 24-HR BLK AVG PM25 24-hour 2012 Daily Mean ## 8 <NA> <NA> <NA> <NA> ## 9 <NA> <NA> <NA> <NA> ## 10 <NA> <NA> <NA> <NA> ## Method Name Year ## 1 <NA> NA ## 2 <NA> NA ## 3 R & P Model 2000 PM-2.5 Air Sampler w/VSCC - Gravimetric 2022 ## 4 <NA> NA ## 5 <NA> NA ## 6 <NA> 2022 ## 7 <NA> 2022 ## 8 <NA> NA ## 9 <NA> NA ## 10 <NA> NA ## Units of Measure Event Type Observation Count Observation Percent ## 1 <NA> <NA> NA NA ## 2 <NA> <NA> NA NA ## 3 Micrograms/cubic meter (LC) No Events 15 25 ## 4 <NA> <NA> NA NA ## 5 <NA> <NA> NA NA ## 6 Micrograms/cubic meter (LC) No Events 181 50 ## 7 Micrograms/cubic meter (LC) No Events 116 32 ## 8 <NA> <NA> NA NA ## 9 <NA> <NA> NA NA ## 10 <NA> <NA> NA NA ## Completeness Indicator Valid Day Count Required Day Count ## 1 <NA> NA NA ## 2 <NA> NA NA ## 3 N 15 61 ## 4 <NA> NA NA ## 5 <NA> NA NA ## 6 N 181 365 ## 7 N 116 365 ## 8 <NA> NA NA ## 9 <NA> NA NA ## 10 <NA> NA NA ## Exceptional Data Count Null Data Count Primary Exceedance Count ## 1 NA NA NA ## 2 NA NA NA ## 3 0 0 0 ## 4 NA NA NA ## 5 NA NA NA ## 6 0 0 0 ## 7 0 0 0 ## 8 NA NA NA ## 9 NA NA NA ## 10 NA NA NA ## Secondary Exceedance Count Certification Indicator Num Obs Below MDL ## 1 NA <NA> NA ## 2 NA <NA> NA ## 3 0 Certification not required 0 ## 4 NA <NA> NA ## 5 NA <NA> NA ## 6 0 Certification not required 0 ## 7 0 Certification not required 0 ## 8 NA <NA> NA ## 9 NA <NA> NA ## 10 NA <NA> NA ## Arithmetic Mean Arithmetic Standard Dev 1st Max Value 1st Max DateTime ## 1 NA NA NA <NA> ## 2 NA NA NA <NA> ## 3 3.880000 1.574892 8.0 2022/01/17 00:00:00+00 ## 4 NA NA NA <NA> ## 5 NA NA NA <NA> ## 6 4.109392 3.745408 32.3 2022/05/20 00:00:00+00 ## 7 4.192241 2.214739 10.9 2022/04/02 00:00:00+00 ## 8 NA NA NA <NA> ## 9 NA NA NA <NA> ## 10 NA NA NA <NA> ## 2nd Max Value 2nd Max DateTime 3rd Max Value 3rd Max DateTime ## 1 NA <NA> NA <NA> ## 2 NA <NA> NA <NA> ## 3 5.5 2022/01/11 00:00:00+00 5.2 2022/01/29 00:00:00+00 ## 4 NA <NA> NA <NA> ## 5 NA <NA> NA <NA> ## 6 20.7 2022/04/11 00:00:00+00 14.3 2022/06/14 00:00:00+00 ## 7 10.9 2022/04/27 00:00:00+00 10.7 2022/04/26 00:00:00+00 ## 8 NA <NA> NA <NA> ## 9 NA <NA> NA <NA> ## 10 NA <NA> NA <NA> ## 4th Max Value 4th Max DateTime 1st Max Non Overlapping Value ## 1 NA <NA> NA ## 2 NA <NA> NA ## 3 4.5 2022/02/04 00:00:00+00 NA ## 4 NA <NA> NA ## 5 NA <NA> NA ## 6 12.3 2022/05/03 00:00:00+00 NA ## 7 9.7 2022/04/28 00:00:00+00 NA ## 8 NA <NA> NA ## 9 NA <NA> NA ## 10 NA <NA> NA ## 1st NO Max DateTime 2nd Max Non Overlapping Value 2nd NO Max DateTime ## 1 NA NA NA ## 2 NA NA NA ## 3 NA NA NA ## 4 NA NA NA ## 5 NA NA NA ## 6 NA NA NA ## 7 NA NA NA ## 8 NA NA NA ## 9 NA NA NA ## 10 NA NA NA ## 99th Percentile 98th Percentile 95th Percentile 90th Percentile ## 1 NA NA NA NA ## 2 NA NA NA NA ## 3 8.0 8.0 8.0 5.5 ## 4 NA NA NA NA ## 5 NA NA NA NA ## 6 20.7 12.3 9.7 8.0 ## 7 10.9 10.7 8.7 7.2 ## 8 NA NA NA NA ## 9 NA NA NA NA ## 10 NA NA NA NA ## 75th Percentile 50th Percentile 10th Percentile Local Site Name ## 1 NA NA NA <NA> ## 2 NA NA NA <NA> ## 3 4.5 3.8 2.0 Lakeport-S. Main Street ## 4 NA NA NA <NA> ## 5 NA NA NA <NA> ## 6 5.5 3.5 0.7 Ridgecrest-Ward ## 7 5.4 3.9 1.5 Mojave - CA 58 Business ## 8 NA NA NA <NA> ## 9 NA NA NA <NA> ## 10 NA NA NA <NA> ## Address State Name County Name City Name ## 1 <NA> <NA> <NA> <NA> ## 2 <NA> <NA> <NA> <NA> ## 3 2617 S. Main Street, Lakeport, CA 95453 California Lake Lakeport ## 4 <NA> <NA> <NA> <NA> ## 5 <NA> <NA> <NA> <NA> ## 6 2051 Ward, Ridgecrest CA California Kern Ridgecrest ## 7 1773 CA-58 BUS California Kern Mojave ## 8 <NA> <NA> <NA> <NA> ## 9 <NA> <NA> <NA> <NA> ## 10 <NA> <NA> <NA> <NA> ## CBSA Name Date of Last Change geometry ## 1 <NA> <NA> MULTIPOLYGON (((-122.1493 3... ## 2 <NA> <NA> MULTIPOLYGON (((-122.4564 3... ## 3 Clearlake, CA 2022-06-09 MULTIPOLYGON (((-123.0942 3... ## 4 <NA> <NA> MULTIPOLYGON (((-122.8047 3... ## 5 <NA> <NA> MULTIPOLYGON (((-119.312 35... ## 6 Bakersfield, CA 2022-09-29 MULTIPOLYGON (((-118.1656 3... ## 7 Bakersfield, CA 2022-11-08 MULTIPOLYGON (((-118.3113 3... ## 8 <NA> <NA> MULTIPOLYGON (((-122.9382 3... ## 9 <NA> <NA> MULTIPOLYGON (((-122.1707 4... ## 10 <NA> <NA> MULTIPOLYGON (((-122.5864 3...
# average AQI for each school district with data school_districts %<>% group_by(GEOID) %>% summarise(mn_aqi = mean(`Arithmetic Mean`, na.rm = T))
# try plotting again ggplot(school_districts)+ geom_sf(data = ca)+ geom_sf(aes(fill = mn_aqi))+ scale_fill_viridis(option = "magma", direction = -1)+ theme_minimal()+ labs(fill = "PM2.5 Average")
leaflet functionaddTiles() will add default mapsaddCircleMarkers() allows us to add indicators of air qualitylibrary(leaflet)
m <- leaflet(data = aqi22) %>%
addTiles() %>% # Add default OpenStreetMap map tiles
addCircleMarkers(color = ~`Arithmetic Mean`,
radius = 5)
m # Print the map