library(ggplot2)
library(plotly)
library(raster)
library(RColorBrewer)
library(viridis)
library(sf)
library(kableExtra)
library(maps)
library(rgdal)
setwd("~/Desktop/University of Utah PhD /Course Work/Fall 2022 Semester/GEOG6000_Data Analysis/lab08")
path_to_data = system.file("shape/nc.shp", package = "sf")
north_carolina = st_read(path_to_data, quiet = TRUE)
#Subsetting North Carolina to include only the CNTY ID, NAME, AREA, and PERIMETER
north_carolina = north_carolina [ , c("CNTY_ID", "NAME", "AREA", "PERIMETER")]
north_carolina
## Simple feature collection with 100 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
## Geodetic CRS: NAD27
## First 10 features:
## CNTY_ID NAME AREA PERIMETER geometry
## 1 1825 Ashe 0.114 1.442 MULTIPOLYGON (((-81.47276 3...
## 2 1827 Alleghany 0.061 1.231 MULTIPOLYGON (((-81.23989 3...
## 3 1828 Surry 0.143 1.630 MULTIPOLYGON (((-80.45634 3...
## 4 1831 Currituck 0.070 2.968 MULTIPOLYGON (((-76.00897 3...
## 5 1832 Northampton 0.153 2.206 MULTIPOLYGON (((-77.21767 3...
## 6 1833 Hertford 0.097 1.670 MULTIPOLYGON (((-76.74506 3...
## 7 1834 Camden 0.062 1.547 MULTIPOLYGON (((-76.00897 3...
## 8 1835 Gates 0.091 1.284 MULTIPOLYGON (((-76.56251 3...
## 9 1836 Warren 0.118 1.421 MULTIPOLYGON (((-78.30876 3...
## 10 1837 Stokes 0.124 1.428 MULTIPOLYGON (((-80.02567 3...
##Creating two points in space using x,y coordinates:
point_one = st_point(c(0,3))
point_two = st_point(c(5,7))
##Creating a line between two points:
a_line = st_linestring(c(point_one, point_two))
plot(a_line)
print(point_one)
## POINT (0 3)
a_line
## LINESTRING (0 3, 5 7)
##To determine the geometry of your simple feature
st_geometry_type(a_line)
## [1] LINESTRING
## 18 Levels: GEOMETRY POINT LINESTRING POLYGON MULTIPOINT ... TRIANGLE
##The CRS is the most important element of a simple feature - it gives us our spatial coordinates!
##Visit spatialreference.org to determine the EPSG code for the datum you'd like to work with.
##4326 is the code for WGS84
st_crs(4326)
## Coordinate Reference System:
## User input: EPSG:4326
## wkt:
## GEOGCRS["WGS 84",
## ENSEMBLE["World Geodetic System 1984 ensemble",
## MEMBER["World Geodetic System 1984 (Transit)"],
## MEMBER["World Geodetic System 1984 (G730)"],
## MEMBER["World Geodetic System 1984 (G873)"],
## MEMBER["World Geodetic System 1984 (G1150)"],
## MEMBER["World Geodetic System 1984 (G1674)"],
## MEMBER["World Geodetic System 1984 (G1762)"],
## MEMBER["World Geodetic System 1984 (G2139)"],
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]],
## ENSEMBLEACCURACY[2.0]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic latitude (Lat)",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic longitude (Lon)",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## USAGE[
## SCOPE["Horizontal component of 3D system."],
## AREA["World."],
## BBOX[-90,-180,90,180]],
## ID["EPSG",4326]]
##Defines the spatial extent of the data - min/max of x and y
st_bbox(north_carolina)
## xmin ymin xmax ymax
## -84.32385 33.88199 -75.45698 36.58965
##Reading in data
NY8 = st_read(dsn = "../datafiles/NY_data/NY8_utm18.shp",
layer = "NY8_utm18",
drivers = "ESRI Shapefile")
## options: ESRI Shapefile
## Reading layer `NY8_utm18' from data source
## `/Users/davidleydet/Desktop/University of Utah PhD /Course Work/Fall 2022 Semester/GEOG6000_Data Analysis/datafiles/NY_data/NY8_utm18.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 281 features and 17 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 358241.9 ymin: 4649755 xmax: 480393.1 ymax: 4808545
## Projected CRS: WGS 84 / UTM zone 18N
##Because it is a simple shapefile with the same data source and layer we can use the following syntax
NY8 = st_read("../datafiles/NY_data/NY8_utm18.shp")
## Reading layer `NY8_utm18' from data source
## `/Users/davidleydet/Desktop/University of Utah PhD /Course Work/Fall 2022 Semester/GEOG6000_Data Analysis/datafiles/NY_data/NY8_utm18.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 281 features and 17 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 358241.9 ymin: 4649755 xmax: 480393.1 ymax: 4808545
## Projected CRS: WGS 84 / UTM zone 18N
wna_climate = read.csv("../datafiles/WNAclimate.csv")
head(wna_climate)
## WNASEQ LONDD LATDD ELEV totsnopt annp Jan_Tmp Jul_Tmp
## 1 1 -107.9333 50.8736 686 78.7869 326 -13.9 18.8
## 2 2 -105.2670 55.2670 369 145.3526 499 -21.3 16.8
## 3 3 -102.5086 41.7214 1163 42.6544 450 -4.2 23.3
## 4 4 -110.2606 44.2986 2362 255.1009 489 -10.9 14.1
## 5 5 -114.1500 59.2500 880 164.8924 412 -23.9 14.4
## 6 6 -120.6667 57.4500 900 141.9260 451 -17.5 13.8
##Set as simple feature, using coordinates from the columns LONDD and LATDD, using coordinate system WGS84 (code - 4326)
wna_climate = st_as_sf(wna_climate,
coords = c("LONDD", "LATDD"),
crs = 4326)
wna_climate
## Simple feature collection with 2012 features and 6 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -138 ymin: 29.8333 xmax: -100 ymax: 60
## Geodetic CRS: WGS 84
## First 10 features:
## WNASEQ ELEV totsnopt annp Jan_Tmp Jul_Tmp geometry
## 1 1 686 78.7869 326 -13.9 18.8 POINT (-107.9333 50.8736)
## 2 2 369 145.3526 499 -21.3 16.8 POINT (-105.267 55.267)
## 3 3 1163 42.6544 450 -4.2 23.3 POINT (-102.5086 41.7214)
## 4 4 2362 255.1009 489 -10.9 14.1 POINT (-110.2606 44.2986)
## 5 5 880 164.8924 412 -23.9 14.4 POINT (-114.15 59.25)
## 6 6 900 141.9260 451 -17.5 13.8 POINT (-120.6667 57.45)
## 7 7 1100 183.4187 492 -18.8 13.2 POINT (-119.7167 56.7167)
## 8 8 1480 191.5657 606 -11.0 12.8 POINT (-114.6 50.7667)
## 9 9 651 144.9015 443 -22.4 15.3 POINT (-122.1667 59.5167)
## 10 10 725 151.0778 455 -23.5 15.4 POINT (-112.1 57.7667)
myplot = ggplot(wna_climate)
myplot +
geom_sf(color = "mediumseagreen", alpha = 0.4)
##Save this to disk as a shape file
st_write(wna_climate, dsn = "../datafiles/wnaclim.shp")
Ensure all of your data is in the same reference system!!
st_crs(NY8)
## Coordinate Reference System:
## User input: WGS 84 / UTM zone 18N
## wkt:
## PROJCRS["WGS 84 / UTM zone 18N",
## BASEGEOGCRS["WGS 84",
## DATUM["D_unknown",
## ELLIPSOID["WGS84",6378137,298.257223563,
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["Degree",0.0174532925199433]]],
## CONVERSION["UTM zone 18N",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",0,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",-75,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",0.9996,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",500000,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",0,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]],
## ID["EPSG",16018]],
## CS[Cartesian,2],
## AXIS["(E)",east,
## ORDER[1],
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]],
## AXIS["(N)",north,
## ORDER[2],
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]]]
##Check the EPSG code for your data using $epsg syntax
st_crs(NY8)$epsg
## [1] NA
st_crs(wna_climate)$epsg
## [1] 4326
##Another way to get the CRS
format(st_crs(NY8))
## [1] "WGS 84 / UTM zone 18N"
##Note: this should only be used when the simple feature is missing a CRS and you know what it is. It is NOT for re-projecting the sf object to a new coordinate system.
st_crs(wna_climate) = 4326
##The st_transform() function allows you to project your sf object to a new CRS. This is particularly useful if you have multiple data sources with different original coordinate systems.
##As a reminder: when you read in spatial data, the first thing you should use is st_crs to check the CRS and st_transform to re-project if necessary.
NY8 = st_transform(NY8, crs = 4326)
##Check to see if it worked - the initial check before the transformation was "NA"
st_crs(NY8)$epsg
## [1] 4326
oregon_tann = read_sf("../datafiles/oregon/oregontann.shp")
class(oregon_tann)
## [1] "sf" "tbl_df" "tbl" "data.frame"
oregon_tann %>%
kbl() %>%
kable_classic_2() %>%
scroll_box(width = "500px", height = "200px")
elevation | tann | coords_x1 | coords_x2 | geometry |
---|---|---|---|---|
846 | 9.6 | -120.717 | 44.917 | POINT (-120.717 44.917) |
96 | 12.5 | -120.200 | 45.717 | POINT (-120.2 45.717) |
543 | 11.1 | -122.717 | 42.217 | POINT (-122.717 42.217) |
2 | 10.3 | -123.883 | 46.150 | POINT (-123.883 46.15) |
1027 | 7.6 | -117.817 | 44.833 | POINT (-117.817 44.833) |
1050 | 8.4 | -117.833 | 44.783 | POINT (-117.833 44.783) |
24 | 10.8 | -124.383 | 43.117 | POINT (-124.383 43.117) |
1097 | 7.7 | -121.317 | 44.067 | POINT (-121.317 44.067) |
999 | 9.5 | -118.167 | 43.917 | POINT (-118.167 43.917) |
18 | 11.2 | -121.950 | 45.633 | POINT (-121.95 45.633) |
24 | 11.8 | -124.283 | 42.050 | POINT (-124.283 42.05) |
1262 | 8.1 | -119.050 | 43.583 | POINT (-119.05 43.583) |
64 | 10.2 | -124.567 | 42.833 | POINT (-124.567 42.833) |
1451 | 5.6 | -121.783 | 43.233 | POINT (-121.783 43.233) |
238 | 10.3 | -123.250 | 45.417 | POINT (-123.25 45.417) |
28 | 10.5 | -123.283 | 46.100 | POINT (-123.283 46.1) |
6 | 10.9 | -123.900 | 45.217 | POINT (-123.9 45.217) |
863 | 8.7 | -120.183 | 45.233 | POINT (-120.183 45.233) |
69 | 11.0 | -123.200 | 44.633 | POINT (-123.2 44.633) |
198 | 11.1 | -123.067 | 43.783 | POINT (-123.067 43.783) |
253 | 10.9 | -123.050 | 43.717 | POINT (-123.05 43.717) |
1974 | 3.2 | -122.133 | 42.900 | POINT (-122.133 42.9) |
99 | 11.1 | -123.317 | 44.933 | POINT (-123.317 44.933) |
250 | 10.7 | -122.967 | 43.783 | POINT (-122.967 43.783) |
89 | 11.7 | -123.317 | 43.667 | POINT (-123.317 43.667) |
405 | 9.6 | -121.133 | 45.450 | POINT (-121.133 45.45) |
809 | 8.7 | -117.917 | 45.567 | POINT (-117.917 45.567) |
35 | 12.4 | -123.583 | 43.600 | POINT (-123.583 43.6) |
1155 | 6.3 | -117.267 | 45.433 | POINT (-117.267 45.433) |
125 | 10.9 | -122.317 | 45.267 | POINT (-122.317 45.267) |
109 | 11.4 | -123.217 | 44.117 | POINT (-123.217 44.117) |
118 | 11.2 | -123.300 | 44.117 | POINT (-123.3 44.117) |
55 | 11.4 | -123.100 | 45.533 | POINT (-123.1 45.533) |
808 | 8.7 | -120.217 | 45.000 | POINT (-120.217 45) |
15 | 11.6 | -124.417 | 42.400 | POINT (-124.417 42.4) |
282 | 12.6 | -123.317 | 42.433 | POINT (-123.317 42.433) |
1109 | 7.5 | -120.933 | 44.517 | POINT (-120.933 44.517) |
814 | 7.9 | -117.117 | 44.883 | POINT (-117.117 44.883) |
1712 | 6.5 | -119.650 | 42.550 | POINT (-119.65 42.55) |
228 | 10.7 | -122.150 | 45.450 | POINT (-122.15 45.45) |
594 | 10.3 | -119.550 | 45.350 | POINT (-119.55 45.35) |
190 | 11.4 | -119.283 | 45.817 | POINT (-119.283 45.817) |
152 | 10.2 | -121.517 | 45.683 | POINT (-121.517 45.683) |
646 | 11.7 | -117.267 | 44.350 | POINT (-117.267 44.35) |
825 | 8.9 | -120.700 | 45.200 | POINT (-120.7 45.2) |
1249 | 8.8 | -121.783 | 42.200 | POINT (-121.783 42.2) |
840 | 9.8 | -118.083 | 45.317 | POINT (-118.083 45.317) |
1455 | 8.0 | -120.350 | 42.183 | POINT (-120.35 42.183) |
206 | 11.4 | -122.683 | 44.100 | POINT (-122.683 44.1) |
680 | 9.2 | -121.133 | 44.633 | POINT (-121.133 44.633) |
683 | 10.5 | -117.017 | 43.983 | POINT (-117.017 43.983) |
1252 | 8.1 | -118.833 | 43.283 | POINT (-118.833 43.283) |
45 | 11.1 | -123.183 | 45.233 | POINT (-123.183 45.233) |
400 | 12.0 | -122.867 | 42.367 | POINT (-122.867 42.367) |
762 | 8.5 | -121.183 | 44.583 | POINT (-121.183 44.583) |
472 | 10.9 | -120.350 | 45.467 | POINT (-120.35 45.467) |
256 | 12.1 | -118.433 | 45.967 | POINT (-118.433 45.967) |
569 | 9.3 | -120.717 | 45.483 | POINT (-120.717 45.483) |
47 | 10.2 | -124.050 | 44.633 | POINT (-124.05 44.633) |
2 | 11.2 | -124.250 | 43.417 | POINT (-124.25 43.417) |
666 | 10.6 | -117.000 | 43.867 | POINT (-117 43.867) |
1212 | 6.0 | -120.433 | 44.400 | POINT (-120.433 44.4) |
654 | 11.0 | -116.967 | 44.050 | POINT (-116.967 44.05) |
51 | 12.2 | -122.600 | 45.350 | POINT (-122.6 45.35) |
46 | 10.3 | -123.933 | 45.033 | POINT (-123.933 45.033) |
732 | 11.1 | -117.250 | 43.650 | POINT (-117.25 43.65) |
1329 | 9.0 | -120.533 | 42.700 | POINT (-120.533 42.7) |
452 | 11.4 | -118.850 | 45.683 | POINT (-118.85 45.683) |
517 | 10.5 | -118.817 | 45.483 | POINT (-118.817 45.483) |
6 | 11.7 | -122.600 | 45.600 | POINT (-122.6 45.6) |
70 | 11.8 | -124.067 | 42.883 | POINT (-124.067 42.883) |
866 | 8.4 | -120.900 | 44.350 | POINT (-120.9 44.35) |
757 | 10.3 | -122.517 | 42.733 | POINT (-122.517 42.733) |
917 | 8.7 | -121.217 | 44.267 | POINT (-121.217 44.267) |
937 | 8.4 | -121.150 | 44.267 | POINT (-121.15 44.267) |
18 | 11.2 | -124.117 | 43.700 | POINT (-124.117 43.7) |
675 | 10.8 | -117.167 | 44.767 | POINT (-117.167 44.767) |
202 | 12.2 | -123.350 | 42.967 | POINT (-123.35 42.967) |
142 | 12.1 | -123.350 | 43.200 | POINT (-123.35 43.2) |
1490 | 6.7 | -120.883 | 42.333 | POINT (-120.883 42.333) |
60 | 11.1 | -123.017 | 44.917 | POINT (-123.017 44.917) |
3 | 10.9 | -123.917 | 45.983 | POINT (-123.917 45.983) |
1422 | 4.4 | -118.967 | 44.150 | POINT (-118.967 44.15) |
1169 | 8.8 | -123.367 | 42.617 | POINT (-123.367 42.617) |
411 | 9.5 | -122.650 | 44.867 | POINT (-122.65 44.867) |
1425 | 7.6 | -119.683 | 43.483 | POINT (-119.683 43.483) |
142 | 11.3 | -122.767 | 44.800 | POINT (-122.767 44.8) |
341 | 9.8 | -122.067 | 45.117 | POINT (-122.067 45.117) |
3 | 10.2 | -123.867 | 45.450 | POINT (-123.867 45.45) |
843 | 8.8 | -117.883 | 45.217 | POINT (-117.883 45.217) |
683 | 10.7 | -117.250 | 43.983 | POINT (-117.25 43.983) |
1328 | 6.4 | -121.683 | 43.683 | POINT (-121.683 43.683) |
##Relook to add boundaries in ggplot
ggplot(oregon_tann) +
geom_sf(color = "red", alpha = 0.5) +
xlab("Longitude") +
ylab("Latitude")
##There are a couple of ways to do this. One way:
oregon_tann2 = subset(oregon_tann, select = c(elevation, tann))
##The geometry column is sticky and stays attached even when you subset
names(oregon_tann2)
## [1] "elevation" "tann" "geometry"
##Subsetting for rows/observation with an elevation over 1000 meters
oregon_tann3 = subset(oregon_tann, subset = elevation > 1000)
##state_bound = map('state', fill = TRUE, plot = FALSE) %>% st_as_sf()
ggplot() +
geom_sf(data = oregon_tann, aes(color = "red", alpha = 0.9)) +
geom_sf(data = oregon_tann3, color = "red") +
#geom_sf(data = state_bound) +
#coord_sf(xlim = c(-125,-116), ylim = c(42,47)) +
xlab("Longitude") +
ylab("Latitude") +
theme(legend.position = "none")
##Adding a column
oregon_tann$rando = runif(n = nrow(oregon_tann))
names(oregon_tann)
## [1] "elevation" "tann" "coords_x1" "coords_x2" "geometry" "rando"
##Extracting a column
elevation = oregon_tann$elevation
elevation[1:10]
## [1] 846 96 543 2 1027 1050 24 1097 999 18
##Get the geometry
geometry = oregon_tann$geometry
geometry
## Geometry set for 92 features
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -124.567 ymin: 42.05 xmax: -116.967 ymax: 46.15
## CRS: NA
## First 5 geometries:
## POINT (-120.717 44.917)
## POINT (-120.2 45.717)
## POINT (-122.717 42.217)
## POINT (-123.883 46.15)
## POINT (-117.817 44.833)
##Drop Geometry
attributes = st_drop_geometry(oregon_tann)
head(attributes)
## # A tibble: 6 × 5
## elevation tann coords_x1 coords_x2 rando
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 846 9.6 -121. 44.9 0.355
## 2 96 12.5 -120. 45.7 0.231
## 3 543 11.1 -123. 42.2 0.00502
## 4 2 10.3 -124. 46.2 0.891
## 5 1027 7.6 -118. 44.8 0.579
## 6 1050 8.4 -118. 44.8 0.182
##Spatial Filter
#Transform the coordinate system to UTM Zone 18N NAD83
north_carolina = st_transform(north_carolina, crs = 26918)
plot(st_geometry(north_carolina))
##Use st_sample to generate random points
##Set seed is used to ensure the random generation is consistent for each iteration
set.seed(1234)
random_pnts = st_sample(north_carolina, size = 500)
##Set as a simple feature (sf)
random_pnts = st_as_sf(random_pnts)
ggplot() +
geom_sf(data = north_carolina) +
geom_sf(data = random_pnts, color = "mediumorchid4", alpha = 0.5) +
theme_light() +
labs(title = "Random Sample - North Carolina",
subtitle = "By David, Abby, and Ellie Leydet")
##Subsetting for the County Polygon of Pasquotank
pasq = subset(north_carolina, NAME == "Pasquotank")
##Run the st_filter function to filter the random points by pasq (the district)
filtered_pnts = st_filter(random_pnts, pasq)
myncplot = ggplot() +
geom_sf(data = north_carolina) +
theme_light() +
labs(title = "Random Sample - North Carolina",
subtitle = "By David, Abby, and Ellie Leydet")
myncplot + geom_sf(data = filtered_pnts, color = "red", alpha = 0.5) +
labs(caption = "Pasquotank County")
##Points outside Pasquotank
out.pasq = st_filter(random_pnts, pasq, .predicate = st_disjoint)
##Editing the plot using the pasq subset to be filled a different color --> see geom_sf(data = pasq, fill = "grey19") line
myncplot + geom_sf(data = out.pasq, color = "mediumorchid4", alpha = 0.5) +
geom_sf(data = pasq, fill = "grey19") +
labs(caption = "All points outside of Pasquotank County")
##Another useful predicate is st_is_within_distance, which requires that you pass an additional distance (dist) argument to the filter. The dist argument is in units specified by the CRS, in this case meters.
fiftykm.filter = st_filter(random_pnts, pasq,
.predicate = st_is_within_distance,
dist = 50000)
##Reversed the drawing order so the points appear over the different shaded county
myncplot + geom_sf(data = pasq, fill = "snow1") +
geom_sf(data = fiftykm.filter, color = "mediumorchid4", alpha = 0.5) +
labs(caption = "All points within 50 kilometers of Pasquotank County")
##Centroid
center.of.pasq = st_centroid(pasq)
ggplot() +
geom_sf(data = pasq) +
geom_sf(data = center.of.pasq, color = "red", pch = 2) +
theme_light()
##Buffer
pasq.buffer = st_buffer(pasq, dist = 5000)
ggplot() +
geom_sf(data = pasq.buffer, alpha = 0.8) +
geom_sf(data = pasq, color = "grey2", fill = "snow1") +
labs(title = "Pasquotank 5 kilometer Buffer") +
theme_light()
##Union - merge polygons together into one large feature
nc.boundary = st_union(north_carolina)
ggplot(data = nc.boundary) +geom_sf()
##To cast a geometry is to change it from one geometry type to another. For example, to convert the boundary of North Carolina to points (the vertices of the polygon)
nc.points = st_cast(nc.boundary, "POINT")
ggplot() + geom_sf(data = nc.points, color = "mediumseagreen", alpha = 0.5 ) + theme_light()
nc.lines = st_cast(nc.boundary, "MULTILINESTRING")
ggplot() + geom_sf(data = nc.lines)
nc.lines2 = st_cast(nc.lines, "LINESTRING")
ggplot() + geom_sf(data = nc.lines2)
##If you can’t tell, it was broken into six lines: one for the mainland, and the other five for the ecological (and cultural) disaster known as the Outer Banks. How do we color by polygon?
##ggplot
NY8v2 <- st_read("../datafiles/NY_data/NY8_utm18.shp")
## Reading layer `NY8_utm18' from data source
## `/Users/davidleydet/Desktop/University of Utah PhD /Course Work/Fall 2022 Semester/GEOG6000_Data Analysis/datafiles/NY_data/NY8_utm18.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 281 features and 17 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 358241.9 ymin: 4649755 xmax: 480393.1 ymax: 4808545
## Projected CRS: WGS 84 / UTM zone 18N
binghamton = subset(NY8v2, AREANAME == "Binghamton city")
bing.plot = ggplot() +
theme_bw()
bing.plot + geom_sf(data = binghamton)
## Create a new sf object that has Binghampton and its neighboring polygons
bingies_neighbors = st_filter(NY8v2, binghamton)
## Create a random sample
bing.ran.points = st_sample(bingies_neighbors, size = 25)
## Set the sample as a simple feature (sf)
bing.ran.points = st_as_sf(bing.ran.points)
## REMINDER - THE ORDER THAT THE GEOM_SF ARE WRITTEN IS THE ORDER THEY ARE DRAWN**
bing.plot + geom_sf(data = bingies_neighbors) +
geom_sf(data = binghamton, fill = "deepskyblue") +
geom_sf(data = bing.ran.points, color = "brown1", alpha = 0.5, size = 2)
##Attributes names
names(binghamton)
## [1] "AREANAME" "AREAKEY" "X" "Y" "POP8"
## [6] "TRACTCAS" "PROPCAS" "PCTOWNHOME" "PCTAGE65P" "Z"
## [11] "AVGIDIST" "PEXPOSURE" "Cases" "Xm" "Ym"
## [16] "Xshift" "Yshift" "geometry"
## Plotting a thematic map by population
bing.plot + geom_sf(data = binghamton, aes(fill = POP8))
## Plotting a thematic map by exposure
bing.plot + geom_sf(data = binghamton, aes(fill = PEXPOSURE)) +
scale_fill_viridis(option = "cividis")
##
bing.plot + geom_sf(data = binghamton, aes(fill = PEXPOSURE)) +
scale_fill_viridis(option = "cividis") +
coord_sf(datum = 26918)
## Make sure the CRS is set appropriately!!!!!
## st_is_valid() check to see if the sf is valid
## st_make_valid() may fix the sf if there are issues
ggplot() +
geom_sf(data = binghamton, aes(fill = PEXPOSURE)) +
scale_fill_viridis(option = "cividis") +
coord_sf(crs = 4326, xlim = c(-75.91, -75.88), ylim = c(42.10, 42.13)) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90))
##Create an empty raster frame
r = raster(nrow = 10, ncol = 10)
## Fill it with random values from 0 to 1
r[] = runif(n=100, min = 0, max = 1)
r
## class : RasterLayer
## dimensions : 10, 10, 100 (nrow, ncol, ncell)
## resolution : 36, 18 (x, y)
## extent : -180, 180, -90, 90 (xmin, xmax, ymin, ymax)
## crs : +proj=longlat +datum=WGS84 +no_defs
## source : memory
## names : layer
## values : 0.01260905, 0.9844737 (min, max)
plot(r)
air_temp = raster("../datafiles/air.mon.ltm.nc")
## Loading required namespace: ncdf4
## Warning in .varName(nc, varname, warn = warn): varname used is: air
## If that is not correct, you can set it to one of: air, valid_yr_count
## Note that we have only **read the first layer (January)**. R will tell you that it loaded the variable called air. To avoid this message you can specify this directly, which is important for files containing multiple variables:##
air_temp = raster("../datafiles/air.mon.ltm.nc", varname = "air")
air_temp
## class : RasterLayer
## band : 1 (of 12 bands)
## dimensions : 73, 144, 10512 (nrow, ncol, ncell)
## resolution : 2.5, 2.5 (x, y)
## extent : -1.25, 358.75, -91.25, 91.25 (xmin, xmax, ymin, ymax)
## crs : NA
## source : air.mon.ltm.nc
## names : Monthly.Long.Term.Mean.Air.Temperature.at.sigma.level.0.995
## z-value : 0000-12-30
## zvar : air
writeRaster(air_temp, filename = "../datafiles/air_temp.tif")
## Set CRS
## Check out the PROJ4 for syntax. Go to the website!!
## "+init=epsg:4326" can be used for rasters so you don't have to write out the full PROJ4 syntax!
crs(air_temp) = "+proj=longlat +ellps=WGS84 +towgs84=0,0,0 +no_defs "
## Check CRS
crs(air_temp)
## Coordinate Reference System:
## Deprecated Proj.4 representation: +proj=longlat +ellps=WGS84 +no_defs
## WKT2 2019 representation:
## GEOGCRS["unknown",
## DATUM["Unknown based on WGS84 ellipsoid",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1],
## ID["EPSG",7030]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8901]],
## CS[ellipsoidal,2],
## AXIS["longitude",east,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433,
## ID["EPSG",9122]]],
## AXIS["latitude",north,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433,
## ID["EPSG",9122]]]]
weird_crs = crs("+proj=tmerc +lat_0=0 +lon_0=15 +k=0.999923 +x_0=5500000 +y_0=0 +ellps=GRS80 +units=m +no_defs")
air_temp_weird_crs = projectRaster(air_temp, crs = weird_crs)
## Warning in rgdal::rawTransform(projfrom, projto, nrow(xy), xy[, 1], xy[, : 100
## projected point(s) not finite
crs(air_temp_weird_crs)
## Coordinate Reference System:
## Deprecated Proj.4 representation:
## +proj=tmerc +lat_0=0 +lon_0=15 +k=0.999923 +x_0=5500000 +y_0=0
## +ellps=GRS80 +units=m +no_defs
## WKT2 2019 representation:
## PROJCRS["unknown",
## BASEGEOGCRS["unknown",
## DATUM["Unknown based on GRS80 ellipsoid",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1],
## ID["EPSG",7019]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8901]]],
## CONVERSION["unknown",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",0,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",15,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",0.999923,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",5500000,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",0,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["(E)",east,
## ORDER[1],
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]],
## AXIS["(N)",north,
## ORDER[2],
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]]]
## Plotting
plot(air_temp, main = "NCEP NCAR January LTM Tair")
## Notice how the the the x axis starts at 0 and ends at 360
air_temp = rotate(air_temp)
##Create a color palette with nine colors using color brewer
my.pal = brewer.pal(n = 9, name = "OrRd")
##Add country geometries
countries = st_read("../datafiles/ne_50m_admin_0_countries/ne_50m_admin_0_countries.shp",
quiet = TRUE)
## Run these lines together
plot(air_temp,
main = "NCEP NCAR January LTM Tair",
col = my.pal)
plot(st_geometry(countries), add = TRUE)
## The function cellStats() can be used to calculate most summary statistics for a raster layer. So to get the mean global temperature (and standard deviation):
cellStats(air_temp, mean)
## [1] 3.546352
cellStats(air_temp, sd)
## [1] 19.63948
##If we want to use only a subset of the original raster layer, the function crop() will extract only the cells in a given region. This can be defined using another raster object or Spatial* object, or by defining an extent object:
# Extent Method
canada.ext = extent(c(xmin = -143,
xmax = -50,
ymin = 41,
ymax = 84))
canada_air_temp1 = crop(air_temp, canada.ext)
# Spatial Method
canada = subset(countries, NAME == "Canada")
canada_air_temp2 = crop(air_temp, canada)
##Plot both for comparison
par(mfrow = c(1, 2))
plot(canada_air_temp1,
main = "NCEP NCAR January LTM Tair",
sub = "Extent Method",
col = my.pal)
plot(st_geometry(canada), add = TRUE)
plot(canada_air_temp2,
main = "NCEP NCAR January LTM Tair",
sub = "Spatial Method",
col = my.pal)
plot(st_geometry(canada), add = TRUE)
## Note that crop subsets the original raster to the extent of Canada’s borders, rather than to the borders themselves. This is because rasters are always rectangular. You can ‘hide’ the values of raster cells outside of a polygon by using the mask function. The raster has to be rectangular, so this does not remove the cells outside the polygon. Rather, it sets their value to NA.
canada_air_temp3 = mask(canada_air_temp2, mask = canada)
plot(canada_air_temp3,
main = "NCEP NCAR January LTM Tair",
sub = "Masked Version",
col = my.pal)
plot(st_geometry(canada), add = TRUE)
## Values can be extracted from individual locations (or sets of locations) using extract(). This can take a set of coordinates in matrix form, or use a Spatial* object. To get the January temperature of Salt Lake City:
##Use long/lat for SLC. Gives value of the cell
extract(air_temp, cbind(-111.9, 40.76))
## [1] -2.919719
##By default this gives you the value of the cell in which the point falls. The value can equally be estimated by bilinear interpolation from the four closest cells with method='bilinear':
extract(air_temp, cbind(-111.9, 40.76), method = "bilinear")
## [1] -4.324555
##Using the wna climate samples locations to subset our data
##Note: character. 'simple' or 'bilinear'. If 'simple' values for the cell a point falls in are returned. If 'bilinear' the returned values are interpolated from the values of the four nearest raster cells.
## logical. If df=TRUE, results will be returned as a data.frame. The first column is a sequential ID, the other column(s) are the extracted values
wna.air.temp.df = extract(air_temp,
wna_climate,
method = "bilinear",
df = TRUE)
head(wna.air.temp.df)
## ID Monthly.Long.Term.Mean.Air.Temperature.at.sigma.level.0.995
## 1 1 -9.782378
## 2 2 -18.523962
## 3 3 -2.664617
## 4 4 -7.552574
## 5 5 -18.926520
## 6 6 -12.687242
## Extracting pixels by polygons
##Create the China Polygon
china = subset(countries, NAME == "China")
china.air.temp.df = extract(air_temp,
china,
df = TRUE)
head(china.air.temp.df)
## ID Monthly.Long.Term.Mean.Air.Temperature.at.sigma.level.0.995
## 1 1 -23.51869
## 2 1 -22.07151
## 3 1 -23.53544
## 4 1 -24.09268
## 5 1 -21.95177
## 6 1 -15.34994
##Extracts the temperature data for China
##When this function is used with a set of polygons, the output is in a list, but we can retrieve whatever we want from that list.
##Help on this?
two.countries = rbind(china, canada)
china.tjan = extract(air_temp, two.countries)[[1]]
hist(china.tjan)
##The extract() function also takes an argument fun. This allows you to calculate a summary statistic for each set of pixels that is extracted (i.e. one per polygon). Here, we’ll use this with countries to get an average value of January temperature. We add this back as a new column in the countries object, and then plot it:
countries$Jan_Tmp = extract(air_temp, countries, fun = mean)[,1]
country.temp = ggplot(countries) +
geom_sf(aes(fill = Jan_Tmp)) +
labs(fill = "Temperature",
title = "Mean January Temperature")
ggplotly(country.temp)
## A useful extension to the basic raster functions is the use of stacks. These are a stack of raster layers which represent different variables, but have the same spatial extent and resolution.
## Read in the stack
air.temp.stk = stack("../datafiles/air.mon.ltm.nc", varname = "air")
## Rotate the long to -18- to 180
air.temp.stk = rotate(air.temp.stk)
## Change my extent to
myext = extent(c(-130, -60, 25, 50))
## Crop the stack to the extent
air.temp.stk = crop(air.temp.stk, myext)
## Subset the first three stacks. Double check what [[]] means. I assume it is the first three stacks as opposed to picking the data out in a matrix
air.temp.substk = air.temp.stk[[1:3]]
air.temp.substk
## class : RasterBrick
## dimensions : 10, 28, 280, 3 (nrow, ncol, ncell, nlayers)
## resolution : 2.5, 2.5 (x, y)
## extent : -128.75, -58.75, 26.25, 51.25 (xmin, xmax, ymin, ymax)
## crs : NA
## source : memory
## names : X0000.12.30, X0001.01.30, X0001.02.27
## min values : -18.18637, -15.90084, -10.40882
## max values : 21.45181, 21.01648, 21.04738
## time : 0000-12-30, 0001-01-30, 0001-02-27
##Paste names to each stack. month.abb is built in to R
##Potential mismatch between the initial names and the paste? For example X0000.12.30 is December, but now reads as Jan?
names(air.temp.stk) = paste("TAS", month.abb)
names(air.temp.stk)
## [1] "TAS.Jan" "TAS.Feb" "TAS.Mar" "TAS.Apr" "TAS.May" "TAS.Jun" "TAS.Jul"
## [8] "TAS.Aug" "TAS.Sep" "TAS.Oct" "TAS.Nov" "TAS.Dec"
#Raster stack pull by name
## Method 1
air.temp.jan = air.temp.stk$TAS.Jan
## Method 2
air.temp.jan2 = air.temp.stk[["TAS.Jan"]]
air.temp.jan
## class : RasterLayer
## dimensions : 10, 28, 280 (nrow, ncol, ncell)
## resolution : 2.5, 2.5 (x, y)
## extent : -128.75, -58.75, 26.25, 51.25 (xmin, xmax, ymin, ymax)
## crs : NA
## source : memory
## names : TAS.Jan
## values : -18.18637, 21.45181 (min, max)
## time : 0000-12-30
## Plotting stacks. zlim syntax sets the same scale for each stack
plot(air.temp.stk,
col = my.pal,
zlim = c(-35, 35))
## Turn countries into spatial data to add to plot
## addfun Function to add additional items such as points or polygons to the plot (map). Typically containing statements like "points(xy); plot(polygons, add=TRUE)". This is particularly useful to add something to each map when plotting a multi-layer Raster* object.
addBorder = function() {plot(as_Spatial(countries), add = TRUE)}
plot(air.temp.stk,
col = my.pal,
zlim = c(-35, 35),
addfun = addBorder)
#The cellStats() function now returns the mean (or other statistic) for all layers, allowing a quick look at the seasonal cycle of average air temperature.
tavg = cellStats(air.temp.stk, mean)
plot(1:12, tavg,
type = "l",
col = "red",
xlab = "Month",
ylab = "Average Temperature (Celsius)")
## Extract a single location
## Colorado Springs 38.8339° N, 104.8214° W
cos.tavg = extract(air.temp.stk, cbind(-104.82, 38.83), method = "bilinear")
plot(1:12, cos.tavg,
type = "l",
col = "red",
xlab = "Month",
ylab = "Average Temperature (Celsius)",
main = "Average Colorado Springs Temperature")