These exercises accompany the Spatial Data Objects tutorial.
Use the following code to download 2014 daily SO2 data for the United States. This data will be used in the excercises.
# create a temporary file
temp <- tempfile()
# download the .zip file to a temporary file--this will take several minutes
download.file('http://aqsdr1.epa.gov/aqsweb/aqstmp/airdata/daily_42401_2014.zip', temp)
# unzip temporary file to your working directory
unzip(temp)
# delete the temporary file
unlink(temp)
# read the data into R
so2 <- read.csv('daily_42401_2014.csv', stringsAsFactors = FALSE)
dplyr
to summarize the so2
data frame so that each monitor has one record with a column for the SO2 mean for the entire year.SpatialPoints
object.SpatialPoints
object created in the first two exercises to make a SpatialPointsDataFrame
.SpatialPointsDataFrame
.library(dplyr)
as.tbl(so2)
## Source: local data frame [324,818 x 29]
##
## State.Code County.Code Site.Num Parameter.Code POC Latitude Longitude
## (int) (int) (int) (int) (int) (dbl) (dbl)
## 1 1 73 23 42401 2 33.55306 -86.815
## 2 1 73 23 42401 2 33.55306 -86.815
## 3 1 73 23 42401 2 33.55306 -86.815
## 4 1 73 23 42401 2 33.55306 -86.815
## 5 1 73 23 42401 2 33.55306 -86.815
## 6 1 73 23 42401 2 33.55306 -86.815
## 7 1 73 23 42401 2 33.55306 -86.815
## 8 1 73 23 42401 2 33.55306 -86.815
## 9 1 73 23 42401 2 33.55306 -86.815
## 10 1 73 23 42401 2 33.55306 -86.815
## .. ... ... ... ... ... ... ...
## Variables not shown: Datum (chr), Parameter.Name (chr), Sample.Duration
## (chr), Pollutant.Standard (chr), Date.Local (chr), Units.of.Measure
## (chr), Event.Type (chr), Observation.Count (int), Observation.Percent
## (dbl), Arithmetic.Mean (dbl), X1st.Max.Value (dbl), X1st.Max.Hour (int),
## AQI (int), Method.Code (int), Method.Name (chr), Local.Site.Name (chr),
## Address (chr), State.Name (chr), County.Name (chr), City.Name (chr),
## CBSA.Name (chr), Date.of.Last.Change (chr)
so2_mean <- group_by(so2, State.Code, County.Code, Site.Num,
Latitude, Longitude, Datum, State.Name,
County.Name, City.Name, CBSA.Name)
so2_mean <- summarize(so2_mean, so2_mean = mean(Arithmetic.Mean, na.rm = TRUE))
as.tbl(so2_mean)
## Source: local data frame [461 x 11]
## Groups: State.Code, County.Code, Site.Num, Latitude, Longitude, Datum, State.Name, County.Name, City.Name [?]
##
## State.Code County.Code Site.Num Latitude Longitude Datum State.Name
## (int) (int) (int) (dbl) (dbl) (chr) (chr)
## 1 1 73 23 33.55306 -86.81500 WGS84 Alabama
## 2 1 73 1003 33.48556 -86.91500 WGS84 Alabama
## 3 1 97 3 30.76994 -88.08753 NAD83 Alabama
## 4 2 90 34 64.84569 -147.72741 NAD83 Alaska
## 5 4 7 9 33.39914 -110.85890 WGS84 Arizona
## 6 4 7 11 33.38550 -110.86727 NAD83 Arizona
## 7 4 7 12 33.39743 -110.87445 NAD83 Arizona
## 8 4 7 1001 33.00618 -110.78580 WGS84 Arizona
## 9 4 12 8000 34.23190 -113.58000 WGS84 Arizona
## 10 4 13 3002 33.45793 -112.04601 WGS84 Arizona
## .. ... ... ... ... ... ... ...
## Variables not shown: County.Name (chr), City.Name (chr), CBSA.Name (chr),
## so2_mean (dbl)
We need to make sure that there is one projection.
unique(so2_mean$Datum)
## [1] "WGS84" "NAD83" "NAD27" "UNKNOWN"
We’ll assume that “UNKNOWN” is actually “WGS84” and make three SpatialPoints
objects.
library(sp)
so2_wgs84 <- filter(so2_mean, Datum %in% c("WGS84", "UNKNOWN"))
so2_nad83 <- filter(so2_mean, Datum == "NAD83")
so2_nad27 <- filter(so2_mean, Datum == "NAD27")
so2_wgs84_coords <- cbind(so2_wgs84$Longitude, so2_wgs84$Latitude)
row.names(so2_wgs84_coords) <- paste0(so2_wgs84$State.Code, so2_wgs84$County.Code,
so2_wgs84$Site.Num)
so2_nad83_coords <- cbind(so2_nad83$Longitude, so2_nad83$Latitude)
row.names(so2_nad83_coords) <- paste0(so2_nad83$State.Code, so2_nad83$County.Code,
so2_nad83$Site.Num)
so2_nad27_coords <- cbind(so2_nad27$Longitude, so2_nad27$Latitude)
row.names(so2_nad27_coords) <- paste0(so2_nad27$State.Code, so2_nad27$County.Code,
so2_nad27$Site.Num)
wgs84 <- CRS("+proj=longlat +ellpsWGS84")
nad83 <- CRS("+proj=longlat +ellpsNAD83")
nad27 <- CRS("+proj=longlat +ellpsNAD83")
so2_wgs84_spoints <- SpatialPoints(coords = so2_wgs84_coords, proj4string = wgs84)
so2_nad83_spoints <- SpatialPoints(coords = so2_nad83_coords, proj4string = nad83)
so2_nad27_spoints <- SpatialPoints(coords = so2_nad27_coords, proj4string = nad27)
Now we transform all of the projections to WGS84 and combine them into one SpatialPoints
object.
so2_transformed <- spTransform(so2_nad83_spoints, CRSobj = wgs84)
so2_spoints <- rbind(so2_wgs84_spoints, so2_transformed)
so2_transformed <- spTransform(so2_nad27_spoints, CRSobj = wgs84)
so2_spoints <- rbind(so2_spoints, so2_transformed)
Make sure that the row names for the data frame are the same as the row names for the SpatialPoints
object. Also, change the class back to data.frame
.
row.names(so2_mean) <- paste0(so2_mean$State.Code, so2_mean$County.Code,
so2_mean$Site.Num)
so2_mean <- as.data.frame(so2_mean)
so2_spdf <- SpatialPointsDataFrame(so2_spoints, so2_mean,
proj4string = wgs84,
match.ID = TRUE)
library(maps)
map(database = 'usa')
plot(so2_spdf, pch = 19, add = TRUE)
map(database = 'state', regions = c("illinois", "indiana", "michigan",
"minnesota", "ohio", "wisconsin"))
so2_region5_spdf <- so2_spdf[so2_spdf$State.Name %in%
c("Illinois", "Indiana", "Michigan",
"Minnesota", "Ohio", "Wisconsin"), ]
plot(so2_region5_spdf, pch = 19, add = TRUE)
Huh. That’s weird.