R Markdown

This is an R Markdown document on accessing geospatial data using APIs with R, based on Lesson 7, “Programmatically Accessing Geospatial Data Using API’s - Working with and Mapping JSON Data from the Colorado Information Warehouse in R”.

# load packages
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(rjson)
library(jsonlite)
## 
## Attaching package: 'jsonlite'
## The following objects are masked from 'package:rjson':
## 
##     fromJSON, toJSON
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.

Get the data from the API:

# Build the request URL for the Colorado Information Warehouse endpoint,
# keeping only active stations in Boulder county.
water_base_url <- "https://data.colorado.gov/resource/j5pc-4t32.json?"
water_full_url <- paste0(water_base_url, "station_status=Active",
            "&county=BOULDER")
water_data_url <- URLencode(water_full_url)

# Both rjson and jsonlite are attached and both export fromJSON(); name the
# jsonlite version explicitly so the call does not depend on attach order.
water_data_df <- jsonlite::fromJSON(water_data_url)

Explore the data:

# view data structure
# typeof() reports the underlying storage type of the parsed result
typeof(water_data_df)
## [1] "list"
# view the first 6 rows of the nested `location` data.frame
head(water_data_df$location)
##    latitude needs_recoding   longitude
## 1  40.09082          FALSE -105.514442
## 2  40.17708          FALSE -105.178567
## 3 40.204193          FALSE -105.218777
## 4 40.218043          FALSE -105.260001
## 5  40.03628          FALSE -105.203176
## 6 40.153363          FALSE -105.088695
# view the first 6 values of the latitude column inside the nested
# `location` data.frame
head(water_data_df$location$latitude)
## [1] "40.09082"  "40.17708"  "40.204193" "40.218043" "40.03628"  "40.153363"

Data Munging:

# Flatten the nested `location` data frame into top-level columns
# (e.g. location.latitude, location.longitude).
# flatten() is a name several packages define, so call jsonlite's version
# explicitly rather than relying on which package was attached last.
water_data_df <- jsonlite::flatten(water_data_df, recursive = TRUE)
# print the flattened latitude column (values are still character strings)
water_data_df$location.latitude
##  [1] "40.09082"  "40.17708"  "40.204193" "40.218043" "40.03628"  "40.153363"
##  [7] "40.215043" "39.967726" "40.260827" "40.091391" "40.187578" "40.059809"
## [13] "40.193019" "39.961655" "40.215658" "40.086278" "40.170998" "39.986169"
## [19] "40.258038" "39.931659" "40.258367" "40.174844" "39.947704" "40.177423"
## [25] "40.160705" "40.01398"  "40.126407" "40.199321" "40.013218" "40.211389"
## [31] "40.172925" "40.19328"  "40.09603"  "40.219387" "40.258726" "39.938324"
## [37] "40.257844" "40.193758" "39.931597" "40.185033" "40.134278" "40.187524"
## [43] "39.988481" "40.212658" "40.042028" "40.218335" "40.087583" "40.196422"
## [49] "40.215904" "40.006534" "40.219046" "40.18188"  "39.932589" "40.212505"
## [55] "40.018667" "40.188579" "40.129806" "40.0208"   "40.153341" "40.256276"
## [61] "40.053661" "40.255776" "40.07856"  "40.051652" "40.17395"  "40.211083"
## [67] "40.053035"
# where are the cells with NA values in your data?
# is.na() returns one logical per row: TRUE marks a missing latitude
is.na(water_data_df$location.latitude)
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE

Data Transformation:

# Coerce the amount and coordinate columns to numeric, then drop rows whose
# latitude is missing. Only latitude is filtered: NA values in `amount` or
# `location.longitude` are kept.
# across() + all_of() replaces the superseded mutate_at().
water_data_df <- water_data_df %>%
  mutate(
    across(
      all_of(c("amount", "location.longitude", "location.latitude")),
      as.numeric
    )
  ) %>%
  filter(!is.na(location.latitude))

Plot the data:

# Scatter plot of station coordinates: point size encodes `amount`,
# point colour encodes `station_type`.
ggplot(
  data = water_data_df,
  mapping = aes(
    x = location.longitude,
    y = location.latitude,
    size = amount,
    color = station_type
  )
) +
  geom_point() +
  coord_equal() +
  labs(
    x = "Longitude",
    y = "Latitude",
    title = "Surface Water Site Locations by Type",
    subtitle = "Boulder, Colorado",
    size = "Amount",
    colour = "Station Type"
  )
## Warning: Removed 1 rows containing missing values (geom_point).

Overlay your data on a basemap:

# Download a Stamen basemap for the given bounding box at zoom level 10.
# NOTE(review): the station coordinates printed earlier include latitudes
# above 40.1274 and a longitude west of -105.4969, so this bbox looks
# narrower than the data; that likely explains the rows dropped when the
# points are overlaid. Confirm and widen the bbox if all stations should show.
boulder <- get_stamenmap(
  bbox = c(
    left = -105.4969,
    bottom = 39.8995,
    right = -104.9579,
    top = 40.1274
  ),
  zoom = 10,
  messaging = FALSE,
  urlonly = FALSE,
  filename = NULL,
  color = "color"
)
## Source : http://tile.stamen.com/terrain/10/211/387.png
## Source : http://tile.stamen.com/terrain/10/212/387.png
## Source : http://tile.stamen.com/terrain/10/213/387.png
## Source : http://tile.stamen.com/terrain/10/211/388.png
## Source : http://tile.stamen.com/terrain/10/212/388.png
## Source : http://tile.stamen.com/terrain/10/213/388.png
# Draw the stations on top of the basemap: point size encodes `amount`,
# point colour encodes the station type.
ggmap(boulder) +
  geom_point(
    data = water_data_df,
    aes(location.longitude, location.latitude,
        size = amount, color = factor(station_type))
  ) +
  # Give the legends readable titles, matching the scatter plot of the same
  # data; without this the colour legend is titled "factor(station_type)".
  labs(size = "Amount", colour = "Station Type")
## Warning: Removed 44 rows containing missing values (geom_point).

by Linda, June 2020