This is an R Markdown document on accessing geospatial data using APIs with R, from Lesson 7, "Programmatically Accessing Geospatial Data Using APIs – Working with and Mapping JSON Data from the Colorado Information Warehouse in R".
# load packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(rjson)
library(jsonlite)
##
## Attaching package: 'jsonlite'
## The following objects are masked from 'package:rjson':
##
## fromJSON, toJSON
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
# Build the request URL for the Colorado Information Warehouse resource,
# restricted to active stations in Boulder County.
water_base_url <- "https://data.colorado.gov/resource/j5pc-4t32.json?"
water_full_url <- paste0(water_base_url, "station_status=Active",
                         "&county=BOULDER")
# Percent-encode any characters that are not valid in a URL.
water_data_url <- URLencode(water_full_url)
# rjson and jsonlite are both attached and both export fromJSON(); which one a
# bare fromJSON() call reaches depends on the order of the library() calls.
# Name the jsonlite version explicitly, since it is the one that reads from a
# URL and returns the records as a data frame here.
water_data_df <- jsonlite::fromJSON(water_data_url)
# inspect the storage type of the parsed result
typeof(water_data_df)
## [1] "list"
# preview the first 6 rows of the nested `location` data frame
head(water_data_df[["location"]], n = 6L)
## latitude needs_recoding longitude
## 1 40.09082 FALSE -105.514442
## 2 40.17708 FALSE -105.178567
## 3 40.204193 FALSE -105.218777
## 4 40.218043 FALSE -105.260001
## 5 40.03628 FALSE -105.203176
## 6 40.153363 FALSE -105.088695
# preview the first 6 values of the nested latitude column
head(water_data_df[["location"]][["latitude"]], n = 6L)
## [1] "40.09082" "40.17708" "40.204193" "40.218043" "40.03628" "40.153363"
# un-nest the `location` data frame so that its columns become top-level
# `location.*` columns of water_data_df
water_data_df <- jsonlite::flatten(water_data_df, recursive = TRUE)
# print the flattened latitude column
water_data_df[["location.latitude"]]
## [1] "40.09082" "40.17708" "40.204193" "40.218043" "40.03628" "40.153363"
## [7] "40.215043" "39.967726" "40.260827" "40.091391" "40.187578" "40.059809"
## [13] "40.193019" "39.961655" "40.215658" "40.086278" "40.170998" "39.986169"
## [19] "40.258038" "39.931659" "40.258367" "40.174844" "39.947704" "40.177423"
## [25] "40.160705" "40.01398" "40.126407" "40.199321" "40.013218" "40.211389"
## [31] "40.172925" "40.19328" "40.09603" "40.219387" "40.258726" "39.938324"
## [37] "40.257844" "40.193758" "39.931597" "40.185033" "40.134278" "40.187524"
## [43] "39.988481" "40.212658" "40.042028" "40.218335" "40.087583" "40.196422"
## [49] "40.215904" "40.006534" "40.219046" "40.18188" "39.932589" "40.212505"
## [55] "40.018667" "40.188579" "40.129806" "40.0208" "40.153341" "40.256276"
## [61] "40.053661" "40.255776" "40.07856" "40.051652" "40.17395" "40.211083"
## [67] "40.053035"
# flag, element by element, which latitude values are missing (NA)
is.na(water_data_df[["location.latitude"]])
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Convert the amount and coordinate columns to numeric (latitude arrives as
# character strings), then drop stations that have no latitude.
# mutate(across(all_of(...))) is the current dplyr replacement for the
# superseded mutate_at(); the selected columns and the conversion are unchanged.
water_data_df <- water_data_df %>%
  mutate(across(
    all_of(c("amount", "location.longitude", "location.latitude")),
    as.numeric
  )) %>%
  filter(!is.na(location.latitude))
# Scatter plot of station locations: point size encodes `amount`, colour
# encodes the station type.
# coord_quickmap() replaces coord_equal(): the axes are degrees of longitude
# and latitude, and at roughly 40 degrees north a degree of longitude is
# noticeably shorter than a degree of latitude, so a 1:1 aspect ratio
# stretches the map east-west.
ggplot(water_data_df,
       aes(x = location.longitude, y = location.latitude,
           size = amount, color = station_type)) +
  geom_point() +
  coord_quickmap() +
  labs(x = "Longitude",
       y = "Latitude",
       title = "Surface Water Site Locations by Type",
       subtitle = "Boulder, Colorado",
       size = "Amount",
       colour = "Station Type")
## Warning: Removed 1 rows containing missing values (geom_point).
# Derive the basemap extent from the station coordinates instead of a
# hard-coded box. The previous fixed box (top = 40.1274, left = -105.4969) did
# not cover the data: many stations lie further north (latitudes up to about
# 40.26), so ggmap dropped 44 rows when drawing the points.
# make_bbox() ignores NA coordinates and pads the range by the fraction `f`.
station_bbox <- make_bbox(lon = water_data_df$location.longitude,
                          lat = water_data_df$location.latitude,
                          f = 0.05)
# NOTE(review): the tile.stamen.com tile service this function downloads from
# appears to have been retired; newer ggmap releases offer get_stadiamap()
# (API key required). Confirm against the installed ggmap version.
# The tile downloads reported while knitting depend on the bounding box, so
# they are not recorded here.
boulder <- get_stamenmap(bbox = station_bbox, zoom = 10, messaging = FALSE,
                         urlonly = FALSE, filename = NULL, color = "color")
# Overlay the stations on the basemap; legend titles match the first plot.
# geom_point() may still drop (with a warning) any station that has a missing
# aesthetic value, e.g. a missing `amount`.
ggmap(boulder) +
  geom_point(data = water_data_df,
             aes(x = location.longitude, y = location.latitude,
                 size = amount, color = factor(station_type))) +
  labs(size = "Amount", colour = "Station Type")