library(magrittr)
library(ggplot2)
library(dplyr)
library(tidyr)
library(viridis)
library(readr)
library(ggmap)
library(gridExtra)
library(choroplethr)
library(choroplethrMaps)
library(tigris)
library(sp)
library(stringr)
library(GISTools)
library(raster)
library(grid)
setwd("C:/Users/Steve/Data-Science-Toolbox/Advanced_R")
us_map <- map_data("state")
us_map %>%
filter(region %in% c("north carolina", "south carolina")) %>%
ggplot(aes(x = long, y = lat, group = group)) +
geom_path()
us_map %>%
filter(region %in% c("north carolina", "south carolina")) %>%
ggplot(aes(x = long, y = lat, group = group)) +
geom_polygon(fill = "lightblue", color = "black")
us_map %>%
filter(region %in% c("north carolina", "south carolina")) %>%
ggplot(aes(x = long, y = lat, group = group)) +
geom_polygon(fill = "lightblue", color = "black") +
theme_void()
us_map %>%
ggplot(aes(x = long, y = lat, group = group)) +
geom_polygon(fill = "lightblue", color = "black") +
theme_void()
data(votes.repub)
votes.repub %>%
tbl_df() %>%
mutate(state = rownames(votes.repub),
state = tolower(state)) %>%
right_join(us_map, by = c("state" = "region")) %>%
ggplot(aes(x = long, y = lat, group = group, fill = `1976`)) +
geom_polygon(color = "black") +
theme_void() +
scale_fill_viridis(name = "Republican\nvotes (%)")
serial <- read_csv(paste0("https://raw.githubusercontent.com/",
"dgrtwo/serial-ggvis/master/input_data/",
"serial_podcast_data/serial_map_data.csv"))
serial <- serial %>%
mutate(long = -76.8854 + 0.00017022 * x,
lat = 39.23822 + 1.371014e-04 * y,
tower = Type == "cell-site")
maryland <- map_data('county', region = 'maryland')
baltimore <- maryland %>%
filter(subregion %in% c("baltimore city", "baltimore"))
ggplot(baltimore, aes(x = long, y = lat, group = group)) +
geom_polygon(fill = "lightblue", color = "black") +
geom_point(data = serial, aes(group = NULL, color = tower)) +
theme_void() +
scale_color_manual(name = "Cell tower", values = c("black", "red"))
beijing <- get_map("Beijing", zoom = 12)
ggmap(beijing) +
theme_void() +
ggtitle("Beijing, China")
map_1 <- get_map("Estes Park", zoom = 12,
source = "google", maptype = "terrain") %>%
ggmap(extent = "device")
map_2 <- get_map("Estes Park", zoom = 12,
source = "stamen", maptype = "watercolor") %>%
ggmap(extent = "device")
map_3 <- get_map("Estes Park", zoom = 12,
source = "google", maptype = "hybrid") %>%
ggmap(extent = "device")
grid.arrange(map_1, map_2, map_3, nrow = 1)
get_map(c(2.35, 48.86), zoom = 10) %>%
ggmap(extent = "device")
geocode("Supreme Court of the United States")
## lon lat
## 1 NA NA
geocode("1 First St NE, Washington, DC")
## lon lat
## 1 -77.00465 38.89051
mapdist("Baltimore, MD",
"1 First St NE, Washington, DC") %>%
select(from, to, miles)
## from to miles
## 1 Baltimore, MD 1 First St NE, Washington, DC 37.41636
data(df_pop_county)
county_choropleth(df_pop_county)
county_choropleth(df_pop_county, state_zoom = c("colorado", "wyoming"))
county_choropleth(df_pop_county, state_zoom = c("north carolina"),
reference_map = TRUE)
floyd_events <- read_csv("./Advanced R/data/floyd_events.csv")
floyd_events %>%
group_by(fips) %>%
dplyr::summarize(n_events = n()) %>%
mutate(fips = as.numeric(fips)) %>%
dplyr::rename(region = fips,
value = n_events) %>%
county_choropleth(state_zoom = c("north carolina", "virginia"),
reference_map = TRUE)
floyd_track <- read_csv("./Advanced R/data/floyd_track.csv")
floyd_events %>%
dplyr::group_by(fips) %>%
dplyr::summarize(flood = sum(grepl("Flood", type))) %>%
dplyr::mutate(fips = as.numeric(fips)) %>%
dplyr::rename(region = fips,
value = flood) %>%
county_choropleth(state_zoom = c("north carolina", "maryland",
"delaware", "new jersey",
"virginia", "south carolina",
"pennsylvania", "new york",
"connecticut", "massachusetts",
"new hampshire", "vermont",
"maine", "rhode island"),
reference_map = TRUE) +
geom_path(data = floyd_track, aes(x = -longitude, y = latitude,
group = NA),
color = "red")
floyd_floods <- floyd_events %>%
filter(grepl("Flood", type)) %>%
mutate(fips = as.numeric(fips)) %>%
group_by(fips) %>%
dplyr::summarize(value = 1) %>%
ungroup() %>%
dplyr::rename(region = fips)
floyd_map <- CountyChoropleth$new(floyd_floods)
floyd_map$set_zoom(c("north carolina", "maryland",
"delaware", "new jersey",
"virginia", "south carolina",
"pennsylvania", "new york",
"connecticut", "massachusetts",
"new hampshire", "vermont",
"maine", "rhode island"))
floyd_map$render()
floyd_map$add_state_outline <- TRUE
floyd_map$ggplot_scale <- scale_fill_manual(values = c("yellow"),
guide = FALSE)
floyd_xlim <- floyd_map$get_bounding_box()[c(1, 3)]
floyd_ylim <- floyd_map$get_bounding_box()[c(2, 4)]
a <- floyd_map$render() +
geom_path(data = floyd_track, aes(x = -longitude, y = latitude,
group = NA),
color = "red", size = 2, alpha = 0.6) +
xlim(floyd_map$get_bounding_box()[c(1, 3)]) +
ylim(floyd_map$get_bounding_box()[c(2, 4)])
b <- floyd_map$render_with_reference_map() +
geom_path(data = floyd_track, aes(x = -longitude, y = latitude,
group = NA),
color = "red", size = 2, alpha = 0.6) +
xlim(floyd_xlim) +
ylim(floyd_ylim)
grid.arrange(a, b, ncol = 2)
denver_tracts <- tracts(state = "CO", county = 31, cb = TRUE)
plot(denver_tracts)
bbox(denver_tracts)
## min max
## x -105.10993 -104.60030
## y 39.61443 39.91425
roads <- primary_roads()
plot(denver_tracts, col = "lightblue")
plot(roads, add = TRUE, col = "darkred")
denver_tracts_df <- fortify(denver_tracts)
denver_tracts_df %>%
dplyr::select(1:4) %>% dplyr::slice(1:5)
## long lat order hole
## 1 -105.0532 39.78353 1 FALSE
## 2 -105.0502 39.78483 2 FALSE
## 3 -105.0477 39.78488 3 FALSE
## 4 -105.0439 39.78351 4 FALSE
## 5 -105.0436 39.78347 5 FALSE
Once its converted we can use it in ggplot2
denver_tracts_df %>%
ggplot(aes(x = long, y = lat, group = group)) +
geom_polygon(fill = "lightblue", color = "black") +
theme_void()
usamap <- map_data("state") %>%
ggplot(aes(long, lat, group = group)) +
geom_polygon(fill = "white", colour = "black")
map_1 <- usamap + coord_map() + ggtitle("default")
map_2 <- usamap + coord_map("gilbert") + ggtitle("+ coord_map('gilbert')")
map_3 <- usamap + coord_map("conic", lat0 = 30) +
ggtitle("+ coord_map('conic', lat0 = 30)")
grid.arrange(map_1, map_2, map_3, ncol = 1)
load("./Advanced R/data/fars_colorado.RData")
driver_data %>%
dplyr::select(1:5) %>% dplyr::slice(1:5)
## state st_case county date latitude
## 1 8 80001 51 2001-01-01 10:00:00 39.10972
## 2 8 80002 31 2001-01-04 19:00:00 39.68215
## 3 8 80003 31 2001-01-03 07:00:00 39.63500
## 4 8 80004 31 2001-01-05 20:00:00 39.71304
## 5 8 80005 29 2001-01-05 10:00:00 39.09733
map_data("county", region = "Colorado") %>%
ggplot(aes(x = long, y = lat, group = subregion)) +
geom_polygon(color = "gray", fill = NA) +
theme_void() +
geom_point(data = driver_data,
aes(x = longitud, y = latitude, group = NULL),
alpha = 0.5, size = 0.7)
Summarize that by county
county_accidents <- driver_data %>%
dplyr::mutate(county = str_pad(county, width = 3,
side = "left", pad = "0")) %>%
tidyr::unite(region, state, county, sep = "") %>%
dplyr::group_by(region) %>%
dplyr::summarize(value = n()) %>%
dplyr::mutate(region = as.numeric(region))
county_accidents %>% slice(1:4)
## # A tibble: 4 x 2
## region value
## <dbl> <int>
## 1 8001 617
## 2 8003 77
## 3 8005 522
## 4 8007 41
county_choropleth(county_accidents, state_zoom = "colorado")
Now, we want to count up how many of these accidents occurred in each of the census tract shapes we pulled from the US Census earlier in this section using the tigris package. We can do this using the poly.counts function from GISTools package. However, before we can use that function, we need to make sure both objects are spatial objects, rather than dataframes, and that they have the same CRS.
The census tract data is already in a spatial object. We can change put the denver_fars object in a spatial object by resetting its coordinates attribute to specify which of its columns show longitude and latitude. Next, we need to specify what CRS the data has. Because it is coming from a dataframe of longitude and latitude data, the WSG84 system should be reasonable:
denver_fars <- driver_data %>%
filter(county == 31)
denver_fars_sp <- denver_fars
coordinates(denver_fars_sp) <- c("longitud", "latitude")
proj4string(denver_fars_sp) <- CRS("+init=epsg:4326")
To be able to pair up polygons and points, the spatial objects need to have the same CRS. To help later with calculating the area of each polygon, we’ll use a projected CRS that is reasonable for Colorado and reproject the spatial data using the spTransform function:
denver_tracts_proj <- spTransform(denver_tracts,
CRS("+init=epsg:26954"))
denver_fars_proj <- spTransform(denver_fars_sp,
CRS(proj4string(denver_tracts_proj)))
Drill down to just Denver
denver_fars <- driver_data %>%
filter(county == 31)
denver_fars_sp <- denver_fars
coordinates(denver_fars_sp) <- c("longitud", "latitude")
proj4string(denver_fars_sp) <- CRS("+init=epsg:4326")
denver_tracts_proj <- spTransform(denver_tracts,
CRS("+init=epsg:26954"))
denver_fars_proj <- spTransform(denver_fars_sp,
CRS(proj4string(denver_tracts_proj)))
plot(denver_tracts_proj)
plot(denver_fars_proj, add = TRUE, col = "red", pch = 1)
Now count the accidents by census tract and remap
tract_counts <- poly.counts(denver_fars_proj, denver_tracts_proj)
choropleth(denver_tracts, tract_counts)
poly.areas calculates the area of each polygon so you can get teh rate of accidents per population in the Denver census tracts thus:
choropleth(denver_tracts,
tract_counts / poly.areas(denver_tracts_proj))
When mapping in R, you may also need to map raster data. You can think of raster data as data shown with pixels- the graphing region is divided into even squares, and color is constant within each square.
There is a function in the raster package that allows you to “rasterize” data. That is, you take spatial points data, divide the region into squares, and count the number of points (or other summary) within each square. When you do this, you need to set the x- and y-range for the raster squares. You can use bbox on a spatial object to get an idea of its ranges to help you specify these limits. You can use the res parameter in raster to set how large the raster boxes should be. For example, here is some code for rasterizing the accident data for Denver:
bbox(denver_fars_sp)
## min max
## longitud -105.10973 -104.0122
## latitude 39.61715 39.8381
denver_raster <- raster(xmn = -105.09, ymn = 39.60,
xmx = -104.71, ymx = 39.86,
res = 0.02)
den_acc_raster <- rasterize(geometry(denver_fars_sp),
denver_raster,
fun = "count")
image(den_acc_raster, col = terrain.colors(5))
We can just add that to the base polt of Denver
plot(denver_tracts)
plot(den_acc_raster, add = TRUE, alpha = 0.5)
Making a map in a map using base grid
balt_counties <- map_data("county", region = "maryland") %>%
mutate(our_counties = subregion %in% c("baltimore", "baltimore city"))
balt_map <- get_map("Baltimore County", zoom = 10) %>%
ggmap(extent = "device") +
geom_polygon(data = filter(balt_counties, our_counties == TRUE),
aes(x = long, y = lat, group = group),
fill = "red", color = "darkred", alpha = 0.2)
maryland_map <- balt_counties %>%
ggplot(aes(x = long, y = lat, group = group, fill = our_counties)) +
geom_polygon(color = "black") +
scale_fill_manual(values = c("white", "darkred"), guide = FALSE) +
theme_void() +
coord_map()
grid.draw(ggplotGrob(balt_map))
md_inset <- viewport(x = 0, y = 0,
just = c("left", "bottom"),
width = 0.35, height = 0.35)
pushViewport(md_inset)
grid.draw(rectGrob(gp = gpar(alpha = 0.5, col = "white")))
grid.draw(rectGrob(gp = gpar(fill = NA, col = "black")))
grid.draw(ggplotGrob(maryland_map))
popViewport()