Playing with mapping packages and options for plotting fish abundance along a stream network.
Use raster::raster() to import Digital Elevation Model (DEM) raster. This should be the path to the folder containing the .aud, .aux, .xml, and other files defining the DEM.
Use the readOGR() function from the rgdal package to import the shapefiles (polylines and points) that define the stream network and sampling locations.
#library(maps)
library(sp)
library(raster)
library(rgdal)
## rgdal: version: 0.9-2, (SVN revision 526)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.9.2, released 2012/10/08
## Path to GDAL shared files: /Users/Dan/Library/R/3.1/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480]
## Path to PROJ.4 shared files: /Users/Dan/Library/R/3.1/library/rgdal/proj
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:raster':
##
## intersect, select, union
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(gridExtra)
## Loading required package: grid
#library(ggmap)
# load data
dem <- raster(x = "Data/layers/demwhite")
flowlines <- rgdal::readOGR(dsn = "Data/layers/flowlinesWhite", layer = "flowlinesWhite")
## OGR data source with driver: ESRI Shapefile
## Source: "Data/layers/flowlinesWhite", layer: "flowlinesWhite"
## with 335 features
## It has 6 fields
sites <- rgdal::readOGR(dsn = "Data/layers/pointsWhite", layer = "pointsWhite")
## OGR data source with driver: ESRI Shapefile
## Source: "Data/layers/pointsWhite", layer: "pointsWhite"
## with 24 features
## It has 12 fields
load( "Data/White_River_Network.RData") # family defines spatial relationship in network
colnames(family)[1] = "child_name"
family = cbind( family, "child_b"=1:nrow(family) )
Make sure projections are the same across all data
# get data in same projection
print(proj4string(dem))
## [1] "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs"
print(proj4string(sites))
## [1] "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
flowlines_prj <- spTransform(flowlines, crs(projection(dem)))
sites_prj <- spTransform(sites, crs(projection(dem)))
# str(flowlines_prj)
Make default plots with flowlines and sample sites overlaid on the raster.
# basic plot
plot(dem)
lines(flowlines_prj)
points(sites_prj, pch = 20, cex = 0.8, col = 'red')
This is pretty nice for a plot that took just a couple lines of simple code. However, it has a number of limitations and poor aesthetics. But before getting to the details of making the figure look nice, I want to color the flowlines (stream) to reflect abundance across the network. I will use data simulated with spatial autocorrelation and some stochasticity.
# Add abundance data to flowlines
load("Data/sim_9_st_1_sites_359_years_20.RData")
# make dataframe of abundance across network
df_N <- data.frame(child_name = rep(family$child_name, times = max(network$t_i)), year = network$t_i, N = network$N_i, N_hat = mod$Report$N_ip[ , 1], stringsAsFactors = FALSE)
# Add abundance and year to flowlines data
flowlines_prj@data <- dplyr::left_join(as.data.frame(flowlines_prj@data, stringsAsFactors = FALSE), df_N[which(df_N$year == 1), ], by = c("nodeID" = "child_name"))
# Set colors related to abundance (N_hat)
colors <- colorRampPalette(c('blue','red'))(max(flowlines_prj@data$N_hat))
# Make plot with colors
plot(dem, col = gray.colors(10, start = 0.3, end = 0.9, gamma = 2.2, alpha = NULL))
plot(flowlines_prj, col = colors, add = TRUE)
This plot looks pretty nice with relatively little work. Unfortunately, however I linked the abundance values to the flowline colors didn’t work correctly. It’s possible that I’d have to do a left_join to get the colors linked to abundance rather than position in the network. I do like that the size of the stream increases with drainage area. However, there doesn’t seem to be an easy way to adjust the white space to the sides of the figure and it might not be easy to get the colors associated with abundance.
Given the challenges with base plots and my desire to add additional features, I will try with ggplot2.
Prepare data for ggplot:
# convert flowlines to dataframe for ggplot2
flowlines_f <- ggplot2::fortify(flowlines_prj)
# convert dem to dataframe for ggplot
dem_df <- data.frame(rasterToPoints(dem), stringsAsFactors = FALSE)
colnames(dem_df) <- c("Longitude", "Latitude", "Elevation")
# add id to flowlines data
flowlines_df <- data.frame(id=rownames(flowlines_prj@data),
flowlines_prj@data,
stringsAsFactors=F)
# combine flowlines data to include both spatial info and other covariate info, then limit to the first year of data
flowlines_df <- left_join(flowlines_f, flowlines_df, by = "id") %>%
dplyr::filter(year == 1)
Make a series of ggplots using different combinations of backgrounds and flowline colors and scales. Then use extraGrid package to combine them as panels in one figure for comparison. This is also the method I would use to compare multiple years of spatiotemporal data if facet_wrap didn’t work.
# ggplot
g <- ggplot(dem_df, aes(Longitude, Latitude)) +
geom_raster(aes(fill = Elevation)) + scale_fill_gradient(low = "black", high = "white")
my_breaks = c(2, 10, 25, 50, 100, 250)
g1 <- g + geom_path(data = flowlines_df, aes(x=long, y=lat, group = group, colour = N_hat), size = 1) + scale_color_gradient("Abundance", trans = "log", low = "light blue", high = "dark blue", breaks = my_breaks, labels = my_breaks) + theme_bw()
# alt colors
g2 <- g + geom_path(data = flowlines_df, aes(x=long, y=lat, group = group, colour = N_hat), size = 1) + scale_color_gradient("Abundance", trans = "log", low = "red", high = "orange") + theme_bw()
# alt scale
g3 <- g + geom_path(data = flowlines_df, aes(x=long, y=lat, group = group, colour = N_hat), size = 1) + scale_colour_gradient("Abundance", low = "light blue", high = "dark blue", breaks = my_breaks, labels = my_breaks) + theme_bw()
# alt colors
g4 <- g + geom_path(data = flowlines_df, aes(x=long, y=lat, group = group, colour = N_hat), size = 1) + scale_colour_gradient2("Abundance", trans = "log", breaks = my_breaks, labels = my_breaks) + theme_bw()
# try to get broader range of colors
g5 <- g + geom_path(data = flowlines_df, aes(x=long, y=lat, group = group, colour = N_hat), size = 1) + scale_color_gradient("Abundance", trans = "sqrt", low = "yellow", high = "red") + theme_bw()
# remove DEM to make more clear on small scale
g6 <- ggplot(dem_df, aes(Longitude, Latitude)) + geom_path(data = flowlines_df, aes(x=long, y=lat, group = group, colour = N_hat), size = 1) + scale_color_gradient("Abundance", trans = "log", breaks = my_breaks, labels = my_breaks) + theme_bw()
# plot together for comparison
grid.arrange(g1, g2, g3, g4, g5, g6, ncol = 2)
These look good and may each be useful for different purposes and sizes. Using the log scale will be better for visualizing changes in spatial patterns over time on small figures, whereas the true scale is more appropriate for visualizing abundance patterns within a network at a given time (in a larger single plot).
These plots are a bit distorted, so adding coord_equal(ratio=1) should straighten things out. Also, the Latitude and Longitude are not in a very readable form. To fix this, I will change the projection.
new_crs <- "+proj=longlat + ellps=WGS84 +towgs84=0,0,0"
dem <- projectRaster(dem, crs = new_crs)
flowlines_prj <- spTransform(flowlines, crs(projection(dem)))
sites_prj <- spTransform(sites, crs(projection(dem)))
# Add abundance and year to flowlines data
flowlines_prj@data <- dplyr::left_join(as.data.frame(flowlines_prj@data, stringsAsFactors = FALSE), df_N[which(df_N$year == 1), ], by = c("nodeID" = "child_name"))
## Warning: joining character vector and factor, coercing into character
## vector
# convert flowlines to dataframe for ggplot2
flowlines_f <- ggplot2::fortify(flowlines_prj)
# convert dem to dataframe for ggplot
dem_df <- data.frame(rasterToPoints(dem), stringsAsFactors = FALSE)
colnames(dem_df) <- c("Longitude", "Latitude", "Elevation")
# add id to flowlines data
flowlines_df <- data.frame(id=rownames(flowlines_prj@data),
flowlines_prj@data,
stringsAsFactors=F)
# combine flowlines data to include both spatial info and other covariate info, then limit to the first year of data
flowlines_df <- left_join(flowlines_f, flowlines_df, by = "id") %>%
dplyr::filter(year == 1)
Now change background to be less distracting and be more mid-toned so allow for broader range of colors from light to dark without getting lost in the background.
p1 <- ggplot(dem_df, aes(Longitude, Latitude)) + geom_raster(fill = "dark grey") + geom_path(data = flowlines_df, aes(x=long, y=lat, group = group, colour = N_hat), size = 0.8) + scale_color_gradientn("Abundance", trans = "log", colours = c("dark blue", "blue", "light blue", "cyan"), breaks = my_breaks, labels = my_breaks) + coord_equal(ratio=1) + theme_bw()
p2 <- ggplot(dem_df, aes(Longitude, Latitude)) + geom_raster(fill = "dark grey") + geom_path(data = flowlines_df, aes(x=long, y=lat, group = group, colour = N_hat), size = 0.8) + scale_color_gradient("Abundance", trans = "log", low = "cyan", high = "blue", breaks = my_breaks, labels = my_breaks) + coord_equal(ratio=1) + theme_bw()
p3 <- ggplot(dem_df, aes(Longitude, Latitude)) + geom_raster(fill = "dark grey") + geom_path(data = flowlines_df, aes(x=long, y=lat, group = group, colour = N_hat), size = 0.8) + scale_color_gradient("Abundance", trans = "log", low = "blue", high = "cyan", breaks = my_breaks, labels = my_breaks) + coord_equal(ratio=1) + theme_bw()
p4 <- ggplot(dem_df, aes(Longitude, Latitude)) + geom_raster(fill = "dark grey") + geom_path(data = flowlines_df, aes(x=long, y=lat, group = group, colour = N_hat), size = 0.8) + scale_color_gradient("Abundance", trans = "log", low = "dark blue", high = "cyan", breaks = my_breaks, labels = my_breaks) + coord_equal(ratio=1) + theme_bw()
p5 <- ggplot(dem_df, aes(Longitude, Latitude)) + geom_path(data = flowlines_df, aes(x=long, y=lat, group = group, colour = N_hat), size = 0.8) + scale_color_gradient("Abundance", trans = "log", low = "dark blue", high = "cyan", breaks = my_breaks, labels = my_breaks) + coord_equal(ratio=1) + theme_bw()
p6 <- ggplot(dem_df, aes(Longitude, Latitude)) + geom_path(data = flowlines_df, aes(x=long, y=lat, group = group, colour = N_hat), size = 0.8) + scale_color_gradient("Abundance", trans = "log", low = "cyan", high = "blue", breaks = my_breaks, labels = my_breaks) + coord_equal(ratio=1) + theme_bw()
grid.arrange(p1, p2, p3, p4, p5, p6, ncol = 2, widths = c(4, 4), heights = c(4,4,4))
These plots look better at this scale. I also tried going from white to blue but most of the plot always ended up with an ugly color of purple. Adding cyan instead of white was most effective for showing spatial differences across the network. The Latitude and Longitude are also more human readable.
When doing a panel of plots by year I would add year as a title or try it via facet_grid and I would only have a single legend.
The one thing that looked much better on the base plot was the size of the stream. In a future iteration I may try to link size to drainage area.