rm(list = ls())library(ggplot2) # ggplot() fortify()
library(dplyr) # %>% select() filter() bind_rows()
library(rgdal) # readOGR() spTransform()
library(raster) # intersect()
library(ggsn) # north2() scalebar()
library(rworldmap) # getMap()vulture <- read.csv("Gyps_rueppellii_GBIF.csv", sep = "\t")
penguin <- read.csv("Spheniscus_dermersus_GBIF.csv", sep = "\t")# Keep only the columns we need
vars <- c("gbifid", "scientificname", "locality", "decimallongitude",
"decimallatitude", "coordinateuncertaintyinmeters")# `one_of()` allows selects all columns specified in `vars`
vulture_trim <- vulture %>% dplyr::select(one_of(vars))
penguin_trim <- penguin %>% dplyr::select(one_of(vars))# Combine dataframes
pc_trim <- bind_rows(vulture_trim, penguin_trim)
str(pc_trim)## 'data.frame': 5374 obs. of 6 variables:
## $ gbifid : int 1424567323 1411752562 1411697229 1411652810 1411631368 1411629911 1411620724 1411619215 1411616203 1411614251 ...
## $ scientificname : chr "Gyps rueppellii (A. E. Brehm, 1852)" "Gyps rueppellii (A. E. Brehm, 1852)" "Gyps rueppellii (A. E. Brehm, 1852)" "Gyps rueppellii (A. E. Brehm, 1852)" ...
## $ locality : chr "" "Maasai Mara NR--Siana Springs (formely Cottar's Camp)" "Masaai Mara NR--Mara River (North)" "Mara River--Mara River Camp" ...
## $ decimallongitude : num 29 35.4 35 35 39.8 ...
## $ decimallatitude : num -23.04 -1.5 -1.33 -1.21 6.36 ...
## $ coordinateuncertaintyinmeters: num NA NA NA NA NA NA NA NA NA NA ...
One of the main reasons for using bind_rows over rbind is to combine two data frames having different number of columns. rbind throws an error in such a case whereas bind_rows assigns “NA” to those rows of columns missing in one of the data frames where the value is not provided by the data frames.
# Check species names are consistent
unique(pc_trim$scientificname)## [1] "Gyps rueppellii (A. E. Brehm, 1852)"
## [2] "Gyps rueppellii subsp. erlangeri Salvadori, 1908"
## [3] "Gyps rueppelli rueppelli"
## [4] "Spheniscus demersus (Linnaeus, 1758)"
# Clean up 'scientificname' to make names consistent
pc_trim$scientificname <- pc_trim$scientificname %>%
recode("Gyps rueppellii (A. E. Brehm, 1852)" = "Gyps rueppellii",
"Gyps rueppellii subsp. erlangeri Salvadori, 1908" = "Gyps rueppellii",
"Gyps rueppelli rueppelli" = "Gyps rueppellii",
"Spheniscus demersus (Linnaeus, 1758)" = "Spheniscus demersus")# Checking names to ensure only two names are now present
unique(pc_trim$scientificname)## [1] "Gyps rueppellii" "Spheniscus demersus"
(prelim_plot <- ggplot(pc_trim, aes(x = decimallongitude, y = decimallatitude,
colour = scientificname)) +
geom_point())pc_trim_us <- pc_trim %>% filter(decimallongitude > -50)(zoomed <- ggplot(pc_trim_us, aes(x = decimallongitude, y = decimallatitude,
colour = scientificname)) +
geom_point())# Get the world map
world <- getMap(resolution = "low") # SpatialPolygonsDataFrameworld is a SpatialPolygonsDataFrame, a complex object type with specific slots for different types of data.
world@data contains a dataframe with metadata for each polygon. Columns can be accessed like this: world@data$REGION. world@polygons contains coordinate data for all the polygons in the object, in the form of a list world@plotOrder contains an integer vector specifying the order in which polygons should be drawn, to deal with holes and overlaps. world@bbox contains the minimum and maximum x and y coordinates in which the polygons are found world@proj4string contains the Coordinate Reference System (CRS) for the polygons. A CRS specifies how the coordinates of the 2D map displayed on the computer screen are related to the real globe, which is roughly spherical. There are lot’s of different CRSs, used for maps of different scales, or of different parts of the globe (e.g. the poles vs. the equator) and it is important to keep them consistent amongst all the elements of your map.
# plot
(with_world <- ggplot() +
geom_polygon(data = world,
aes(x = long, y = lat, group = group),
fill = NA, colour = "black") +
geom_point(data = pc_trim_us, # Add and plot species data
aes(x = decimallongitude, y = decimallatitude,
colour = scientificname)) +
coord_quickmap() + # Prevents stretching when resizing
theme_classic() + # Remove ugly grey background
xlab("Longitude") +
ylab("Latitude") +
guides(colour=guide_legend(title="Species")))Using world@data$ADMIN
# Make a vector of country names
saf_countries <- c("South Africa", "Namibia", "Botswana", "Zimbabwe")
# Call the vector in `borders()`
world_saf <- world[world@data$ADMIN %in% saf_countries, ]%in% is a special R operator which matches multiple values in a vector, rather than just a single value like ==.
(southern_africa <- ggplot() +
geom_polygon(data = world_saf,
aes(x = long, y = lat, group = group),
fill = NA, colour = "black") +
geom_point(data = pc_trim_us, # Add and plot species data
aes(x = decimallongitude, y = decimallatitude,
colour = scientificname)) +
coord_quickmap() +
xlim(8, 35) + # Set x axis limits, xlim(min, max)
ylim(-35, -15) + # Set y axis limits
theme_classic() + # Remove ugly grey background
xlab("Longitude") +
ylab("Latitude") +
guides(colour=guide_legend(title="Species")))