1 Clear Workspace

rm(list = ls())

2 Load library

library(ggplot2)  # ggplot() fortify()
library(dplyr)  # %>% select() filter() bind_rows()
library(rgdal)  # readOGR() spTransform()
library(raster)  # intersect()
library(ggsn)  # north2() scalebar()
library(rworldmap)  # getMap()

3 Data input

vulture <- read.csv("Gyps_rueppellii_GBIF.csv", sep = "\t")
penguin <- read.csv("Spheniscus_dermersus_GBIF.csv", sep = "\t")

4 Clean data

# Keep only the columns we need
vars <- c("gbifid", "scientificname", "locality", "decimallongitude",
    "decimallatitude", "coordinateuncertaintyinmeters")
# `one_of()` allows selects all columns specified in `vars`
vulture_trim <- vulture %>% dplyr::select(one_of(vars))
penguin_trim <- penguin %>% dplyr::select(one_of(vars))
# Combine dataframes
pc_trim <- bind_rows(vulture_trim, penguin_trim)
str(pc_trim)
## 'data.frame':    5374 obs. of  6 variables:
##  $ gbifid                       : int  1424567323 1411752562 1411697229 1411652810 1411631368 1411629911 1411620724 1411619215 1411616203 1411614251 ...
##  $ scientificname               : chr  "Gyps rueppellii (A. E. Brehm, 1852)" "Gyps rueppellii (A. E. Brehm, 1852)" "Gyps rueppellii (A. E. Brehm, 1852)" "Gyps rueppellii (A. E. Brehm, 1852)" ...
##  $ locality                     : chr  "" "Maasai Mara NR--Siana Springs (formely Cottar's Camp)" "Masaai Mara NR--Mara River (North)" "Mara River--Mara River Camp" ...
##  $ decimallongitude             : num  29 35.4 35 35 39.8 ...
##  $ decimallatitude              : num  -23.04 -1.5 -1.33 -1.21 6.36 ...
##  $ coordinateuncertaintyinmeters: num  NA NA NA NA NA NA NA NA NA NA ...

One of the main reasons for using bind_rows over rbind is to combine two data frames having different number of columns. rbind throws an error in such a case whereas bind_rows assigns “NA” to those rows of columns missing in one of the data frames where the value is not provided by the data frames.

# Check species names are consistent
unique(pc_trim$scientificname)
## [1] "Gyps rueppellii (A. E. Brehm, 1852)"             
## [2] "Gyps rueppellii subsp. erlangeri Salvadori, 1908"
## [3] "Gyps rueppelli rueppelli"                        
## [4] "Spheniscus demersus (Linnaeus, 1758)"
# Clean up 'scientificname' to make names consistent
pc_trim$scientificname <- pc_trim$scientificname %>%
    recode("Gyps rueppellii (A. E. Brehm, 1852)" = "Gyps rueppellii",
             "Gyps rueppellii subsp. erlangeri Salvadori, 1908" = "Gyps rueppellii",
                 "Gyps rueppelli rueppelli" = "Gyps rueppellii",
                     "Spheniscus demersus (Linnaeus, 1758)" = "Spheniscus demersus")
# Checking names to ensure only two names are now present
unique(pc_trim$scientificname)
## [1] "Gyps rueppellii"     "Spheniscus demersus"

5 Plot

5.1 Preliminery Plot with ggplot

(prelim_plot <- ggplot(pc_trim, aes(x = decimallongitude, y = decimallatitude, 
    colour = scientificname)) +
    geom_point())

pc_trim_us <- pc_trim %>% filter(decimallongitude > -50)
(zoomed <- ggplot(pc_trim_us, aes(x = decimallongitude, y = decimallatitude, 
    colour = scientificname)) +
    geom_point())

5.2 ggplot & world map

# Get the world map 
world <- getMap(resolution = "low") # SpatialPolygonsDataFrame

world is a SpatialPolygonsDataFrame, a complex object type with specific slots for different types of data.

contains a dataframe with metadata for each polygon. Columns can be accessed like this: $REGION. contains coordinate data for all the polygons in the object, in the form of a list contains an integer vector specifying the order in which polygons should be drawn, to deal with holes and overlaps. contains the minimum and maximum x and y coordinates in which the polygons are found contains the Coordinate Reference System (CRS) for the polygons. A CRS specifies how the coordinates of the 2D map displayed on the computer screen are related to the real globe, which is roughly spherical. There are lot’s of different CRSs, used for maps of different scales, or of different parts of the globe (e.g. the poles vs. the equator) and it is important to keep them consistent amongst all the elements of your map.

# plot
(with_world <- ggplot() +
    geom_polygon(data = world, 
        aes(x = long, y = lat, group = group),
        fill = NA, colour = "black") + 
    geom_point(data = pc_trim_us,  # Add and plot species data
        aes(x = decimallongitude, y = decimallatitude, 
            colour = scientificname)) +
    coord_quickmap() +  # Prevents stretching when resizing
    theme_classic() +  # Remove ugly grey background
    xlab("Longitude") +
    ylab("Latitude") + 
    guides(colour=guide_legend(title="Species")))

5.2.1 Map with subsets of Contents

Using $ADMIN

# Make a vector of country names
saf_countries <- c("South Africa", "Namibia", "Botswana", "Zimbabwe")

# Call the vector in `borders()`
world_saf <- world[world@data$ADMIN %in% saf_countries, ]

%in% is a special R operator which matches multiple values in a vector, rather than just a single value like ==.

(southern_africa <- ggplot() +
    geom_polygon(data = world_saf, 
        aes(x = long, y = lat, group = group),
        fill = NA, colour = "black") + 
    geom_point(data = pc_trim_us,  # Add and plot species data
        aes(x = decimallongitude, y = decimallatitude, 
            colour = scientificname)) +
    coord_quickmap() + 
    xlim(8, 35) +  # Set x axis limits, xlim(min, max)
    ylim(-35, -15) +  # Set y axis limits
    theme_classic() +  # Remove ugly grey background
    xlab("Longitude") +
    ylab("Latitude") + 
    guides(colour=guide_legend(title="Species")))

6 Reference

SPATIAL DATA AND MAPS