knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message = FALSE, width = 50, height = 10)

# required libraries (install before)
require(dplyr)
require(tidyr)
require(ggplot2)
require(kable)
require(kableExtra)
require(plotly)
require(sf)
require(mapview)

# my plot defaults
require(ggthemes); 
theme_set(theme_bw(base_size = 9)); 
options(ggplot2.continuous.colour="viridis")
theme_update(line = element_line(),
               #element_blank(),linetype="dotted"
            strip.text = element_text(hjust = 0, vjust = 0),
            strip.background = element_rect(colour=NA, fill=NA),
            #panel.border = element_rect(fill = NA, color = "black"),
            panel.border = element_rect(colour = NA),
            axis.title = element_text(face = "bold",size = rel(1)),
            axis.title.y = element_text(angle=90,vjust =2),
            axis.title.x = element_text(vjust = -0.2),
            axis.text = element_text(), 
            axis.line = element_line(colour="black", size=0.3),
            axis.ticks = element_line(),
            panel.grid.major = element_line(colour="#f0f0f0")
               )

0.1 Motivation

This Rpub was done to make a soft introduction to r plotting to master students from International Master of Science in Marine Biological Resources (IMBRSea) - 2023 LEG3, and to give a couple of more sophisticated examples.

0.2 Frequencies and histograms

To make an histogram and a table, for example, we will simulate some data and then used it to plot.

# seed is used so we get the same data
set.seed(1234)

# generate some random data, or go to the field and weight 120 seahorses
x = round(rnorm(120,2,.5),2)

# make classes and calculate frequencies
x <- data.frame(x=x) %>%
  mutate(class = cut(x, round(seq(min(x)-.1, max(x),l=9),2))) %>%
  group_by(class) %>%
    summarise(n = n()) %>%
    mutate(totalN = (cumsum(n)),
         percent = round((n / sum(n))*100, 3),
         cumpercent = round(cumsum(freq = n / sum(n)),3)) 

# show the table
x %>%   
  kable() %>% 
  kable_classic(full=FALSE)
class n totalN percent cumpercent
(0.73,1.05] 2 2 1.667 0.017
(1.05,1.36] 6 8 5.000 0.067
(1.36,1.68] 31 39 25.833 0.325
(1.68,2] 37 76 30.833 0.633
(2,2.32] 21 97 17.500 0.808
(2.32,2.63] 12 109 10.000 0.908
(2.63,2.95] 7 116 5.833 0.967
(2.95,3.27] 4 120 3.333 1.000
# make the histogram
x %>% 
ggplot(aes(x=class, y=percent))+
  geom_histogram(stat="identity", fill="steelblue3")+
  geom_density(col = "red")+
  ylab("Percentage")+
  ylab("X")

0.3 Iris plots: faceting, stats and relationships

Several basic plots around the iris data set: faceted, with summary stats and relationships fitted.

# load data
data(iris)

# general simple plot
iris %>% 
  ggplot(aes(x=Sepal.Length, y=Sepal.Width, col=Species, size=Petal.Width))+
  geom_point(alpha=.5)

# faceted plots
iris %>% 
  ggplot(aes(x=Sepal.Length, y=Sepal.Width))+
  geom_point()+
  facet_wrap(~Species)

iris %>% 
  ggplot(aes(x=Sepal.Length, y=Sepal.Width))+
  geom_point()+
  facet_wrap(~Species, scales="free")

iris %>% 
  ggplot(aes(x=Sepal.Length, y=Sepal.Width, col=Petal.Length, size=Petal.Width))+
  geom_point(alpha=.5)+
  facet_wrap(~Species, scales="free")

# interactive plot
ggplotly(iris %>% 
  ggplot(aes(x=Sepal.Length, y=Sepal.Width, col=Petal.Length, size=Petal.Width))+
  geom_point(alpha=.5)+
  facet_wrap(~Species, scales="free"))
# summary stats
iris %>% 
  ggplot(aes(x=Species, y=Sepal.Width))+
    stat_summary(fun = mean) + 
    stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.1) 

# summary stats facet plots
iris %>% 
  gather(Measure, Value, Sepal.Length,Sepal.Width) %>% 
  ggplot(aes(x=Species, y=Value))+
    stat_summary(fun = mean) + 
    stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.1) +
  facet_wrap(~Measure, scales="free")

# make new factor level (multiple factors)
# named area and plot the average by both factors, Species and area
# summary stats facet plots: 2 factors
iris %>% 
  mutate(Area = factor(rep(1:3, 50), labels=c("Tjarno","Lisbon","Ghent"))) %>% 
  gather(Measure, Value, Sepal.Length,Sepal.Width) %>% 
  ggplot(aes(x=Species, y=Value, col=Area, shape=Area, group=Area))+
    stat_summary(fun = mean,
                 position=position_dodge(width=.3)) + 
    stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.1,
                 position=position_dodge(width=.3)) +
  facet_wrap(~Measure, scales="free")

# relationship facet plots: 2 factors
iris %>% 
  ggplot(aes(x=Sepal.Length, y=Sepal.Width, col=Species))+
  geom_point(alpha=.5)+
  geom_smooth(method="lm", se=F)

0.4 Basic maps

Make some basic maps.

# make simple map
data.frame(lat= c(58.87605, 58.87162, 58.87144), 
                  lon= c(11.14575, 11.14503, 11.11965)) %>% 
ggplot(aes(y=lat, x=lon))+
  borders("world", fill="grey90",colour="grey", alpha=.2)+
  coord_fixed(ylim=c(37,65), xlim=c(-10,14))+
  geom_point(size=2, col=2)

# get coordinates from Tjarno (I just do it with Google maps- quick and easy)
data.frame(lat= c(58.87605, 58.87162, 58.87144), 
                  lon= c(11.14575, 11.14503, 11.11965)) %>% 
  st_as_sf(coords=c("lon","lat")) %>% 
  st_set_crs(4326) %>% 
  mapview()
# be sure to play around with the button with layers, which is cool

0.5 Bathymetric example (not finished)

One example with bathymetry from NOAA.

# NOT RUNNING due to poor internet connection
# extract data from NOAA:
require(marmap)

# From the example file:
# plot with base graphics
system.time(a <- getNOAA.bathy(
  lon1=10, lon2=12, lat1=58, lat2=59.5, resolution=1))

# Image default
plot(a, image=TRUE, deep=-6000, shallow=0, step=10000)

0.6 Direct flights from Lisbon

This is an example of me trying to get the direct flights from LIS airport, closest than 4000km, using R.

# import flights table
head(kk <- read.csv("https://raw.githubusercontent.com/jpatokal/openflights/master/data/routes.dat", head=FALSE))
##   V1  V2  V3   V4  V5   V6 V7 V8  V9
## 1 2B 410 AER 2965 KZN 2990     0 CR2
## 2 2B 410 ASF 2966 KZN 2990     0 CR2
## 3 2B 410 ASF 2966 MRV 2962     0 CR2
## 4 2B 410 CEK 2968 KZN 2990     0 CR2
## 5 2B 410 CEK 2968 OVB 4078     0 CR2
## 6 2B 410 DME 4029 KZN 2990     0 CR2
names(kk) = c("IATA", "ICAO", "Source_IATA","Source","Destin_IATA", "Destin", "Codeshare", "stops","Equipment")
head(kk)  
##   IATA ICAO Source_IATA Source Destin_IATA Destin Codeshare stops Equipment
## 1   2B  410         AER   2965         KZN   2990               0       CR2
## 2   2B  410         ASF   2966         KZN   2990               0       CR2
## 3   2B  410         ASF   2966         MRV   2962               0       CR2
## 4   2B  410         CEK   2968         KZN   2990               0       CR2
## 5   2B  410         CEK   2968         OVB   4078               0       CR2
## 6   2B  410         DME   4029         KZN   2990               0       CR2
# select flights from Lisbon airport
dim(kk <- kk %>% 
  filter(Source_IATA=="LIS", !is.na(Destin_IATA),
         !is.na(Source_IATA)) %>% 
    distinct(Destin_IATA))
## [1] 103   1
# extract city code for destinies
dim(kk1 <- read.csv("https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat", head=FALSE))
## [1] 7698   14
names(kk1) <- c("AirportID","Name","City","Country","IATA", "ICAO","Latitude","Longitude","Altitude", "Timezone")
dim(kk1 <- kk1[!kk1$IATA=="\\N",])
## [1] 6072   14
head(kk1)
##   AirportID                                        Name         City
## 1         1                              Goroka Airport       Goroka
## 2         2                              Madang Airport       Madang
## 3         3                Mount Hagen Kagamuga Airport  Mount Hagen
## 4         4                              Nadzab Airport       Nadzab
## 5         5 Port Moresby Jacksons International Airport Port Moresby
## 6         6                 Wewak International Airport        Wewak
##            Country IATA ICAO  Latitude Longitude Altitude Timezone NA
## 1 Papua New Guinea  GKA AYGA -6.081690   145.392     5282       10  U
## 2 Papua New Guinea  MAG AYMD -5.207080   145.789       20       10  U
## 3 Papua New Guinea  HGU AYMH -5.826790   144.296     5388       10  U
## 4 Papua New Guinea  LAE AYNZ -6.569803   146.726      239       10  U
## 5 Papua New Guinea  POM AYPY -9.443380   147.220      146       10  U
## 6 Papua New Guinea  WWK AYWK -3.583830   143.669       19       10  U
##                     NA      NA          NA
## 1 Pacific/Port_Moresby airport OurAirports
## 2 Pacific/Port_Moresby airport OurAirports
## 3 Pacific/Port_Moresby airport OurAirports
## 4 Pacific/Port_Moresby airport OurAirports
## 5 Pacific/Port_Moresby airport OurAirports
## 6 Pacific/Port_Moresby airport OurAirports
kk1[kk1$IATA=="LIS",]
##      AirportID                                              Name   City
## 1596      1638 Humberto Delgado Airport (Lisbon Portela Airport) Lisbon
##       Country IATA ICAO Latitude Longitude Altitude Timezone NA            NA
## 1596 Portugal  LIS LPPT  38.7813  -9.13592      374        0  E Europe/Lisbon
##           NA          NA
## 1596 airport OurAirports
# add city name to flights data table: Destin_IATA==V5
# left_join(kk, kk1, by=c("Destin_IATA"=="IATA"))
dim(kk <- data.frame(kk, kk1[match(kk$Destin_IATA, kk1$IATA),1:10]))
## [1] 103  11
head(kk)
##      Destin_IATA AirportID                                    Name         City
## 341          CGN       344                    Cologne Bonn Airport      Cologne
## 347          STR       350                       Stuttgart Airport    Stuttgart
## 1682         KIV      1735          Chişinău International Airport     Chisinau
## 503          LHR       507                 London Heathrow Airport       London
## 3553         PHL      3752      Philadelphia International Airport Philadelphia
## 1347         CDG      1382 Charles de Gaulle International Airport        Paris
##             Country IATA ICAO Latitude  Longitude Altitude Timezone
## 341         Germany  CGN EDDK  50.8659   7.142740      302        1
## 347         Germany  STR EDDS  48.6899   9.221960     1276        1
## 1682        Moldova  KIV LUKK  46.9277  28.931000      399        2
## 503  United Kingdom  LHR EGLL  51.4706  -0.461941       83        0
## 3553  United States  PHL KPHL  39.8719 -75.241096       36       -5
## 1347         France  CDG LFPG  49.0128   2.550000      392        1
# Add distance
# Great circles as a SpatialLines object
library(geosphere)
kk$dist=NA
for (i in (1:dim(kk)[1])) { 
kk$dist.km[i] <- distHaversine(c(kk$Latitude[i], kk$Longitude[i]), 
              c(kk1[kk1$IATA=="LIS","Latitude"],kk1[kk1$IATA=="LIS","Longitude"]))/1000
}


##################################################
## MAPS
##################################################

## using the example
# https://www.gis-blog.com/flight-connection-map-with-r/

# create basemap for EU flights
require(maps)
bbox_europe <- st_bbox(c(xmin = -10, ymin = 20, xmax = 50, ymax = 80))
map("world", fill=T, col="grey8", bg="grey15", ylim=c(20,80), xlim=c(-10,50))

# overlay airports (direct flights destinies from LIS)
points(kk$Longitude ,kk$Latitude, pch=3, cex=0.1, col="chocolate1")

# make a flight conection map
require(geosphere)
for (i in (1:dim(kk)[1])) { 
inter <- gcIntermediate(
  c(kk1[kk1$IATA=="LIS","Longitude"], kk1[kk1$IATA=="LIS","Latitude"]), 
  c(kk$Longitude[i], kk$Latitude[i]), n=200)
lines(inter, lwd=0.01, col="turquoise2")    
}

# create basemap for WORLD flights
require(maps)
map("world", fill=T, col="grey8", bg="grey15")

# overlay airports (direct flights destinies from LIS)
points(kk$Longitude ,kk$Latitude, pch=3, cex=0.1, col="chocolate1")

# make a flight conection map
require(geosphere)
for (i in (1:dim(kk)[1])) { 
inter <- gcIntermediate(
  c(kk1[kk1$IATA=="LIS","Longitude"], kk1[kk1$IATA=="LIS","Latitude"]), 
  c(kk$Longitude[i], kk$Latitude[i]), n=200)
lines(inter, lwd=0.01, col="turquoise2")    
}

# better with city names
# https://medium.com/@shaofeij/visualize-your-flight-log-using-r-and-ggplot-e3bf35c5f5e1
# https://www.r-bloggers.com/2017/03/create-air-travel-route-maps-in-ggplot-a-visual-travel-diary/
require(ggrepel)
worldmap <- borders("world", colour="lightblue", fill="lightblue")
bbox_europe <- st_bbox(c(xmin = -10, ymin = 20, xmax = 50, ymax = 80))

ggplot() + worldmap + theme_void() +
  geom_curve(data = kk %>% filter(dist.km<4000), aes(
    x = kk1[kk1$IATA=="LIS","Longitude"], 
    y = kk1[kk1$IATA=="LIS","Latitude"], 
    xend = Longitude, 
    yend = Latitude), 
    col = "#62a8e5", size = .4, curvature = -.3) +
  geom_point(data = kk, aes(x = Longitude, y = Latitude), col = "#00244d") + 
  geom_text_repel(data=kk, aes(x = Longitude, y = Latitude, label = City), col = "#00244d", size = 2.5, segment.color = NA, box.padding = .1, point.padding = .1)+
  coord_fixed(ylim=c(20,70), xlim=c(-30,40))

# ggsave("AirMap.png", width = 10.8, height = 7.2, dpi = 355)

# results table
kk %>% 
  select(City, dist.km) %>% 
  arrange(dist.km) %>% 
  top_n(-20) %>% 
  kbl(caption = "Distance from Lisbon (direct flihgts only, in km)") %>%
  kable_classic(full_width = F, html_font = "Cambria")
Distance from Lisbon (direct flihgts only, in km)
City dist.km
Faro 234.0813
Porto 275.9672
Sevilla 391.0802
Tanger 491.9375
La Coruna 504.4995
Malaga 566.2203
Casablanca 620.5688
Madrid 647.9751
Marrakech 799.1065
Bilbao 854.6082
Valencia 966.5814
Porto Santo 1014.1042
Funchal 1076.0260
Bordeaux 1152.2082
Nantes 1249.5525
Barcelona 1279.1923
Toulouse 1286.5533
Gran Canaria 1369.4065
Algier 1394.3673
Cork 1438.3768