knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message = FALSE, width = 50, height = 10)
# required libraries (install before)
require(dplyr)
require(tidyr)
require(ggplot2)
require(kable)
require(kableExtra)
require(plotly)
require(sf)
require(mapview)
# my plot defaults
require(ggthemes);
theme_set(theme_bw(base_size = 9));
options(ggplot2.continuous.colour="viridis")
theme_update(line = element_line(),
#element_blank(),linetype="dotted"
strip.text = element_text(hjust = 0, vjust = 0),
strip.background = element_rect(colour=NA, fill=NA),
#panel.border = element_rect(fill = NA, color = "black"),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold",size = rel(1)),
axis.title.y = element_text(angle=90,vjust =2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour="black", size=0.3),
axis.ticks = element_line(),
panel.grid.major = element_line(colour="#f0f0f0")
)
This Rpub was done to make a soft introduction to r plotting to
master students from International Master of Science in Marine
Biological Resources (IMBRSea) - 2023 LEG3, and to give a couple of more
sophisticated examples.
To make an histogram and a table, for example, we will simulate some
data and then used it to plot.
# seed is used so we get the same data
set.seed(1234)
# generate some random data, or go to the field and weight 120 seahorses
x = round(rnorm(120,2,.5),2)
# make classes and calculate frequencies
x <- data.frame(x=x) %>%
mutate(class = cut(x, round(seq(min(x)-.1, max(x),l=9),2))) %>%
group_by(class) %>%
summarise(n = n()) %>%
mutate(totalN = (cumsum(n)),
percent = round((n / sum(n))*100, 3),
cumpercent = round(cumsum(freq = n / sum(n)),3))
# show the table
x %>%
kable() %>%
kable_classic(full=FALSE)
| class | n | totalN | percent | cumpercent |
|---|---|---|---|---|
| (0.73,1.05] | 2 | 2 | 1.667 | 0.017 |
| (1.05,1.36] | 6 | 8 | 5.000 | 0.067 |
| (1.36,1.68] | 31 | 39 | 25.833 | 0.325 |
| (1.68,2] | 37 | 76 | 30.833 | 0.633 |
| (2,2.32] | 21 | 97 | 17.500 | 0.808 |
| (2.32,2.63] | 12 | 109 | 10.000 | 0.908 |
| (2.63,2.95] | 7 | 116 | 5.833 | 0.967 |
| (2.95,3.27] | 4 | 120 | 3.333 | 1.000 |
# make the histogram
x %>%
ggplot(aes(x=class, y=percent))+
geom_histogram(stat="identity", fill="steelblue3")+
geom_density(col = "red")+
ylab("Percentage")+
ylab("X")
Several basic plots around the iris data set: faceted, with summary
stats and relationships fitted.
# load data
data(iris)
# general simple plot
iris %>%
ggplot(aes(x=Sepal.Length, y=Sepal.Width, col=Species, size=Petal.Width))+
geom_point(alpha=.5)
# faceted plots
iris %>%
ggplot(aes(x=Sepal.Length, y=Sepal.Width))+
geom_point()+
facet_wrap(~Species)
iris %>%
ggplot(aes(x=Sepal.Length, y=Sepal.Width))+
geom_point()+
facet_wrap(~Species, scales="free")
iris %>%
ggplot(aes(x=Sepal.Length, y=Sepal.Width, col=Petal.Length, size=Petal.Width))+
geom_point(alpha=.5)+
facet_wrap(~Species, scales="free")
# interactive plot
ggplotly(iris %>%
ggplot(aes(x=Sepal.Length, y=Sepal.Width, col=Petal.Length, size=Petal.Width))+
geom_point(alpha=.5)+
facet_wrap(~Species, scales="free"))
# summary stats
iris %>%
ggplot(aes(x=Species, y=Sepal.Width))+
stat_summary(fun = mean) +
stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.1)
# summary stats facet plots
iris %>%
gather(Measure, Value, Sepal.Length,Sepal.Width) %>%
ggplot(aes(x=Species, y=Value))+
stat_summary(fun = mean) +
stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.1) +
facet_wrap(~Measure, scales="free")
# make new factor level (multiple factors)
# named area and plot the average by both factors, Species and area
# summary stats facet plots: 2 factors
iris %>%
mutate(Area = factor(rep(1:3, 50), labels=c("Tjarno","Lisbon","Ghent"))) %>%
gather(Measure, Value, Sepal.Length,Sepal.Width) %>%
ggplot(aes(x=Species, y=Value, col=Area, shape=Area, group=Area))+
stat_summary(fun = mean,
position=position_dodge(width=.3)) +
stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.1,
position=position_dodge(width=.3)) +
facet_wrap(~Measure, scales="free")
# relationship facet plots: 2 factors
iris %>%
ggplot(aes(x=Sepal.Length, y=Sepal.Width, col=Species))+
geom_point(alpha=.5)+
geom_smooth(method="lm", se=F)
Make some basic maps.
# make simple map
data.frame(lat= c(58.87605, 58.87162, 58.87144),
lon= c(11.14575, 11.14503, 11.11965)) %>%
ggplot(aes(y=lat, x=lon))+
borders("world", fill="grey90",colour="grey", alpha=.2)+
coord_fixed(ylim=c(37,65), xlim=c(-10,14))+
geom_point(size=2, col=2)
# get coordinates from Tjarno (I just do it with Google maps- quick and easy)
data.frame(lat= c(58.87605, 58.87162, 58.87144),
lon= c(11.14575, 11.14503, 11.11965)) %>%
st_as_sf(coords=c("lon","lat")) %>%
st_set_crs(4326) %>%
mapview()
# be sure to play around with the button with layers, which is cool
One example with bathymetry from NOAA.
# NOT RUNNING due to poor internet connection
# extract data from NOAA:
require(marmap)
# From the example file:
# plot with base graphics
system.time(a <- getNOAA.bathy(
lon1=10, lon2=12, lat1=58, lat2=59.5, resolution=1))
# Image default
plot(a, image=TRUE, deep=-6000, shallow=0, step=10000)
This is an example of me trying to get the direct flights from LIS
airport, closest than 4000km, using R.
# import flights table
head(kk <- read.csv("https://raw.githubusercontent.com/jpatokal/openflights/master/data/routes.dat", head=FALSE))
## V1 V2 V3 V4 V5 V6 V7 V8 V9
## 1 2B 410 AER 2965 KZN 2990 0 CR2
## 2 2B 410 ASF 2966 KZN 2990 0 CR2
## 3 2B 410 ASF 2966 MRV 2962 0 CR2
## 4 2B 410 CEK 2968 KZN 2990 0 CR2
## 5 2B 410 CEK 2968 OVB 4078 0 CR2
## 6 2B 410 DME 4029 KZN 2990 0 CR2
names(kk) = c("IATA", "ICAO", "Source_IATA","Source","Destin_IATA", "Destin", "Codeshare", "stops","Equipment")
head(kk)
## IATA ICAO Source_IATA Source Destin_IATA Destin Codeshare stops Equipment
## 1 2B 410 AER 2965 KZN 2990 0 CR2
## 2 2B 410 ASF 2966 KZN 2990 0 CR2
## 3 2B 410 ASF 2966 MRV 2962 0 CR2
## 4 2B 410 CEK 2968 KZN 2990 0 CR2
## 5 2B 410 CEK 2968 OVB 4078 0 CR2
## 6 2B 410 DME 4029 KZN 2990 0 CR2
# select flights from Lisbon airport
dim(kk <- kk %>%
filter(Source_IATA=="LIS", !is.na(Destin_IATA),
!is.na(Source_IATA)) %>%
distinct(Destin_IATA))
## [1] 103 1
# extract city code for destinies
dim(kk1 <- read.csv("https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat", head=FALSE))
## [1] 7698 14
names(kk1) <- c("AirportID","Name","City","Country","IATA", "ICAO","Latitude","Longitude","Altitude", "Timezone")
dim(kk1 <- kk1[!kk1$IATA=="\\N",])
## [1] 6072 14
head(kk1)
## AirportID Name City
## 1 1 Goroka Airport Goroka
## 2 2 Madang Airport Madang
## 3 3 Mount Hagen Kagamuga Airport Mount Hagen
## 4 4 Nadzab Airport Nadzab
## 5 5 Port Moresby Jacksons International Airport Port Moresby
## 6 6 Wewak International Airport Wewak
## Country IATA ICAO Latitude Longitude Altitude Timezone NA
## 1 Papua New Guinea GKA AYGA -6.081690 145.392 5282 10 U
## 2 Papua New Guinea MAG AYMD -5.207080 145.789 20 10 U
## 3 Papua New Guinea HGU AYMH -5.826790 144.296 5388 10 U
## 4 Papua New Guinea LAE AYNZ -6.569803 146.726 239 10 U
## 5 Papua New Guinea POM AYPY -9.443380 147.220 146 10 U
## 6 Papua New Guinea WWK AYWK -3.583830 143.669 19 10 U
## NA NA NA
## 1 Pacific/Port_Moresby airport OurAirports
## 2 Pacific/Port_Moresby airport OurAirports
## 3 Pacific/Port_Moresby airport OurAirports
## 4 Pacific/Port_Moresby airport OurAirports
## 5 Pacific/Port_Moresby airport OurAirports
## 6 Pacific/Port_Moresby airport OurAirports
kk1[kk1$IATA=="LIS",]
## AirportID Name City
## 1596 1638 Humberto Delgado Airport (Lisbon Portela Airport) Lisbon
## Country IATA ICAO Latitude Longitude Altitude Timezone NA NA
## 1596 Portugal LIS LPPT 38.7813 -9.13592 374 0 E Europe/Lisbon
## NA NA
## 1596 airport OurAirports
# add city name to flights data table: Destin_IATA==V5
# left_join(kk, kk1, by=c("Destin_IATA"=="IATA"))
dim(kk <- data.frame(kk, kk1[match(kk$Destin_IATA, kk1$IATA),1:10]))
## [1] 103 11
head(kk)
## Destin_IATA AirportID Name City
## 341 CGN 344 Cologne Bonn Airport Cologne
## 347 STR 350 Stuttgart Airport Stuttgart
## 1682 KIV 1735 Chişinău International Airport Chisinau
## 503 LHR 507 London Heathrow Airport London
## 3553 PHL 3752 Philadelphia International Airport Philadelphia
## 1347 CDG 1382 Charles de Gaulle International Airport Paris
## Country IATA ICAO Latitude Longitude Altitude Timezone
## 341 Germany CGN EDDK 50.8659 7.142740 302 1
## 347 Germany STR EDDS 48.6899 9.221960 1276 1
## 1682 Moldova KIV LUKK 46.9277 28.931000 399 2
## 503 United Kingdom LHR EGLL 51.4706 -0.461941 83 0
## 3553 United States PHL KPHL 39.8719 -75.241096 36 -5
## 1347 France CDG LFPG 49.0128 2.550000 392 1
# Add distance
# Great circles as a SpatialLines object
library(geosphere)
kk$dist=NA
for (i in (1:dim(kk)[1])) {
kk$dist.km[i] <- distHaversine(c(kk$Latitude[i], kk$Longitude[i]),
c(kk1[kk1$IATA=="LIS","Latitude"],kk1[kk1$IATA=="LIS","Longitude"]))/1000
}
##################################################
## MAPS
##################################################
## using the example
# https://www.gis-blog.com/flight-connection-map-with-r/
# create basemap for EU flights
require(maps)
bbox_europe <- st_bbox(c(xmin = -10, ymin = 20, xmax = 50, ymax = 80))
map("world", fill=T, col="grey8", bg="grey15", ylim=c(20,80), xlim=c(-10,50))
# overlay airports (direct flights destinies from LIS)
points(kk$Longitude ,kk$Latitude, pch=3, cex=0.1, col="chocolate1")
# make a flight conection map
require(geosphere)
for (i in (1:dim(kk)[1])) {
inter <- gcIntermediate(
c(kk1[kk1$IATA=="LIS","Longitude"], kk1[kk1$IATA=="LIS","Latitude"]),
c(kk$Longitude[i], kk$Latitude[i]), n=200)
lines(inter, lwd=0.01, col="turquoise2")
}
# create basemap for WORLD flights
require(maps)
map("world", fill=T, col="grey8", bg="grey15")
# overlay airports (direct flights destinies from LIS)
points(kk$Longitude ,kk$Latitude, pch=3, cex=0.1, col="chocolate1")
# make a flight conection map
require(geosphere)
for (i in (1:dim(kk)[1])) {
inter <- gcIntermediate(
c(kk1[kk1$IATA=="LIS","Longitude"], kk1[kk1$IATA=="LIS","Latitude"]),
c(kk$Longitude[i], kk$Latitude[i]), n=200)
lines(inter, lwd=0.01, col="turquoise2")
}
# better with city names
# https://medium.com/@shaofeij/visualize-your-flight-log-using-r-and-ggplot-e3bf35c5f5e1
# https://www.r-bloggers.com/2017/03/create-air-travel-route-maps-in-ggplot-a-visual-travel-diary/
require(ggrepel)
worldmap <- borders("world", colour="lightblue", fill="lightblue")
bbox_europe <- st_bbox(c(xmin = -10, ymin = 20, xmax = 50, ymax = 80))
ggplot() + worldmap + theme_void() +
geom_curve(data = kk %>% filter(dist.km<4000), aes(
x = kk1[kk1$IATA=="LIS","Longitude"],
y = kk1[kk1$IATA=="LIS","Latitude"],
xend = Longitude,
yend = Latitude),
col = "#62a8e5", size = .4, curvature = -.3) +
geom_point(data = kk, aes(x = Longitude, y = Latitude), col = "#00244d") +
geom_text_repel(data=kk, aes(x = Longitude, y = Latitude, label = City), col = "#00244d", size = 2.5, segment.color = NA, box.padding = .1, point.padding = .1)+
coord_fixed(ylim=c(20,70), xlim=c(-30,40))
# ggsave("AirMap.png", width = 10.8, height = 7.2, dpi = 355)
# results table
kk %>%
select(City, dist.km) %>%
arrange(dist.km) %>%
top_n(-20) %>%
kbl(caption = "Distance from Lisbon (direct flihgts only, in km)") %>%
kable_classic(full_width = F, html_font = "Cambria")
| City | dist.km |
|---|---|
| Faro | 234.0813 |
| Porto | 275.9672 |
| Sevilla | 391.0802 |
| Tanger | 491.9375 |
| La Coruna | 504.4995 |
| Malaga | 566.2203 |
| Casablanca | 620.5688 |
| Madrid | 647.9751 |
| Marrakech | 799.1065 |
| Bilbao | 854.6082 |
| Valencia | 966.5814 |
| Porto Santo | 1014.1042 |
| Funchal | 1076.0260 |
| Bordeaux | 1152.2082 |
| Nantes | 1249.5525 |
| Barcelona | 1279.1923 |
| Toulouse | 1286.5533 |
| Gran Canaria | 1369.4065 |
| Algier | 1394.3673 |
| Cork | 1438.3768 |