# devtools::install_github('EcologicalTraitData/traitdataform')
rm(list=ls())
list.of.packages <- c("ggplot2","data.table","dplyr","tidyr","parallel","bdc","taxadb","traitdataform","pbapply","tidyverse","readxl","lme4","coefplot","sjPlot","sjmisc","effects","rgdal","maptools","rgeos","terra","MuMIn","rnaturalearthdata","lsmeans","GGally","tidyterra","httr","purrr","rlist","usethis","ggpubr","leaflet")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
sapply(list.of.packages, require, character.only = TRUE)
## Warning: package 'ggplot2' was built under R version 4.3.2
## Warning: package 'lme4' was built under R version 4.3.2
## Warning: package 'terra' was built under R version 4.3.2
## Warning: package 'tidyterra' was built under R version 4.3.2
## Warning: package 'ggpubr' was built under R version 4.3.2
## ggplot2 data.table dplyr tidyr
## TRUE TRUE TRUE TRUE
## parallel bdc taxadb traitdataform
## TRUE TRUE TRUE TRUE
## pbapply tidyverse readxl lme4
## TRUE TRUE TRUE TRUE
## coefplot sjPlot sjmisc effects
## TRUE TRUE TRUE TRUE
## rgdal maptools rgeos terra
## TRUE TRUE TRUE TRUE
## MuMIn rnaturalearthdata lsmeans GGally
## TRUE TRUE TRUE TRUE
## tidyterra httr purrr rlist
## TRUE TRUE TRUE TRUE
## usethis ggpubr leaflet
## TRUE TRUE TRUE
# Code to find accepted species names
# Before you can download the source code from github, make sure you have a personal github token
# run this:
# usethis::edit_r_environ()
# if you have no toke, create one following:
# 1. Generate on GitHub your personal token
# 1.1 Go to GitHub
# 2.1 In the right corner go to "Settings"
# 2.2 Then in the left part go to "Developer setting"
# 2.3 Select the option "Personal access tokens"
# 2.4 Select the option "Generate new token"
# 2.5 Copy your personal token
# run this to add the token to your .Renviron
# usethis::edit_r_environ()
# write GITHUB_PAT=YOUR_TOKEN
# Sys.getenv("GITHUB_PAT")
# source(here::here("R/fetchGHdata.R"))
#
# fetchGHdata(gh_account = "Bioshifts",
# repo = "bioshifts_v1_v2",
# path = "R/Source_code/Clean_names.R",
# output = here::here("R/Clean_names.R"))
#
# fetchGHdata(gh_account = "Bioshifts",
# repo = "bioshifts_v1_v2",
# path = "R/Source_code/Find_Sci_Names.R",
# output = here::here("R/Find_Sci_Names.R"))
source(here::here("R/Clean_names.R"))
source(here::here("R/Find_Sci_Names.R"))
source(here::here("R/mclapply_hack.R"))
##
## *** Microsoft Windows detected ***
##
## For technical reasons, the MS Windows version of mclapply()
## is implemented as a serial function instead of a parallel
## function.
##
## As a quick hack, we replace this serial version of mclapply()
## with a wrapper to parLapply() for this R session. Please see
##
## http://www.stat.cmu.edu/~nmv/2014/07/14/implementing-mclapply-on-windows
##
## for details.
# Malin's transformation for moving values slightly inward.
transform01 <- function(x) (x * (length(x) - 1) + 0.5) / (length(x))
# De Kort transformation
deKort_trans <- function(p){
p <- scale(p)
p <- (p - min(p, na.rm = T))/(max(p, na.rm = T)-min(p, na.rm = T))
return(p)
}
# Malin's transformation before a logit transformation
logit_trans <- function(p){
p <- transform01(p)
p <- log(p/(1-p))
return(p)
}
splist <- read.csv(here::here("Data/splist.csv"), header = T)
# remove duplicated sp_names
splist <- splist %>%
dplyr::filter(!duplicated(scientificName))%>%
mutate(Kingdom=kingdom,
Phylum=phylum,
Class=class,
Order=order,
Family=family)%>%
dplyr::select(scientificName,Kingdom,Phylum,Class,Order,Family,db)
splist$Genus <- sapply(splist$scientificName, function(x){
strsplit(x," ")[[1]][1]
})
if(!file.exists(here::here("data/bioshiftsv3_metadata.csv"))){
# # From Google drive
# drive_path <- readWindowsShortcut("G:/My Drive.lnk")$pathname
# bioshifts_path <- readWindowsShortcut(paste0(drive_path,"\\BIOSHIFTS Working Group.lnk"))$pathname
# shp_path <- here::here(bioshifts_path,("DATA/BIOSHIFTS_v3/CleanDatabasev3/ShapefilesBioShiftsv3"))
# all_shps <- list.files(shp_path,pattern = "dbf", full.names = TRUE)
# From local folder
all_shps <- list.files("C:/Users/brunn/ShadowDrive/CreateGeodatabaseBioShifts/Data/ShapefilesBioShiftsv3",
pattern = "shp", full.names = TRUE)
biov1_meta <- pblapply(all_shps, terra::vect)
biov1_meta <- pblapply(biov1_meta, data.frame)
biov1_meta <- data.table::rbindlist(biov1_meta)
write.csv(biov1_meta, here::here("data/bioshiftsv3_metadata.csv"),row.names = FALSE)
} else {
biov1_meta <- read.csv(here::here("data/bioshiftsv3_metadata.csv"))
}
biov1 <- read.csv(here::here("Data/biov1_fixednames.csv"), header = T)
# Fix references in biov1
biov1$sp_name_std_v1 <- gsub("_"," ",biov1$sp_name_std_v1)
# Select columns of interest
biov1 <- biov1 %>%
filter(!Type %in% c("BAT","LON")) %>%
dplyr::select(ID,Article_ID,Study_ID,
Type,Param,Trend,SHIFT,UNIT,DUR,
v.lat.mean,v.ele.mean,trend.mean,
START,END,Sampling,Uncertainty_Distribution,Uncertainty_Parameter,Nperiodes,
N,Grain_size,Data,ID.area,
Phylum,Class,Order,Family,Genus,sp_name_std_v1,
group,ECO,Hemisphere) %>% # select columns
mutate(
Type = case_when(
Type=="HOR" ~ "LAT",
TRUE ~ as.character(Type)),
Data = case_when(
Data=="occurence-based" ~ "occurrence-based",
TRUE ~ as.character(Data)),
spp = sp_name_std_v1,
SHIFT_abs = abs(SHIFT),
velocity = ifelse(Type == "LAT", v.lat.mean, v.ele.mean),
vel_sign = ifelse(velocity>0,"pos","neg"))
# add geospatial data
biov1_meta$ID <- biov1_meta$Name
biov1 <- biov1 %>%
filter(ID %in% biov1_meta$ID)
# merge
biov1 <- merge(biov1,biov1_meta,by="ID",all.y = TRUE) # all.y to keep only studies with geospatial data
# Get Extent (=Lat/Ele extent)
biov1$Extent <- biov1$LatExtentk
biov1$Extent[which(biov1$Type=="ELE")] <- biov1$EleExtentm[which(biov1$Type=="ELE")]
# Number of temporal units
biov1$NtempUnits <- biov1$Nperiodes
these <- which(biov1$NtempUnits > biov1$DUR)
biov1$NtempUnits[these] <- biov1$DUR[these]
# log NtempUnits
biov1$LogNtempUnits <- log(biov1$NtempUnits)
## group irregular sampling with multiple sampling
biov1$Sampling = ifelse(biov1$Sampling %in% c("IRR","MULTIPLE(continuous)"),"MULT", biov1$Sampling)
## Grain
biov1$Grain <- factor(biov1$Grain_size, levels = c("small", "moderate", "large","very_large"))
biov1$ContinuousGrain <-
as.numeric(
as.character(
ifelse(
biov1$Grain == "small", 1,
ifelse(biov1$Grain == "moderate",
2, #10,
ifelse(biov1$Grain == "large",
3, #100,
4))))) #we have a few very large in the full database
#harmonize column names
biov1$Quality <- ifelse(biov1$Uncertainty_Distribution %in% c("RESAMPLING"),"BALANCED",
ifelse(biov1$Uncertainty_Distribution %in% c("MODEL","MODEL+RESAMPLING(same)","RESAMPLING(same)+MODEL","RESAMPLING+MODEL"),"BALANCED",
ifelse(biov1$Uncertainty_Distribution %in% c("DETECTABILITY","RESAMPLING(same)+DETECTABILITY"),"BALANCED",
ifelse(biov1$Uncertainty_Distribution %in% c("RESAMPLING(same)"),"RESURVEYED",biov1$Uncertainty_Distribution))))
table(biov1$Quality)
##
## BALANCED RAW RESURVEYED
## 14050 9497 6522
biov1$PrAb <- ifelse(biov1$Data %in% c("occurence-based", "occurrence-based"),"occurrence","abundance")
table(biov1$PrAb)
##
## abundance occurrence
## 9544 20571
biov1 <- biov1 %>%
dplyr::filter(!is.na(sp_name_std_v1))
all(biov1$sp_name_std_v1 %in% splist$scientificName)
## [1] TRUE
# from continuous to categorical
q1=quantile(biov1$START,probs=c(0,0.25,0.5,0.75,1))
biov1$StartF=cut(biov1$START,breaks=q1,include.lowest=T)
q1=quantile(biov1$Extent ,probs=c(0,0.25,0.5,0.75,1))
biov1$ExtentF=cut(biov1$Extent,breaks=q1,include.lowest=T)
q1=quantile(biov1$ID.area,probs=c(0,0.25,0.5,0.75,1))
biov1$AreaF=cut(biov1$ID.area,breaks=q1,include.lowest=T)
q1=quantile(biov1$N,probs=c(0,0.25,0.5,0.75,1))
biov1$NtaxaF=cut(biov1$N,breaks=q1,include.lowest=T)
# add ID to obs
biov1$obs_ID <- paste0("S",1:nrow(biov1))
summary(biov1$velocity)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -5.8328 0.5415 2.0271 2.3071 2.8319 13.1259 1
summary(biov1$velocity[which(biov1$vel_sign=="pos")])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.009396 0.541507 2.055833 2.434770 2.831932 13.125885
# Class species as Terrestrial, Marine or Freshwater
Terv1 <- unique(biov1$sp_name_std_v1[which(biov1$ECO == "T")])
Terrestrials = unique(c(Terv1))
Mar1 <- unique(biov1$sp_name_std_v1[which(biov1$ECO == "M")])
Marine = unique(c(Mar1))
# Freshwater fish
FFishv1 <- unique(biov1$sp_name_std_v1[(biov1$Class == "Actinopterygii" | biov1$Class == "Cephalaspidomorphi") & biov1$ECO == "T"])
FFish = unique(c(FFishv1))
# Marine fish
MFishv1 <- unique(biov1$sp_name_std_v1[biov1$Class == "Actinopterygii" & biov1$ECO == "M"])
MFish = unique(c(MFishv1))
splist$ECO = NA
splist$ECO[which(splist$scientificName %in% Terrestrials)] <- "T"
splist$ECO[which(splist$scientificName %in% Marine)] <- "M"
splist$ECO[which(splist$scientificName %in% MFish)] <- "M"
biov1$ECO = NA
biov1$ECO[which(biov1$sp_name_std_v1 %in% Terrestrials)] <- "T"
biov1$ECO[which(biov1$sp_name_std_v1 %in% Marine)] <- "M"
biov1$ECO[which(biov1$sp_name_std_v1 %in% MFish)] <- "M"
splist$Group = NA
splist$Group[which(splist$Class == "Phaeophyceae")] <- "Chromista"
splist$Kingdom[which(splist$Class == "Phaeophyceae")] <- "Chromista"
splist$Group[which(splist$Phylum == "Rhodophyta")] <- "Seaweed"
splist$Kingdom[which(splist$Phylum == "Rhodophyta")] <- "Plantae"
splist$Group[which(splist$Family == "Elminiidae")] <- "Barnacles"
splist$Group[which(splist$Kingdom == "Bacteria")] <- "Bacteria"
splist$Group[which(splist$Class == "Holothuroidea")] <- "Sea cucumber"
splist$Group[which(splist$Class == "Aves")] <- "Bird"
splist$Group[which(splist$Class == "Insecta")] <- "Insect"
splist$Group[which(splist$Class == "Mammalia")] <- "Mammal"
splist$Group[which(splist$Class == "Arachnida")] <- "Spider"
splist$Kingdom[which(splist$Kingdom == "Viridiplantae")] <- "Plantae"
splist$Kingdom[which(splist$Phylum == "Tracheophyta")] <- "Plantae"
splist$Group[which(splist$Kingdom == "Plantae")] <- "Plant"
splist$Group[which(splist$Class == "Hydrozoa")] <- "Hydrozoa"
splist$Group[which(splist$Class == "Anthozoa")] <- "Sea anemones and corals"
splist$Group[which(splist$Class == "Polychaeta")] <- "Polychaetes"
splist$Group[which(splist$Phylum == "Mollusca")] <- "Molluscs"
splist$Group[which(splist$Class == "Malacostraca")] <- "Crustacean"
splist$Group[which(splist$Class == "Hexanauplia")] <- "Crustacean"
splist$Group[which(splist$Class == "Maxillopoda")] <- "Crustacean"
splist$Group[which(splist$Class == "Ostracoda")] <- "Crustacean"
splist$Group[which(splist$Class == "Branchiopoda")] <- "Crustacean"
splist$Group[which(splist$Class == "Asteroidea")] <- "Starfish"
splist$Group[which(splist$Class == "Ascidiacea")] <- "Ascidians tunicates and sea squirts"
splist$Class[which(splist$Class == "Actinopteri")] <- "Actinopterygii"
splist$Group[which(splist$Class == "Actinopterygii")] <- "Fish"
splist$Group[which(splist$Class == "Elasmobranchii")] <- "Fish"
splist$Group[which(splist$Order == "Perciformes")] <- "Fish"
splist$Group[which(splist$Class == "Chondrichthyes")] <- "Fish"
splist$Group[which(splist$Class == "Holocephali")] <- "Fish"
splist$Group[which(splist$Class == "Cephalaspidomorphi")] <- "Fish"
splist$Group[which(splist$Class == "Echinoidea")] <- "Sea urchin"
splist$Group[which(splist$Class == "Crinoidea")] <- "Crinoid"
splist$Group[which(splist$Class == "Holothuroidea")] <- "Sea cucumber"
splist$Group[which(splist$Class == "Reptilia")] <- "Reptile"
splist$Group[which(splist$Order == "Squamata")] <- "Reptile"
splist$Group[which(splist$Class == "Ophiuroidea")] <- "Brittle stars"
splist$Group[which(splist$Class == "Chilopoda")] <- "Centipedes"
splist$Group[which(splist$Class == "Diplopoda")] <- "Millipedes"
splist$Group[which(splist$Class == "Amphibia")] <- "Amphibian"
splist$Group[which(splist$Kingdom == "Fungi")] <- "Fungi"
splist$Group[which(splist$Order == "Balanomorpha")] <- "Barnacles"
splist$Group[which(splist$Phylum == "Nematoda")] <- "Nematodes"
splist$Group[which(splist$Class == "Myxini")] <- "Hagfish"
splist$Group[which(splist$Kingdom == "Chromista")] <- "Chromista"
splist$Family[which(splist$Genus == "Dendrocopus")] <- "Picidae"
######################################
biov1 <- merge(biov1[,-which(names(biov1) %in% c("Phylum","Class","Order","Family","Genus","Group","ECO"))],
splist[,c("Kingdom","Phylum","Class","Order","Family","Genus","Group","ECO","scientificName")],
by.x = "sp_name_std_v1", by.y = "scientificName",
all.x = T)
Use only latitudinal and elevational shifts
# v1
biov1 <- biov1 %>%
dplyr::filter(Type %in% c("ELE","LAT")) # Use LAT ELE shifts
# splist
sps <- unique(biov1$sp_name)
splist <- splist %>% dplyr::filter(scientificName %in% sps)
if(any(grep("sp[.]",biov1$sp_reported_name_v1))){
biov1 <- biov1 %>% dplyr::filter(!grepl("sp[.]",sp_reported_name_v1))
}
if(any(grep("sp[.]",biov1$sp_name_std_v1))){
biov1 <- biov1 %>% dplyr::filter(!grepl("sp[.]",sp_name_std_v1))
}
if(any(grep("cf[.]",biov1$sp_reported_name_v1))){
biov1 <- biov1 %>% dplyr::filter(!grepl("cf[.]",sp_reported_name_v1))
}
if(any(grep("cf[.]",biov1$sp_name_std_v1))){
biov1 <- biov1 %>% dplyr::filter(!grepl("cf[.]",sp_name_std_v1))
}
#remove freshwater fishes
biov1 <- biov1[-which((biov1$Class == "Actinopterygii" | biov1$Group == "Fish") & biov1$ECO=="T"),]
#remove marine birds
biov1 <- biov1[-which(biov1$Class == "Aves" & biov1$ECO=="M"),]
if(any(grep("sp[.]",splist$scientificName))){
splist <- splist %>% dplyr::filter(!grepl("sp[.]",scientificName))
}
if(any(grep("sp[.]",splist$scientificName))){
splist <- splist %>% dplyr::filter(!grepl("sp[.]",scientificName))
}
if(any(grep("cf[.]",splist$scientificName))){
splist <- splist %>% dplyr::filter(!grepl("cf[.]",scientificName))
}
if(any(grep("cf[.]",splist$scientificName))){
splist <- splist %>% dplyr::filter(!grepl("cf[.]",scientificName))
}
unique(biov1$Data)
## [1] "abundance-based" "occurrence-based"
biov1$Data[biov1$Data=="occurence-based"] <- "occurrence-based"
biov1$Data <- factor(biov1$Data, levels = unique(biov1$Data))
table(biov1$Data)
##
## abundance-based occurrence-based
## 9421 20393
unique(biov1$Sampling)
## [1] "TWO" "CONTINUOUS" "MULT"
biov1$Sampling = ifelse(biov1$Sampling %in% c("IRR","MULTIPLE(continuous)"),"MULTIPLE", biov1$Sampling)
biov1$Sampling <- ordered(biov1$Sampling,
levels = c("TWO","MULTIPLE","CONTINUOUS"))
table(biov1$Sampling)
##
## TWO MULTIPLE CONTINUOUS
## 25466 0 3105
unique(biov1$Grain_size)
## [1] "small" "large" "moderate" "very_large" NA
biov1$Grain_size <- ifelse(biov1$Grain_size %in% c("large","very_large"),"large",biov1$Grain_size)
biov1$Grain_size <- ordered(biov1$Grain_size,
levels = c("small","moderate","large"))
table(biov1$Grain_size)
##
## small moderate large
## 9740 10657 9416
unique(biov1$Uncertainty_Distribution)
## [1] "RESAMPLING(same)" "RAW"
## [3] "RESAMPLING" "MODEL"
## [5] "RESAMPLING+MODEL" "MODEL+RESAMPLING(same)"
## [7] "RESAMPLING(same)+DETECTABILITY" "RESAMPLING(same)+MODEL"
## [9] "DETECTABILITY"
biov1$Uncertainty_Distribution <- ifelse(biov1$Uncertainty_Distribution %in% c("RESAMPLING","RESAMPLING(same)"),"RESAMPLING",
ifelse(biov1$Uncertainty_Distribution %in% c("MODEL","MODEL+RESAMPLING(same)","RESAMPLING+MODEL"),"MODEL",
ifelse(biov1$Uncertainty_Distribution %in% c("DETECTABILITY","RESAMPLING(same)+DETECTABILITY"),"DETECTABILITY",
biov1$Uncertainty_Distribution)))
biov1$Uncertainty_Distribution <- ifelse(biov1$Uncertainty_Distribution == "RAW","OPPORTUNISTIC","PROCESSED")
table(biov1$Uncertainty_Distribution)
##
## OPPORTUNISTIC PROCESSED
## 9449 20365
#transform study area
biov1$ID.area <- log(biov1$ID.area)
This will be used to merge genetic data with bioshifts based on distance of observations
# # The input file geodatabase
# fgdb <- "C:/Users/brunn/nextCloud/bioshifts_v1_v2/v1/Study_Areas_v1/Study_Areas.gdb"
#
# # List all feature classes in a file geodatabase
# fc_list <- ogrListLayers(fgdb)
#
# # Get centroids
# cl <- makeCluster(detectCores()-1)
# clusterExport(cl, c("readOGR", "gCentroid", "fgdb"))
#
# centroids <- pblapply(fc_list, function(x){
# fc <- readOGR(dsn=fgdb,layer=x)
# c <- data.frame(gCentroid(fc))
# names(c) <- c("centroid.x", "centroid.y")
#
# geomet <- data.frame(terra::geom(terra::vect(fc)))
#
# max_lat <- order(geomet[,"y"], decreasing = T)[1]
# max_lat <- geomet[max_lat,c("x","y")]
# names(max_lat) <- c("max_lat.x", "max_lat.y")
#
# min_lat <- order(geomet[,"y"])[1]
# min_lat <- data.frame(geomet[min_lat,c("x","y")])
# names(min_lat) <- c("min_lat.x", "min_lat.y")
#
# cbind(c, max_lat, min_lat)
# }, cl = cl)
#
# stopCluster(cl)
#
# centroids <- do.call("rbind",centroids)
# centroids$ID <- fc_list
#
# write.csv(centroids, "Data/centroids_study_areas.csv", row.names = FALSE)
centroids <- read.csv(here::here("Data/centroids_study_areas.csv"))
biov1 <- merge(biov1, centroids, by = "ID")
Identify direction of shift
biov1 <- biov1 %>%
mutate(shift_sign = ifelse(SHIFT>0,"pos","neg"),
shift_vel_sign = paste0(shift_sign,vel_sign)) %>%
filter(!is.na(velocity))
ggplot(biov1, aes(x=shift_vel_sign))+
geom_bar()+
coord_flip()+
facet_wrap(Type~Param)
summary(biov1$velocity)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -5.8328 0.5415 2.0271 2.2860 2.7543 13.1259
summary(biov1$velocity[which(biov1$shift_vel_sign=="pospos")])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.009396 0.541507 2.073300 2.470806 2.831932 13.125880
summary(biov1$velocity[which(biov1$shift_vel_sign=="negneg")])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.79347 -2.30487 -1.78004 -1.59872 -0.58152 -0.02945
# calculate lags
# positive values are a lag (range shift lower smaller than expected or in opposite directions) and negative values are range shift larger than expected
biov1$lag <- biov1$velocity-biov1$SHIFT
position = which(biov1$vel_sign == "neg")
biov1$lag[position] <- biov1$lag[position] * -1 # Any negative velocity means the sign of lag has to shift.
# Lag 2
# When shift is in opposite sign of velocity, lag = abs(velocity)
biov1$lag2 = biov1$lag
position <- which(biov1$shift_vel_sign == "posneg" | biov1$shift_vel_sign == "negpos")
biov1$lag2[position] <- abs(biov1$velocity[position])
# Lag 3
# When shift is > velocity, lag = 0
biov1$lag3 = biov1$lag2
position <- which((biov1$shift_sign == "pos" & biov1$SHIFT > biov1$velocity) |
(biov1$shift_sign == "neg" & biov1$SHIFT < biov1$velocity))
biov1$lag3[position] <- 0
{
par(mfrow=c(2,2))
plot(lag~velocity, biov1)
plot(lag2~velocity, biov1)
plot(lag3~velocity, biov1)
}
ggplot(biov1, aes(x = Param, y=lag, fill = Param, color = Param))+
geom_boxplot(alpha = .5, outlier.shape = NA)+
scale_y_continuous(limits = quantile(biov1$lag2, c(0.1, 0.9), na.rm = T))+
facet_wrap(.~Type, scales = "free")
## Warning: Removed 8756 rows containing non-finite values (`stat_boxplot()`).
ggplot(biov1, aes(x = Param, y=lag2, fill = Param, color = Param))+
geom_boxplot(alpha = .5, outlier.shape = NA)+
scale_y_continuous(limits = quantile(biov1$lag2, c(0.1, 0.9), na.rm = T))+
facet_wrap(.~Type, scales = "free")
## Warning: Removed 5850 rows containing non-finite values (`stat_boxplot()`).
ggplot(biov1, aes(x = Param, y=log1p(lag3), fill = Param, color = Param))+
geom_boxplot(alpha = .5, outlier.shape = NA)+
# scale_y_continuous(limits = quantile(log1p(biov1$lag3), c(0.1, 0.9), na.rm = T))+
facet_wrap(.~Type, scales = "free")
ggplot(biov1, aes(x = velocity, y = SHIFT))+
geom_point(aes(color = lag2), alpha = .1)+
scale_color_viridis_c()+
geom_smooth(method = "lm")+
theme_classic()+
facet_wrap(Param~Type+ECO)
## `geom_smooth()` using formula = 'y ~ x'
ggplot(biov1 %>%
filter(shift_vel_sign == "pospos" | shift_vel_sign == "negneg"),
aes(x = abs(velocity), y = abs(SHIFT)))+
geom_point(aes(color = lag2), alpha = .1)+
scale_color_viridis_c()+
geom_smooth(method = "lm")+
theme_classic()+
facet_wrap(Param~Type+ECO)
## `geom_smooth()` using formula = 'y ~ x'
# Fonseca et al. 2023
gen_div <- fread(here::here("Data/Fonseca_etal_2023_EvoLetters.txt"))
gen_div$spp <- gen_div$Species
gen_div$spp <- gsub("-"," ",gen_div$spp)
gen_div$spp <- gsub("_"," ",gen_div$spp)
gen_div$spp <- Clean_Names(gen_div$spp,
return_gen_sps = TRUE)
##############################
# Remove species identified to the genus level or cf. species
all_sps <- unique(gen_div$spp)
length(all_sps)
spgen <- sapply(all_sps, function(x){
tmp. <- strsplit(x, " ")[[1]]
any(tmp. == "sp" | tmp. == "spp" | grepl("sp[.]",tmp.) | grepl("spp[.]",tmp.) | tmp. == "cf[.]" | tmp. == "cf" | length(tmp.) == 1 | length(tmp.) > 2)
})
spgen <- which(spgen)
spgen <- all_sps[-spgen]
all(spgen %in% all_sps)
all_sps <- all_sps[which(all_sps %in% spgen)]
length(all_sps) # N species after filtering species identified to the species level
# filter dataset
gen_div <- gen_div[which(gen_div$spp %in% all_sps),]
length(unique(gen_div$spp)) # 35219
# Species to find
mycols <- c("reported_name_fixed","scientificName","kingdom","phylum","class","order","family","db_code")
sps_accepted_names <- data.frame(matrix(ncol = length(mycols), nrow = length(unique(all_sps))))
names(sps_accepted_names) <- mycols
sps_accepted_names$reported_name_fixed <- unique(all_sps)
tofind_ <- sps_accepted_names[which(is.na(sps_accepted_names$scientificName)),]
tofind_ <- unique(tofind_$reported_name_fixed)
tofind <- data.frame(matrix(nrow = length(tofind_), ncol = 8))
names(tofind) = c("scientificName", "kingdom", "phylum", "class", "order", "family", "db", "db_code")
tofind <- data.frame(species = tofind_, tofind)
tofind <- tofind %>%
mutate(across(everything(), as.character))
# retrieve sp names
sp_names_found <- Find_Sci_Names(sp_name = tofind$species)
# ----------------
# Summary
# ----------------
#
# N taxa:
# 36697
# N taxa found:
# |db | N|
# |:----|-----:|
# |GBIF | 35903|
# |ITIS | 76|
# |NCBI | 278|
# N taxa not found:
# 440
## Add found species names to the sps_accepted_names
all(sp_names_found$requested_name %in% sps_accepted_names$reported_name_fixed)
for(i in 1:length(sp_names_found$requested_name)){
tofill <- unique(which(sps_accepted_names$reported_name_fixed == sp_names_found$requested_name[i]))
sps_accepted_names$scientificName[tofill] <- sp_names_found$scientificName[i]
sps_accepted_names$kingdom[tofill] <- sp_names_found$kingdom[i]
sps_accepted_names$phylum[tofill] <-sp_names_found$phylum[i]
sps_accepted_names$class[tofill] <- sp_names_found$class[i]
sps_accepted_names$order[tofill] <- sp_names_found$order[i]
sps_accepted_names$family[tofill] <- sp_names_found$family[i]
sps_accepted_names$db[tofill] <- sp_names_found$db[i]
sps_accepted_names$db_code[tofill] <- sp_names_found$db_code[i]
sps_accepted_names$method[tofill] <- sp_names_found$method[i]
}
sps_accepted_names$spp = sps_accepted_names$reported_name_fixed
## Keep original taxa information for those species we could not find a name at GBIF
pos <- which(is.na(sps_accepted_names$scientificName))
sps_accepted_names$scientificName[pos] <- sps_accepted_names$spp[pos]
for(i in 1:nrow(gen_div)){
gen_div$spp_new[i] <- sps_accepted_names$scientificName[which(sps_accepted_names$spp == gen_div$spp[i])]
}
write.csv(gen_div,
here::here('Data/Fonseca_etal_2023_EvoLetters_harmo.txt'), row.names = FALSE)
gen_div <- fread(here::here('Data/Fonseca_etal_2023_EvoLetters_harmo.txt'))
N_obs <- gen_div %>%
group_by(spp) %>%
tally() %>%
dplyr::filter(n>1)
# total N
length(unique(gen_div$spp))
## [1] 35156
# total N with > 1 observation
nrow(N_obs)
## [1] 57
# these species
DT::datatable(merge(gen_div[,c("spp","Species")], N_obs, by = "spp"))
The duplicates comes from the grouping of species names in the original data. Fix by deleting F. fusca complex and T. caespitum x T. immigrans.
gen_div <- gen_div %>% filter(!Species %in% c("Formica-fusca_complex", "Tetramorium-caespitum_x_Tetramorium_immigrans"))
# Gen
gen_sps <- unique(gen_div$spp_new)
bsi_v1 <- intersect(biov1$spp, gen_sps)
# Break down
tot1 <- data.frame(N_species_with_gen_data = length(gen_sps),
v1_matches = length(bsi_v1))
tot1
## N_species_with_gen_data v1_matches
## 1 34702 2960
# N_species_with_gen_data v1_matches
# 34702 2982
# Select only species in Bioshifts
gen_div <- gen_div %>% dplyr::filter(spp %in% bsi_v1)
toplot_ <- gen_div %>%
group_by(Group) %>%
dplyr::summarise(Obs = length(spp),
Species = length(unique(spp))) %>%
gather(var, N, -c(Group))
ggplot(toplot_, aes(x = Group, y = N))+
geom_bar(stat="identity")+
geom_text(aes(y=N, label=N,
hjust = ifelse(N<max(N),-.1,1.1)), vjust=0.2, size=3,
position = position_dodge(0.9))+
theme_classic()+
coord_flip()+
facet_wrap(.~var, scales = "free", ncol=1)
unique(gen_div$Group)
## [1] "Actinopterygii" "Insecta" "Arachnida" "Magnoliopsida"
## [5] "Aves" "Mammalia" "Amphibia" "Reptilia"
## [9] "Pinopsida" "Polypodiopsida" "Liliopsida" "Bryopsida"
## [13] "Lycopodiopsida" "Elasmobranchii" "Psilotopsida" "Holocephali"
## [17] "Florideophyceae"
to_plot <- gen_div %>%
select(Group,Nucleotide_diversity, TajimasD) %>%
filter(Group %in% c("Actinopterygii","Insecta","Aves", "Magnoliopsida", "Mammalia")) %>%
gather(metric, value, -Group)
ggplot(to_plot, aes(value, fill = Group, color = Group))+
geom_density(alpha = .3)+
scale_color_viridis_d()+
scale_fill_viridis_d()+
facet_wrap(.~metric, scales = "free")
toplot <- gen_div %>%
mutate(Hemisphere = ifelse(Latitude<0, "South", "North"),
lat_abs = abs(Latitude),
Nucleotide_diversity = scale(Nucleotide_diversity),
TajimasD = scale(TajimasD))
ggplot(toplot, aes(x = lat_abs, y = Nucleotide_diversity))+
geom_point(alpha = .1)+
stat_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
mod1 <- lm(Nucleotide_diversity~lat_abs, toplot)
summary(mod1)
##
## Call:
## lm(formula = Nucleotide_diversity ~ lat_abs, data = toplot)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2743 -0.5904 -0.3532 0.1754 6.7954
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.528347 0.079678 6.631 3.96e-11 ***
## lat_abs -0.011234 0.001648 -6.815 1.14e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9923 on 2912 degrees of freedom
## Multiple R-squared: 0.0157, Adjusted R-squared: 0.01536
## F-statistic: 46.44 on 1 and 2912 DF, p-value: 1.142e-11
ggplot(toplot, aes(x = lat_abs, y = Nucleotide_diversity, color = Group))+
geom_point(alpha = .1)+
stat_smooth(method = "lm")+
facet_wrap(.~Group,scales = "free")
## `geom_smooth()` using formula = 'y ~ x'
## Warning in qt((1 - level)/2, df): NaNs produced
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
mod1 <- lm(Nucleotide_diversity~lat_abs*Group, toplot)
emtrends(mod1, ~ lat_abs*Group, var = "lat_abs")
## lat_abs Group lat_abs.trend SE df lower.CL upper.CL
## 47 Actinopterygii -0.02001 0.00540 2882 -0.0306 -0.00941
## 47 Amphibia -0.01105 0.01918 2882 -0.0487 0.02656
## 47 Arachnida -0.00856 0.01281 2882 -0.0337 0.01655
## 47 Aves -0.02014 0.00329 2882 -0.0266 -0.01368
## 47 Bryopsida 0.00515 0.03441 2882 -0.0623 0.07262
## 47 Elasmobranchii 0.00251 0.01710 2882 -0.0310 0.03605
## 47 Florideophyceae nonEst NA NA NA NA
## 47 Holocephali 0.00842 0.10358 2882 -0.1947 0.21152
## 47 Insecta -0.00824 0.00251 2882 -0.0132 -0.00331
## 47 Liliopsida 0.00274 0.00670 2882 -0.0104 0.01587
## 47 Lycopodiopsida -0.00808 0.08012 2882 -0.1652 0.14902
## 47 Magnoliopsida -0.00205 0.00522 2882 -0.0123 0.00819
## 47 Mammalia -0.02341 0.01687 2882 -0.0565 0.00966
## 47 Pinopsida 0.00341 0.04155 2882 -0.0781 0.08488
## 47 Polypodiopsida 0.00185 0.02034 2882 -0.0380 0.04174
## 47 Psilotopsida nonEst NA NA NA NA
## 47 Reptilia -0.03944 0.05755 2882 -0.1523 0.07340
##
## Confidence level used: 0.95
Nucleotide diversty changes with latitude, with higher values at low latitues.
mundi <- rnaturalearth::ne_coastline(scale = 110, returnclass = "sv")
mundi <- crop(mundi, ext(c(-180,180,-60,90)))
# get vect gen div
vect_div <- terra::vect(gen_div %>%
dplyr::filter(spp %in% biov1$sp_name_std_v1),
geom=c("Longitude","Latitude"),crs=crs(mundi))
# get vect centroids bioshifts
vect_biov1 <- terra::vect(biov1 %>%
filter(spp %in% gen_div$spp),
geom=c("centroid.x","centroid.y"),crs=crs(mundi))
# get raster bioshifts shp files study areas
get_raster_bioshifts = "NO"
if(get_raster_bioshifts=="YES"){
my_ext = terra::ext(mundi)
my_crs = crs(mundi)
rast_biov1 <- terra::rast(my_ext, crs = my_crs, res = 0.5)
values(rast_biov1) <- 0
fgdb <- "C:/Users/brunn/ShadowDrive/CreateGeodatabaseBioShifts/Data/Study_Areas_v1/Study_Areas.gdb"
fc_list <- terra::vector_layers(fgdb)
fc_list <- fc_list[which(fc_list %in% unique(as.data.frame(vect_biov1)$ID))]
for(i in 1:length(fc_list)){ cat("\r",i,"from",length(fc_list))
tmp = terra::vect(fgdb, layer = fc_list[i])
tmp = terra::cells(rast_biov1,tmp)
tmp_cell = tmp[,2]
tmp_vals = rast_biov1[tmp_cell][,1]
rast_biov1[tmp_cell] = tmp_vals+1
}
names(rast_biov1) <- "SA"
rast_biov1[rast_biov1==0] <- NA
writeRaster(rast_biov1, here::here("Data/raster_bioshifts_SA_fonseca.tif"), overwrite = TRUE)
} else {
rast_biov1 <- terra::rast(here::here("Data/raster_bioshifts_SA_fonseca.tif"))
rast_biov1 <- crop(rast_biov1, ext(mundi))
}
values_biov1 <- na.omit(values(rast_biov1))
plot_data <- gen_div %>%
dplyr::filter(spp %in% biov1$sp_name_std_v1) %>%
mutate(x = round(Longitude,3),
y = round(Latitude,3))
gen_fig <- ggplot()+
geom_spatvector(data=mundi)+
geom_hex(data=plot_data,
binwidth = c(4, 4),
aes(x=x, y=y))+
scale_fill_continuous(trans="log",
alpha=.7,
labels=function(x) round(x,0),
breaks=function(x) exp(seq(log(min(x)),log(max(x)),length.out=5)),
type = "viridis", name = "Species with\ngenetic data")+
theme_void()
gen_fig +
ggtitle("Genetic diversity data")
bio_fig <- ggplot() +
geom_spatraster(data=rast_biov1)+
scale_fill_whitebox_c(
name = "BioShifts\nstudy areas",
palette = "viridi",
alpha=.7,
na.value = "white",
# trans="log",
# labels=function(x) round(x,0),
# breaks=function(x) exp(seq(log(min(x)),log(max(x)),length.out=5))
)+
geom_spatvector(data=mundi)+
theme_void()
bio_fig +
ggtitle("Bioshifts study areas")
bio_gen_fig <- bio_fig +
geom_spatvector(data=vect_div, alpha = .3)
bio_gen_fig +
ggtitle("Genetic diversity data & BioShifts study areas")
ggsave(filename = here::here("Figs/Map_sp-level_GD.pdf"),
plot = bio_gen_fig,
width = 12,
height = 5,
device = "pdf",
units = "in")
bio_gen_fig2 <- ggpubr::ggarrange(gen_fig,
bio_fig,
labels = c("A", "B"),
ncol=1)
bio_gen_fig2
ggsave(filename = here::here("Figs/Map_sp-level_GD2.pdf"),
plot = bio_gen_fig2,
width = 10,
height = 8,
device = "pdf",
units = "in")
gen_data_v1_avg <- read.csv(here::here("Data/gen_data_final_fonseca.csv"))
# Example of species with genetic data outside range shift study area
i=1
spp2go <- gen_data_v1_avg$spp[i]
spp2go
## [1] "Abax parallelus"
SA_spp <- gen_data_v1_avg$ID[i]
SA_spp
## [1] "A34_P1"
fgdb <- "C:/Users/brunn/ShadowDrive/CreateGeodatabaseBioShifts/Data/Study_Areas_v1/Study_Areas.gdb"
leaflet() %>% addTiles() %>%
addPolygons(data = terra::vect(fgdb, layer = SA_spp), color = "red") %>%
addCircles(data = terra::vect(gen_div %>%
dplyr::filter(spp == spp2go),
geom=c("Longitude","Latitude"),crs=crs(mundi)),
color = "blue") %>%
addLegend(colors = c("red","blue"),labels =c("Study area","Genetic data"), position = "bottomright")
## Warning: SpatVector layer has inconsistent datum (+proj=longlat +ellps=WGS84 +towgs84=0,0,0,0,0,0,0 +no_defs).
## Need '+proj=longlat +datum=WGS84'
## Warning: [vect] Z coordinates ignored
# Example of species with genetic data within range shift study area
i=2
spp2go <- gen_data_v1_avg$spp[i]
spp2go
## [1] "Abies balsamea"
SA_spp <- gen_data_v1_avg$ID[i]
SA_spp
## [1] "A40_P1"
leaflet() %>% addTiles() %>%
addPolygons(data = terra::vect(fgdb, layer = SA_spp), color = "red") %>%
addCircles(data = terra::vect(gen_div %>%
dplyr::filter(spp == spp2go),
geom=c("Longitude","Latitude"),crs=crs(mundi)),
color = "blue") %>%
addLegend(colors = c("red","blue"),labels =c("Study area","Genetic data"), position = "bottomright")
## Warning: SpatVector layer has inconsistent datum (+proj=longlat +ellps=WGS84 +towgs84=0,0,0,0,0,0,0 +no_defs).
## Need '+proj=longlat +datum=WGS84'
## Warning: [vect] Z coordinates ignored
# n range shifts
nrow(gen_data_v1_avg)
## [1] 10679
# N species
length(unique(gen_data_v1_avg$spp))
## [1] 2908
# Use only plus plus or minus minus
table(gen_data_v1_avg$shift_vel_sign)
##
## negneg negpos posneg pospos
## 190 3906 283 6300
# N shifts and species
gen_data_v1_avg %>%
filter(shift_vel_sign == "pospos" | shift_vel_sign == "negneg") %>%
summarise(N_Shift = length(spp),
N_Species = length(unique(spp)))
## N_Shift N_Species
## 1 6490 2408
toplot_ <- gen_data_v1_avg %>%
filter(shift_vel_sign == "pospos" | shift_vel_sign == "negneg") %>%
group_by(Group) %>%
dplyr::summarise(N_Shift = length(spp),
Species = length(unique(spp))) %>%
gather(var, N, -c(Group))
ggplot(toplot_, aes(x = Group, y = N))+
geom_bar(stat="identity")+
geom_text(aes(y=N, label=N,
hjust = ifelse(N<1000,-.1,1.1)), vjust=0.2, size=3,
position = position_dodge(0.9))+
theme_classic()+
coord_flip()+
facet_wrap(.~var, scales = "free", ncol=1)
toplot <- rbind(
data.frame(dataset = "Full",
lags = biov1$lag2,
shifts = biov1$SHIFT),
data.frame(dataset = "Subset",
lags = gen_data_v1_avg$lag2,
shifts = gen_data_v1_avg$SHIFT))
# lags
ggplot(toplot, aes(x = dataset, y=lags, fill = dataset, color = dataset))+
geom_boxplot(alpha = .5, outlier.shape = NA)+
scale_y_continuous(limits = quantile(toplot$lags, c(0.1, 0.9), na.rm = T))
## Warning: Removed 7888 rows containing non-finite values (`stat_boxplot()`).
tapply(toplot$lags, toplot$dataset, summary)
## $Full
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -145.1653 -0.4375 0.6237 0.3779 2.1868 13.1259
##
## $Subset
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -145.16531 -0.97034 0.51824 0.09531 2.15833 13.12588
mod1 <- aov(lags~dataset, data = toplot)
summary(mod1)
## Df Sum Sq Mean Sq F value Pr(>F)
## dataset 1 628 628.0 27.55 1.53e-07 ***
## Residuals 40490 922790 22.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# shift
ggplot(toplot, aes(x = dataset, y=shifts, fill = dataset, color = dataset))+
geom_boxplot(alpha = .5, outlier.shape = NA)+
scale_y_continuous(limits = quantile(toplot$shifts, c(0.1, 0.9), na.rm = T))
## Warning: Removed 8093 rows containing non-finite values (`stat_boxplot()`).
tapply(toplot$shifts, toplot$dataset, summary)
## $Full
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -119.1696 -0.4468 0.3056 1.1671 2.4345 146.3000
##
## $Subset
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -94.6628 -0.3001 0.3848 1.3481 2.7390 146.3000
mod1 <- aov(shifts~dataset, data = toplot)
summary(mod1)
## Df Sum Sq Mean Sq F value Pr(>F)
## dataset 1 258 257.53 8.538 0.00348 **
## Residuals 40490 1221333 30.16
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
There are difference in mean values of lags, but no difference in mean shift values, between the full and subset datasets.
# gen_data_v1_avg <- gen_data_v1_avg_fon
# gen_data_v1_avg$Nucleotide_diversity <- gen_data_v1_avg$He
gen_data_v1_avg %>%
dplyr::filter(Type == "LAT",
shift_vel_sign == "pospos" | shift_vel_sign == "negneg") %>%
ggplot(aes(x = Nucleotide_diversity, y = log(abs(SHIFT)), color = lag))+
ggtitle("Latitude")+
xlab("Genetic diversity")+
ylab("Range shift (km/year)") +
scale_color_viridis_c(direction = -1)+
geom_point(alpha = .5, size = 3)+
theme_bw()+
geom_smooth(method = "lm", color = "black", size=1)+
stat_smooth(geom="line", method = "lm", alpha=0.3, se = FALSE,
aes(by=as.factor(Article_ID)), color = "black")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in stat_smooth(geom = "line", method = "lm", alpha = 0.3, se = FALSE, :
## Ignoring unknown aesthetics: by
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).
gen_data_v1_avg %>%
dplyr::filter(Type == "ELE",
shift_vel_sign == "pospos" | shift_vel_sign == "negneg") %>%
ggplot(aes(x = Nucleotide_diversity, y = log(abs(SHIFT)), color = lag))+
ggtitle("Elevation")+
xlab("Genetic diversity")+
ylab("Range shift (km/year)") +
scale_color_viridis_c(direction = -1)+
geom_point(alpha = .5, size = 3)+
theme_bw()+
geom_smooth(method = "lm", color = "black", size=1)+
stat_smooth(geom="line", method = "lm", alpha=0.3, se = FALSE,
aes(by=as.factor(Article_ID)), color = "black")
## Warning in stat_smooth(geom = "line", method = "lm", alpha = 0.3, se = FALSE, :
## Ignoring unknown aesthetics: by
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
gen_data_v1_avg %>%
dplyr::filter(Type == "LAT",
shift_vel_sign == "pospos" | shift_vel_sign == "negneg") %>%
ggplot(aes(x = Nucleotide_diversity, y = log(abs(SHIFT)), color = lag))+
ggtitle("Latitude")+
xlab("Genetic diversity")+
ylab("Range shift (km/year)") +
scale_color_viridis_c(direction = -1)+
geom_point(alpha = .5, size = 3)+
theme_bw()+
geom_smooth(method = "lm", color = "black", size=1)+
stat_smooth(geom="line", method = "lm", alpha=0.3, se = FALSE,
aes(by=as.factor(Article_ID)), color = "black")+
facet_wrap(.~Param, scales = "free", ncol = 3)
## Warning in stat_smooth(geom = "line", method = "lm", alpha = 0.3, se = FALSE, :
## Ignoring unknown aesthetics: by
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).
gen_data_v1_avg %>%
dplyr::filter(Type == "ELE") %>%
ggplot(aes(x = Nucleotide_diversity, y = log(abs(SHIFT)), color = lag))+
ggtitle("Elevation")+
xlab("Genetic diversity")+
ylab("Range shift (km/year)") +
scale_color_viridis_c(direction = -1)+
geom_point(alpha = .5, size = 3)+
theme_bw()+
geom_smooth(method = "lm", color = "black", size=1)+
stat_smooth(geom="line", method = "lm", alpha=0.3, se = FALSE,
aes(by=as.factor(Article_ID)), color = "black")+
facet_wrap(.~Param, scales = "free", ncol = 3)
## Warning in stat_smooth(geom = "line", method = "lm", alpha = 0.3, se = FALSE, :
## Ignoring unknown aesthetics: by
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 222 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 222 rows containing non-finite values (`stat_smooth()`).
gen_data_v1_avg %>%
dplyr::filter(Type == "LAT") %>%
ggplot(aes(x = Nucleotide_diversity, y = log(abs(SHIFT)), color = lag))+
ggtitle("Latitude")+
xlab("Genetic diversity")+
ylab("Range shift (km/year)") +
scale_color_viridis_c(direction = -1)+
geom_point(alpha = .5, size = 3)+
theme_bw()+
geom_smooth(method = "lm", color = "black", size=1)+
stat_smooth(geom="line", method = "lm", alpha=0.3, se = FALSE,
aes(by=as.factor(Article_ID)), color = "black")+
facet_wrap(.~Group, scales = "free", ncol = 3)
## Warning in stat_smooth(geom = "line", method = "lm", alpha = 0.3, se = FALSE, :
## Ignoring unknown aesthetics: by
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 408 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 408 rows containing non-finite values (`stat_smooth()`).
gen_data_v1_avg %>%
dplyr::filter(Type == "ELE") %>%
ggplot(aes(x = Nucleotide_diversity, y = log(abs(SHIFT)), color = lag))+
ggtitle("Elevation")+
xlab("Genetic diversity")+
ylab("Range shift (km/year)") +
scale_color_viridis_c(direction = -1)+
geom_point(alpha = .5, size = 3)+
theme_bw()+
geom_smooth(method = "lm", color = "black", size=1)+
stat_smooth(geom="line", method = "lm", alpha=0.3, se = FALSE,
aes(by=as.factor(Article_ID)), color = "black")+
facet_wrap(.~Group, scales = "free", ncol = 3)
## Warning in stat_smooth(geom = "line", method = "lm", alpha = 0.3, se = FALSE, :
## Ignoring unknown aesthetics: by
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 222 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 222 rows containing non-finite values (`stat_smooth()`).
gen_data_v1_avg %>%
dplyr::filter(Type == "LAT") %>%
ggplot(aes(x = Nucleotide_diversity, y = log(abs(SHIFT)), color = lag))+
ggtitle("Latitude")+
xlab("Genetic diversity")+
ylab("Range shift (km/year)") +
scale_color_viridis_c(direction = -1)+
geom_point(alpha = .5, size = 3)+
theme_bw()+
geom_smooth(method = "lm", color = "black", size=1)+
stat_smooth(geom="line", method = "lm", alpha=0.3, se = FALSE,
aes(by=as.factor(Article_ID)), color = "black")+
facet_wrap(Group~Param, scales = "free")
## Warning in stat_smooth(geom = "line", method = "lm", alpha = 0.3, se = FALSE, :
## Ignoring unknown aesthetics: by
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 408 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 408 rows containing non-finite values (`stat_smooth()`).
gen_data_v1_avg %>%
dplyr::filter(Type == "ELE") %>%
ggplot(aes(x = Nucleotide_diversity, y = log(abs(SHIFT)), color = lag))+
ggtitle("Elevation")+
xlab("Genetic diversity")+
ylab("Range shift (km/year)") +
scale_color_viridis_c(direction = -1)+
geom_point(alpha = .5, size = 3)+
theme_bw()+
geom_smooth(method = "lm", color = "black", size=1)+
stat_smooth(geom="line", method = "lm", alpha=0.3, se = FALSE,
aes(by=as.factor(Article_ID)), color = "black")+
facet_wrap(Param~Group, scales = "free")
## Warning in stat_smooth(geom = "line", method = "lm", alpha = 0.3, se = FALSE, :
## Ignoring unknown aesthetics: by
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 222 rows containing non-finite values (`stat_smooth()`).
## Warning in qt((1 - level)/2, df): NaNs produced
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 222 rows containing non-finite values (`stat_smooth()`).
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf