1 Load libraries and source code

# devtools::install_github('EcologicalTraitData/traitdataform')

rm(list=ls())

list.of.packages <- c("ggplot2","data.table","dplyr","tidyr","parallel","bdc","taxadb","traitdataform","pbapply","tidyverse","readxl","lme4","coefplot","sjPlot","sjmisc","effects","rgdal","maptools","rgeos","terra","MuMIn","rnaturalearthdata","lsmeans","GGally","tidyterra","httr","purrr","rlist","usethis","ggpubr","leaflet")

new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]

if(length(new.packages)) install.packages(new.packages)

sapply(list.of.packages, require, character.only = TRUE)
## Warning: package 'ggplot2' was built under R version 4.3.2
## Warning: package 'lme4' was built under R version 4.3.2
## Warning: package 'terra' was built under R version 4.3.2
## Warning: package 'tidyterra' was built under R version 4.3.2
## Warning: package 'ggpubr' was built under R version 4.3.2
##           ggplot2        data.table             dplyr             tidyr 
##              TRUE              TRUE              TRUE              TRUE 
##          parallel               bdc            taxadb     traitdataform 
##              TRUE              TRUE              TRUE              TRUE 
##           pbapply         tidyverse            readxl              lme4 
##              TRUE              TRUE              TRUE              TRUE 
##          coefplot            sjPlot            sjmisc           effects 
##              TRUE              TRUE              TRUE              TRUE 
##             rgdal          maptools             rgeos             terra 
##              TRUE              TRUE              TRUE              TRUE 
##             MuMIn rnaturalearthdata           lsmeans            GGally 
##              TRUE              TRUE              TRUE              TRUE 
##         tidyterra              httr             purrr             rlist 
##              TRUE              TRUE              TRUE              TRUE 
##           usethis            ggpubr           leaflet 
##              TRUE              TRUE              TRUE

2 source functions

# Code to find accepted species names
# Before you can download the source code from github, make sure you have a personal github token
# run this:
# usethis::edit_r_environ()
# if you have no toke, create one following:
# 1. Generate on GitHub your personal token
# 1.1 Go to GitHub
# 2.1 In the right corner go to "Settings"
# 2.2 Then in the left part go to "Developer setting"
# 2.3 Select the option "Personal access tokens"
# 2.4 Select the option "Generate new token"
# 2.5 Copy your personal token
# run this to add the token to your .Renviron
# usethis::edit_r_environ()
# write GITHUB_PAT=YOUR_TOKEN
# Sys.getenv("GITHUB_PAT")

# source(here::here("R/fetchGHdata.R"))
# 
# fetchGHdata(gh_account = "Bioshifts", 
#             repo = "bioshifts_v1_v2", 
#             path = "R/Source_code/Clean_names.R",
#             output = here::here("R/Clean_names.R"))
# 
# fetchGHdata(gh_account = "Bioshifts", 
#             repo = "bioshifts_v1_v2", 
#             path = "R/Source_code/Find_Sci_Names.R",
#             output = here::here("R/Find_Sci_Names.R"))

source(here::here("R/Clean_names.R"))
source(here::here("R/Find_Sci_Names.R"))
source(here::here("R/mclapply_hack.R"))
## 
##     *** Microsoft Windows detected ***
##     
##     For technical reasons, the MS Windows version of mclapply()
##     is implemented as a serial function instead of a parallel
##     function.    
## 
##     As a quick hack, we replace this serial version of mclapply()
##     with a wrapper to parLapply() for this R session. Please see
## 
##       http://www.stat.cmu.edu/~nmv/2014/07/14/implementing-mclapply-on-windows 
## 
##     for details.
# Malin's transformation for moving values slightly inward. 
transform01 <- function(x) (x * (length(x) - 1) + 0.5) / (length(x))

# De Kort transformation
deKort_trans <- function(p){
  p <- scale(p)
  p <- (p - min(p, na.rm = T))/(max(p, na.rm = T)-min(p, na.rm = T))
  return(p)
}

# Malin's transformation before a logit transformation
logit_trans <- function(p){
  p <- transform01(p)
  p <- log(p/(1-p))
  return(p)
}

3 Load species list

splist <- read.csv(here::here("Data/splist.csv"), header = T)

# remove duplicated sp_names
splist <- splist %>%
  dplyr::filter(!duplicated(scientificName))%>%
  mutate(Kingdom=kingdom,
         Phylum=phylum,
         Class=class,
         Order=order,
         Family=family)%>%
  dplyr::select(scientificName,Kingdom,Phylum,Class,Order,Family,db)

splist$Genus <- sapply(splist$scientificName, function(x){
  strsplit(x," ")[[1]][1]
})

4 Load bioshifts

4.1 Geo data

if(!file.exists(here::here("data/bioshiftsv3_metadata.csv"))){
  
  # # From Google drive
  # drive_path <- readWindowsShortcut("G:/My Drive.lnk")$pathname
  # bioshifts_path <- readWindowsShortcut(paste0(drive_path,"\\BIOSHIFTS Working Group.lnk"))$pathname
  # shp_path <- here::here(bioshifts_path,("DATA/BIOSHIFTS_v3/CleanDatabasev3/ShapefilesBioShiftsv3"))
  # all_shps <- list.files(shp_path,pattern = "dbf", full.names = TRUE)
  
  # From local folder
  all_shps <- list.files("C:/Users/brunn/ShadowDrive/CreateGeodatabaseBioShifts/Data/ShapefilesBioShiftsv3",
                         pattern = "shp", full.names = TRUE)
  
  biov1_meta <- pblapply(all_shps, terra::vect)
  biov1_meta <- pblapply(biov1_meta, data.frame)
  
  biov1_meta <- data.table::rbindlist(biov1_meta)
  
  write.csv(biov1_meta, here::here("data/bioshiftsv3_metadata.csv"),row.names = FALSE)
  
} else {
  
  biov1_meta <- read.csv(here::here("data/bioshiftsv3_metadata.csv"))
  
}

4.2 Load v1

biov1 <- read.csv(here::here("Data/biov1_fixednames.csv"), header = T)

# Fix references in biov1
biov1$sp_name_std_v1 <- gsub("_"," ",biov1$sp_name_std_v1)

# Select columns of interest
biov1 <- biov1 %>%
  filter(!Type %in% c("BAT","LON")) %>%
  dplyr::select(ID,Article_ID,Study_ID,
                Type,Param,Trend,SHIFT,UNIT,DUR,
                v.lat.mean,v.ele.mean,trend.mean,
                START,END,Sampling,Uncertainty_Distribution,Uncertainty_Parameter,Nperiodes,
                N,Grain_size,Data,ID.area,
                Phylum,Class,Order,Family,Genus,sp_name_std_v1,
                group,ECO,Hemisphere) %>% # select columns
  mutate(
    Type = case_when(
      Type=="HOR" ~ "LAT",
      TRUE ~ as.character(Type)),
    Data = case_when(
      Data=="occurence-based" ~ "occurrence-based",
      TRUE ~ as.character(Data)),
    spp = sp_name_std_v1,
    SHIFT_abs = abs(SHIFT),
    velocity = ifelse(Type == "LAT", v.lat.mean, v.ele.mean),
    vel_sign = ifelse(velocity>0,"pos","neg"))

# add geospatial data
biov1_meta$ID <- biov1_meta$Name

biov1 <- biov1 %>%
  filter(ID %in% biov1_meta$ID)

# merge
biov1 <- merge(biov1,biov1_meta,by="ID",all.y = TRUE) # all.y to keep only studies with geospatial data

# Get Extent (=Lat/Ele extent)
biov1$Extent <- biov1$LatExtentk
biov1$Extent[which(biov1$Type=="ELE")] <- biov1$EleExtentm[which(biov1$Type=="ELE")]

# Number of temporal units
biov1$NtempUnits <- biov1$Nperiodes
these <- which(biov1$NtempUnits > biov1$DUR)
biov1$NtempUnits[these] <- biov1$DUR[these]

# log NtempUnits
biov1$LogNtempUnits <- log(biov1$NtempUnits)

## group irregular sampling with multiple sampling  
biov1$Sampling = ifelse(biov1$Sampling %in% c("IRR","MULTIPLE(continuous)"),"MULT", biov1$Sampling)

## Grain
biov1$Grain <- factor(biov1$Grain_size, levels = c("small", "moderate", "large","very_large"))
biov1$ContinuousGrain <- 
  as.numeric(
    as.character(
      ifelse(
        biov1$Grain == "small", 1, 
        ifelse(biov1$Grain == "moderate",
               2, #10,
               ifelse(biov1$Grain == "large",
                      3, #100,
                      4))))) #we have a few very large in the full database

#harmonize column names
biov1$Quality <- ifelse(biov1$Uncertainty_Distribution %in% c("RESAMPLING"),"BALANCED",
                        ifelse(biov1$Uncertainty_Distribution %in% c("MODEL","MODEL+RESAMPLING(same)","RESAMPLING(same)+MODEL","RESAMPLING+MODEL"),"BALANCED",
                               ifelse(biov1$Uncertainty_Distribution %in% c("DETECTABILITY","RESAMPLING(same)+DETECTABILITY"),"BALANCED",
                                      ifelse(biov1$Uncertainty_Distribution %in% c("RESAMPLING(same)"),"RESURVEYED",biov1$Uncertainty_Distribution))))

table(biov1$Quality) 
## 
##   BALANCED        RAW RESURVEYED 
##      14050       9497       6522
biov1$PrAb <- ifelse(biov1$Data %in% c("occurence-based", "occurrence-based"),"occurrence","abundance")
table(biov1$PrAb) 
## 
##  abundance occurrence 
##       9544      20571
biov1 <- biov1 %>%
  dplyr::filter(!is.na(sp_name_std_v1))

all(biov1$sp_name_std_v1 %in% splist$scientificName)
## [1] TRUE
# from continuous to categorical
q1=quantile(biov1$START,probs=c(0,0.25,0.5,0.75,1))
biov1$StartF=cut(biov1$START,breaks=q1,include.lowest=T)

q1=quantile(biov1$Extent ,probs=c(0,0.25,0.5,0.75,1))
biov1$ExtentF=cut(biov1$Extent,breaks=q1,include.lowest=T)

q1=quantile(biov1$ID.area,probs=c(0,0.25,0.5,0.75,1))
biov1$AreaF=cut(biov1$ID.area,breaks=q1,include.lowest=T)

q1=quantile(biov1$N,probs=c(0,0.25,0.5,0.75,1))
biov1$NtaxaF=cut(biov1$N,breaks=q1,include.lowest=T)

# add ID to obs
biov1$obs_ID <- paste0("S",1:nrow(biov1))

summary(biov1$velocity)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -5.8328  0.5415  2.0271  2.3071  2.8319 13.1259       1
summary(biov1$velocity[which(biov1$vel_sign=="pos")])
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##  0.009396  0.541507  2.055833  2.434770  2.831932 13.125885

4.3 Classify species

# Class species as Terrestrial, Marine or Freshwater
Terv1 <- unique(biov1$sp_name_std_v1[which(biov1$ECO == "T")])
Terrestrials = unique(c(Terv1))

Mar1 <- unique(biov1$sp_name_std_v1[which(biov1$ECO == "M")])
Marine = unique(c(Mar1))

# Freshwater fish
FFishv1 <- unique(biov1$sp_name_std_v1[(biov1$Class == "Actinopterygii" | biov1$Class == "Cephalaspidomorphi") & biov1$ECO == "T"])
FFish = unique(c(FFishv1))
# Marine fish
MFishv1 <- unique(biov1$sp_name_std_v1[biov1$Class == "Actinopterygii" & biov1$ECO == "M"])
MFish = unique(c(MFishv1))

splist$ECO = NA
splist$ECO[which(splist$scientificName %in% Terrestrials)] <- "T"
splist$ECO[which(splist$scientificName %in% Marine)] <- "M"
splist$ECO[which(splist$scientificName %in% MFish)] <- "M"

biov1$ECO = NA
biov1$ECO[which(biov1$sp_name_std_v1 %in% Terrestrials)] <- "T"
biov1$ECO[which(biov1$sp_name_std_v1 %in% Marine)] <- "M"
biov1$ECO[which(biov1$sp_name_std_v1 %in% MFish)] <- "M"

splist$Group = NA
splist$Group[which(splist$Class == "Phaeophyceae")] <- "Chromista"
splist$Kingdom[which(splist$Class == "Phaeophyceae")] <- "Chromista"
splist$Group[which(splist$Phylum == "Rhodophyta")] <- "Seaweed"
splist$Kingdom[which(splist$Phylum == "Rhodophyta")] <- "Plantae"
splist$Group[which(splist$Family == "Elminiidae")] <- "Barnacles"
splist$Group[which(splist$Kingdom == "Bacteria")] <- "Bacteria"
splist$Group[which(splist$Class == "Holothuroidea")] <- "Sea cucumber"
splist$Group[which(splist$Class == "Aves")] <- "Bird"
splist$Group[which(splist$Class == "Insecta")] <- "Insect"
splist$Group[which(splist$Class == "Mammalia")] <- "Mammal"
splist$Group[which(splist$Class == "Arachnida")] <- "Spider"
splist$Kingdom[which(splist$Kingdom == "Viridiplantae")] <- "Plantae"
splist$Kingdom[which(splist$Phylum == "Tracheophyta")] <- "Plantae"
splist$Group[which(splist$Kingdom == "Plantae")] <- "Plant"
splist$Group[which(splist$Class == "Hydrozoa")] <- "Hydrozoa"
splist$Group[which(splist$Class == "Anthozoa")] <- "Sea anemones and corals"
splist$Group[which(splist$Class == "Polychaeta")] <- "Polychaetes"
splist$Group[which(splist$Phylum == "Mollusca")] <- "Molluscs"
splist$Group[which(splist$Class == "Malacostraca")] <- "Crustacean"
splist$Group[which(splist$Class == "Hexanauplia")] <- "Crustacean"
splist$Group[which(splist$Class == "Maxillopoda")] <- "Crustacean"
splist$Group[which(splist$Class == "Ostracoda")] <- "Crustacean"
splist$Group[which(splist$Class == "Branchiopoda")] <- "Crustacean"
splist$Group[which(splist$Class == "Asteroidea")] <- "Starfish"
splist$Group[which(splist$Class == "Ascidiacea")] <- "Ascidians tunicates and sea squirts"
splist$Class[which(splist$Class == "Actinopteri")] <- "Actinopterygii"
splist$Group[which(splist$Class == "Actinopterygii")] <- "Fish"
splist$Group[which(splist$Class == "Elasmobranchii")] <- "Fish"
splist$Group[which(splist$Order == "Perciformes")] <- "Fish"
splist$Group[which(splist$Class == "Chondrichthyes")] <- "Fish"
splist$Group[which(splist$Class == "Holocephali")] <- "Fish"
splist$Group[which(splist$Class == "Cephalaspidomorphi")] <- "Fish"
splist$Group[which(splist$Class == "Echinoidea")] <- "Sea urchin"
splist$Group[which(splist$Class == "Crinoidea")] <- "Crinoid"
splist$Group[which(splist$Class == "Holothuroidea")] <- "Sea cucumber"
splist$Group[which(splist$Class == "Reptilia")] <- "Reptile"
splist$Group[which(splist$Order == "Squamata")] <- "Reptile"
splist$Group[which(splist$Class == "Ophiuroidea")] <- "Brittle stars"
splist$Group[which(splist$Class == "Chilopoda")] <- "Centipedes"
splist$Group[which(splist$Class == "Diplopoda")] <- "Millipedes"
splist$Group[which(splist$Class == "Amphibia")] <- "Amphibian"
splist$Group[which(splist$Kingdom == "Fungi")] <- "Fungi"
splist$Group[which(splist$Order == "Balanomorpha")] <- "Barnacles"
splist$Group[which(splist$Phylum == "Nematoda")] <- "Nematodes"
splist$Group[which(splist$Class == "Myxini")] <- "Hagfish"
splist$Group[which(splist$Kingdom == "Chromista")] <- "Chromista"
splist$Family[which(splist$Genus == "Dendrocopus")] <- "Picidae"

######################################
biov1 <- merge(biov1[,-which(names(biov1) %in% c("Phylum","Class","Order","Family","Genus","Group","ECO"))], 
               splist[,c("Kingdom","Phylum","Class","Order","Family","Genus","Group","ECO","scientificName")], 
               by.x = "sp_name_std_v1", by.y = "scientificName", 
               all.x = T)

4.4 Filter LAT / ELE shifts

Use only latitudinal and elevational shifts

# v1
biov1 <- biov1 %>%
  dplyr::filter(Type %in% c("ELE","LAT"))  # Use LAT ELE shifts

# splist
sps <- unique(biov1$sp_name)
splist <- splist %>% dplyr::filter(scientificName %in% sps)

4.5 Remove species identified to the genus level or cf.

if(any(grep("sp[.]",biov1$sp_reported_name_v1))){
  biov1 <- biov1 %>% dplyr::filter(!grepl("sp[.]",sp_reported_name_v1))
}
if(any(grep("sp[.]",biov1$sp_name_std_v1))){
  biov1 <- biov1 %>% dplyr::filter(!grepl("sp[.]",sp_name_std_v1))
}
if(any(grep("cf[.]",biov1$sp_reported_name_v1))){
  biov1 <- biov1 %>% dplyr::filter(!grepl("cf[.]",sp_reported_name_v1))
}
if(any(grep("cf[.]",biov1$sp_name_std_v1))){
  biov1 <- biov1 %>% dplyr::filter(!grepl("cf[.]",sp_name_std_v1))
}

#remove freshwater fishes
biov1 <- biov1[-which((biov1$Class == "Actinopterygii" | biov1$Group == "Fish") & biov1$ECO=="T"),]

#remove marine birds
biov1 <- biov1[-which(biov1$Class == "Aves" & biov1$ECO=="M"),]


if(any(grep("sp[.]",splist$scientificName))){
  splist <- splist %>% dplyr::filter(!grepl("sp[.]",scientificName))
}
if(any(grep("sp[.]",splist$scientificName))){
  splist <- splist %>% dplyr::filter(!grepl("sp[.]",scientificName))
}
if(any(grep("cf[.]",splist$scientificName))){
  splist <- splist %>% dplyr::filter(!grepl("cf[.]",scientificName))
}
if(any(grep("cf[.]",splist$scientificName))){
  splist <- splist %>% dplyr::filter(!grepl("cf[.]",scientificName))
}

4.6 Reclassify methodological variables

unique(biov1$Data)
## [1] "abundance-based"  "occurrence-based"
biov1$Data[biov1$Data=="occurence-based"] <- "occurrence-based"
biov1$Data <- factor(biov1$Data, levels = unique(biov1$Data))
table(biov1$Data)
## 
##  abundance-based occurrence-based 
##             9421            20393
unique(biov1$Sampling)
## [1] "TWO"        "CONTINUOUS" "MULT"
biov1$Sampling = ifelse(biov1$Sampling %in% c("IRR","MULTIPLE(continuous)"),"MULTIPLE", biov1$Sampling)
biov1$Sampling <- ordered(biov1$Sampling, 
                          levels = c("TWO","MULTIPLE","CONTINUOUS"))
table(biov1$Sampling)
## 
##        TWO   MULTIPLE CONTINUOUS 
##      25466          0       3105
unique(biov1$Grain_size)
## [1] "small"      "large"      "moderate"   "very_large" NA
biov1$Grain_size <- ifelse(biov1$Grain_size %in% c("large","very_large"),"large",biov1$Grain_size)
biov1$Grain_size <- ordered(biov1$Grain_size, 
                            levels = c("small","moderate","large"))
table(biov1$Grain_size)
## 
##    small moderate    large 
##     9740    10657     9416
unique(biov1$Uncertainty_Distribution)
## [1] "RESAMPLING(same)"               "RAW"                           
## [3] "RESAMPLING"                     "MODEL"                         
## [5] "RESAMPLING+MODEL"               "MODEL+RESAMPLING(same)"        
## [7] "RESAMPLING(same)+DETECTABILITY" "RESAMPLING(same)+MODEL"        
## [9] "DETECTABILITY"
biov1$Uncertainty_Distribution <- ifelse(biov1$Uncertainty_Distribution %in% c("RESAMPLING","RESAMPLING(same)"),"RESAMPLING",
                                         ifelse(biov1$Uncertainty_Distribution %in% c("MODEL","MODEL+RESAMPLING(same)","RESAMPLING+MODEL"),"MODEL",
                                                ifelse(biov1$Uncertainty_Distribution %in% c("DETECTABILITY","RESAMPLING(same)+DETECTABILITY"),"DETECTABILITY",
                                                       biov1$Uncertainty_Distribution)))
biov1$Uncertainty_Distribution <- ifelse(biov1$Uncertainty_Distribution == "RAW","OPPORTUNISTIC","PROCESSED")  
table(biov1$Uncertainty_Distribution)
## 
## OPPORTUNISTIC     PROCESSED 
##          9449         20365
#transform study area
biov1$ID.area <- log(biov1$ID.area)

4.7 Get centroids of study areas

This will be used to merge genetic data with bioshifts based on distance of observations

# # The input file geodatabase
# fgdb <- "C:/Users/brunn/nextCloud/bioshifts_v1_v2/v1/Study_Areas_v1/Study_Areas.gdb"
# 
# # List all feature classes in a file geodatabase
# fc_list <- ogrListLayers(fgdb)
# 
# # Get centroids
# cl <- makeCluster(detectCores()-1)
# clusterExport(cl, c("readOGR", "gCentroid", "fgdb"))
# 
# centroids <- pblapply(fc_list, function(x){
#     fc <- readOGR(dsn=fgdb,layer=x)
#     c <- data.frame(gCentroid(fc))
#     names(c) <- c("centroid.x", "centroid.y")
# 
#     geomet <- data.frame(terra::geom(terra::vect(fc)))
# 
#     max_lat <- order(geomet[,"y"], decreasing = T)[1]
#     max_lat <- geomet[max_lat,c("x","y")]
#     names(max_lat) <- c("max_lat.x", "max_lat.y")
# 
#     min_lat <- order(geomet[,"y"])[1]
#     min_lat <- data.frame(geomet[min_lat,c("x","y")])
#     names(min_lat) <- c("min_lat.x", "min_lat.y")
# 
#     cbind(c, max_lat, min_lat)
# }, cl = cl)
# 
# stopCluster(cl)
# 
# centroids <- do.call("rbind",centroids)
# centroids$ID <- fc_list
# 
# write.csv(centroids, "Data/centroids_study_areas.csv", row.names = FALSE)

centroids <- read.csv(here::here("Data/centroids_study_areas.csv"))

biov1 <- merge(biov1, centroids, by = "ID")

5 Direction of shift

Identify direction of shift

biov1 <- biov1 %>%
  mutate(shift_sign = ifelse(SHIFT>0,"pos","neg"),
         shift_vel_sign = paste0(shift_sign,vel_sign)) %>%
  filter(!is.na(velocity))

ggplot(biov1, aes(x=shift_vel_sign))+
  geom_bar()+
  coord_flip()+
  facet_wrap(Type~Param)

summary(biov1$velocity)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -5.8328  0.5415  2.0271  2.2860  2.7543 13.1259
summary(biov1$velocity[which(biov1$shift_vel_sign=="pospos")])
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##  0.009396  0.541507  2.073300  2.470806  2.831932 13.125880
summary(biov1$velocity[which(biov1$shift_vel_sign=="negneg")])
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -3.79347 -2.30487 -1.78004 -1.59872 -0.58152 -0.02945

6 Calculate lags raw

# calculate lags
# positive values are a lag (range shift lower smaller than expected or in opposite directions) and negative values are range shift larger than expected
biov1$lag <- biov1$velocity-biov1$SHIFT
position = which(biov1$vel_sign == "neg")
biov1$lag[position] <- biov1$lag[position] * -1 # Any negative velocity means the sign of lag has to shift.

# Lag 2
# When shift is in opposite sign of velocity, lag = abs(velocity)
biov1$lag2 = biov1$lag
position <- which(biov1$shift_vel_sign == "posneg" | biov1$shift_vel_sign == "negpos")
biov1$lag2[position] <- abs(biov1$velocity[position])

# Lag 3
# When shift is > velocity, lag = 0
biov1$lag3 = biov1$lag2
position <- which((biov1$shift_sign == "pos" & biov1$SHIFT > biov1$velocity) |
                    (biov1$shift_sign == "neg" & biov1$SHIFT < biov1$velocity))
biov1$lag3[position] <- 0


{
  par(mfrow=c(2,2))
  plot(lag~velocity, biov1)
  plot(lag2~velocity, biov1)
  plot(lag3~velocity, biov1)
}

ggplot(biov1, aes(x = Param, y=lag, fill = Param, color = Param))+
  geom_boxplot(alpha = .5, outlier.shape = NA)+
  scale_y_continuous(limits = quantile(biov1$lag2, c(0.1, 0.9), na.rm = T))+
  facet_wrap(.~Type, scales = "free")
## Warning: Removed 8756 rows containing non-finite values (`stat_boxplot()`).
ggplot(biov1, aes(x = Param, y=lag2, fill = Param, color = Param))+
  geom_boxplot(alpha = .5, outlier.shape = NA)+
  scale_y_continuous(limits = quantile(biov1$lag2, c(0.1, 0.9), na.rm = T))+
  facet_wrap(.~Type, scales = "free")
## Warning: Removed 5850 rows containing non-finite values (`stat_boxplot()`).
ggplot(biov1, aes(x = Param, y=log1p(lag3), fill = Param, color = Param))+
  geom_boxplot(alpha = .5, outlier.shape = NA)+
  # scale_y_continuous(limits = quantile(log1p(biov1$lag3), c(0.1, 0.9), na.rm = T))+
  facet_wrap(.~Type, scales = "free")

7 All shifts vs same direction

ggplot(biov1, aes(x = velocity, y = SHIFT))+
  geom_point(aes(color = lag2), alpha = .1)+
  scale_color_viridis_c()+
  geom_smooth(method = "lm")+
  theme_classic()+
  facet_wrap(Param~Type+ECO)
## `geom_smooth()` using formula = 'y ~ x'

ggplot(biov1 %>%
         filter(shift_vel_sign == "pospos" | shift_vel_sign == "negneg"), 
       aes(x = abs(velocity), y = abs(SHIFT)))+
  geom_point(aes(color = lag2), alpha = .1)+
  scale_color_viridis_c()+
  geom_smooth(method = "lm")+
  theme_classic()+
  facet_wrap(Param~Type+ECO)
## `geom_smooth()` using formula = 'y ~ x'

8 Load genetic diversity data

  • Fonseca et al. 2023 Evo Letters: Mitochondrial DNA (for vertebrates, arachnids, and insects. Chloroplast DNA and nucleotide diversity (for plants). (Ï€) was calculated to describe patterns of intraspecific genetic diversity.
# Fonseca et al. 2023
gen_div <- fread(here::here("Data/Fonseca_etal_2023_EvoLetters.txt"))
gen_div$spp <- gen_div$Species
gen_div$spp <-  gsub("-"," ",gen_div$spp)
gen_div$spp <-  gsub("_"," ",gen_div$spp)
gen_div$spp <- Clean_Names(gen_div$spp,
                           return_gen_sps = TRUE)

##############################
# Remove species identified to the genus level or cf. species
all_sps <- unique(gen_div$spp)
length(all_sps)

spgen <- sapply(all_sps, function(x){
  tmp. <- strsplit(x, " ")[[1]]
  any(tmp. == "sp" | tmp. == "spp" | grepl("sp[.]",tmp.) | grepl("spp[.]",tmp.) | tmp. == "cf[.]" | tmp. == "cf" | length(tmp.) == 1 | length(tmp.) > 2)
})
spgen <- which(spgen)
spgen <- all_sps[-spgen]
all(spgen %in% all_sps)

all_sps <- all_sps[which(all_sps %in% spgen)]
length(all_sps) # N species after filtering species identified to the species level

# filter dataset
gen_div <- gen_div[which(gen_div$spp %in% all_sps),]
length(unique(gen_div$spp)) # 35219

8.1 Fix names at the genetic data

# Species to find
mycols <- c("reported_name_fixed","scientificName","kingdom","phylum","class","order","family","db_code")
sps_accepted_names <- data.frame(matrix(ncol = length(mycols), nrow = length(unique(all_sps))))
names(sps_accepted_names) <- mycols

sps_accepted_names$reported_name_fixed <- unique(all_sps)
tofind_ <- sps_accepted_names[which(is.na(sps_accepted_names$scientificName)),]
tofind_ <- unique(tofind_$reported_name_fixed)

tofind <- data.frame(matrix(nrow = length(tofind_), ncol = 8))
names(tofind) = c("scientificName", "kingdom", "phylum", "class", "order", "family", "db", "db_code")

tofind <- data.frame(species = tofind_, tofind)

tofind <- tofind %>%
  mutate(across(everything(), as.character))


# retrieve sp names
sp_names_found <- Find_Sci_Names(sp_name = tofind$species)

# ----------------
#  Summary 
# ----------------
# 
# N taxa:
# 36697
# N taxa found:
# |db   |     N|
# |:----|-----:|
# |GBIF | 35903|
# |ITIS |    76|
# |NCBI |   278|
# N taxa not found:
# 440


## Add found species names to the sps_accepted_names

all(sp_names_found$requested_name %in% sps_accepted_names$reported_name_fixed)

for(i in 1:length(sp_names_found$requested_name)){
  tofill <- unique(which(sps_accepted_names$reported_name_fixed == sp_names_found$requested_name[i]))
  sps_accepted_names$scientificName[tofill] <- sp_names_found$scientificName[i]
  sps_accepted_names$kingdom[tofill] <- sp_names_found$kingdom[i]
  sps_accepted_names$phylum[tofill] <-sp_names_found$phylum[i]
  sps_accepted_names$class[tofill] <- sp_names_found$class[i]
  sps_accepted_names$order[tofill] <- sp_names_found$order[i]
  sps_accepted_names$family[tofill] <- sp_names_found$family[i]
  sps_accepted_names$db[tofill] <- sp_names_found$db[i]
  sps_accepted_names$db_code[tofill] <- sp_names_found$db_code[i]
  sps_accepted_names$method[tofill] <- sp_names_found$method[i]
}

sps_accepted_names$spp = sps_accepted_names$reported_name_fixed


## Keep original taxa information for those species we could not find a name at GBIF

pos <- which(is.na(sps_accepted_names$scientificName))
sps_accepted_names$scientificName[pos] <- sps_accepted_names$spp[pos]

for(i in 1:nrow(gen_div)){
  gen_div$spp_new[i] <- sps_accepted_names$scientificName[which(sps_accepted_names$spp == gen_div$spp[i])]
}

write.csv(gen_div, 
          here::here('Data/Fonseca_etal_2023_EvoLetters_harmo.txt'), row.names = FALSE)

9 Load harmonized genetic data

gen_div <- fread(here::here('Data/Fonseca_etal_2023_EvoLetters_harmo.txt'))

9.1 How many species with >1 gen div measurements ?

N_obs <- gen_div %>%
  group_by(spp) %>%
  tally() %>%
  dplyr::filter(n>1)
# total N
length(unique(gen_div$spp))
## [1] 35156
# total N with > 1 observation
nrow(N_obs)
## [1] 57
# these species
DT::datatable(merge(gen_div[,c("spp","Species")], N_obs, by = "spp"))

The duplicates comes from the grouping of species names in the original data. Fix by deleting F. fusca complex and T. caespitum x T. immigrans.

gen_div <- gen_div %>% filter(!Species %in% c("Formica-fusca_complex", "Tetramorium-caespitum_x_Tetramorium_immigrans"))

9.2 Stats

# Gen
gen_sps <- unique(gen_div$spp_new)

bsi_v1 <- intersect(biov1$spp, gen_sps)

# Break down
tot1 <- data.frame(N_species_with_gen_data = length(gen_sps),
                   v1_matches = length(bsi_v1))
tot1
##   N_species_with_gen_data v1_matches
## 1                   34702       2960
# N_species_with_gen_data v1_matches
#                   34702   2982

# Select only species in Bioshifts
gen_div <- gen_div %>% dplyr::filter(spp %in% bsi_v1)

9.3 Which species?

toplot_ <- gen_div %>%
  group_by(Group) %>%
  dplyr::summarise(Obs = length(spp),
                   Species = length(unique(spp))) %>%
  gather(var, N, -c(Group))

ggplot(toplot_, aes(x = Group, y = N))+
  geom_bar(stat="identity")+
  geom_text(aes(y=N, label=N, 
                hjust = ifelse(N<max(N),-.1,1.1)), vjust=0.2, size=3, 
            position = position_dodge(0.9))+
  theme_classic()+
  coord_flip()+
  facet_wrap(.~var, scales = "free", ncol=1)

9.4 Distribution of genetic diversity

unique(gen_div$Group)
##  [1] "Actinopterygii"  "Insecta"         "Arachnida"       "Magnoliopsida"  
##  [5] "Aves"            "Mammalia"        "Amphibia"        "Reptilia"       
##  [9] "Pinopsida"       "Polypodiopsida"  "Liliopsida"      "Bryopsida"      
## [13] "Lycopodiopsida"  "Elasmobranchii"  "Psilotopsida"    "Holocephali"    
## [17] "Florideophyceae"
to_plot <- gen_div %>%
  select(Group,Nucleotide_diversity, TajimasD) %>%
  filter(Group %in% c("Actinopterygii","Insecta","Aves", "Magnoliopsida", "Mammalia")) %>%
  gather(metric, value, -Group)

ggplot(to_plot, aes(value, fill = Group, color = Group))+
  geom_density(alpha = .3)+
  scale_color_viridis_d()+
  scale_fill_viridis_d()+
  facet_wrap(.~metric, scales = "free")

9.5 High genetic diversity with latitude?

toplot <- gen_div %>%
  mutate(Hemisphere = ifelse(Latitude<0, "South", "North"),
         lat_abs = abs(Latitude),
         Nucleotide_diversity = scale(Nucleotide_diversity),
         TajimasD = scale(TajimasD))

ggplot(toplot, aes(x = lat_abs, y = Nucleotide_diversity))+
  geom_point(alpha = .1)+
  stat_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

mod1 <- lm(Nucleotide_diversity~lat_abs, toplot)
summary(mod1)
## 
## Call:
## lm(formula = Nucleotide_diversity ~ lat_abs, data = toplot)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2743 -0.5904 -0.3532  0.1754  6.7954 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.528347   0.079678   6.631 3.96e-11 ***
## lat_abs     -0.011234   0.001648  -6.815 1.14e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9923 on 2912 degrees of freedom
## Multiple R-squared:  0.0157, Adjusted R-squared:  0.01536 
## F-statistic: 46.44 on 1 and 2912 DF,  p-value: 1.142e-11
ggplot(toplot, aes(x = lat_abs, y = Nucleotide_diversity, color = Group))+
  geom_point(alpha = .1)+
  stat_smooth(method = "lm")+
  facet_wrap(.~Group,scales = "free")
## `geom_smooth()` using formula = 'y ~ x'
## Warning in qt((1 - level)/2, df): NaNs produced
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

mod1 <- lm(Nucleotide_diversity~lat_abs*Group, toplot)
emtrends(mod1, ~ lat_abs*Group, var = "lat_abs")
##  lat_abs Group           lat_abs.trend      SE   df lower.CL upper.CL
##       47 Actinopterygii       -0.02001 0.00540 2882  -0.0306 -0.00941
##       47 Amphibia             -0.01105 0.01918 2882  -0.0487  0.02656
##       47 Arachnida            -0.00856 0.01281 2882  -0.0337  0.01655
##       47 Aves                 -0.02014 0.00329 2882  -0.0266 -0.01368
##       47 Bryopsida             0.00515 0.03441 2882  -0.0623  0.07262
##       47 Elasmobranchii        0.00251 0.01710 2882  -0.0310  0.03605
##       47 Florideophyceae        nonEst      NA   NA       NA       NA
##       47 Holocephali           0.00842 0.10358 2882  -0.1947  0.21152
##       47 Insecta              -0.00824 0.00251 2882  -0.0132 -0.00331
##       47 Liliopsida            0.00274 0.00670 2882  -0.0104  0.01587
##       47 Lycopodiopsida       -0.00808 0.08012 2882  -0.1652  0.14902
##       47 Magnoliopsida        -0.00205 0.00522 2882  -0.0123  0.00819
##       47 Mammalia             -0.02341 0.01687 2882  -0.0565  0.00966
##       47 Pinopsida             0.00341 0.04155 2882  -0.0781  0.08488
##       47 Polypodiopsida        0.00185 0.02034 2882  -0.0380  0.04174
##       47 Psilotopsida           nonEst      NA   NA       NA       NA
##       47 Reptilia             -0.03944 0.05755 2882  -0.1523  0.07340
## 
## Confidence level used: 0.95

Nucleotide diversty changes with latitude, with higher values at low latitues.

9.6 Spatial pattern

mundi <- rnaturalearth::ne_coastline(scale = 110, returnclass = "sv")
mundi <- crop(mundi, ext(c(-180,180,-60,90)))

# get vect gen div
vect_div <- terra::vect(gen_div %>%
                          dplyr::filter(spp %in% biov1$sp_name_std_v1),
                        geom=c("Longitude","Latitude"),crs=crs(mundi))

# get vect centroids bioshifts
vect_biov1 <- terra::vect(biov1 %>%
                            filter(spp %in% gen_div$spp),
                          geom=c("centroid.x","centroid.y"),crs=crs(mundi))

# get raster bioshifts shp files study areas
get_raster_bioshifts = "NO"
if(get_raster_bioshifts=="YES"){
  my_ext = terra::ext(mundi)
  my_crs = crs(mundi)
  rast_biov1 <- terra::rast(my_ext, crs = my_crs, res = 0.5)
  values(rast_biov1) <- 0
  
  fgdb <- "C:/Users/brunn/ShadowDrive/CreateGeodatabaseBioShifts/Data/Study_Areas_v1/Study_Areas.gdb"
  fc_list <- terra::vector_layers(fgdb)
  fc_list <- fc_list[which(fc_list %in% unique(as.data.frame(vect_biov1)$ID))]
  
  for(i in 1:length(fc_list)){ cat("\r",i,"from",length(fc_list))
    tmp = terra::vect(fgdb, layer = fc_list[i])
    tmp = terra::cells(rast_biov1,tmp)
    tmp_cell = tmp[,2]
    tmp_vals = rast_biov1[tmp_cell][,1]
    rast_biov1[tmp_cell] = tmp_vals+1
  }
  names(rast_biov1) <- "SA"
  rast_biov1[rast_biov1==0] <- NA
  
  writeRaster(rast_biov1, here::here("Data/raster_bioshifts_SA_fonseca.tif"), overwrite = TRUE)
  
} else {
  rast_biov1 <- terra::rast(here::here("Data/raster_bioshifts_SA_fonseca.tif"))
  rast_biov1 <- crop(rast_biov1, ext(mundi))
}

values_biov1 <- na.omit(values(rast_biov1))

plot_data <- gen_div %>%
  dplyr::filter(spp %in% biov1$sp_name_std_v1) %>%
  mutate(x = round(Longitude,3),
         y = round(Latitude,3))

gen_fig <- ggplot()+
  geom_spatvector(data=mundi)+
  geom_hex(data=plot_data,
           binwidth = c(4, 4),
           aes(x=x, y=y))+
  scale_fill_continuous(trans="log", 
                        alpha=.7,
                        labels=function(x) round(x,0),
                        breaks=function(x) exp(seq(log(min(x)),log(max(x)),length.out=5)),
                        type = "viridis", name = "Species with\ngenetic data")+
  theme_void()

gen_fig +
  ggtitle("Genetic diversity data")

bio_fig <- ggplot() +
  geom_spatraster(data=rast_biov1)+
  scale_fill_whitebox_c(
    name = "BioShifts\nstudy areas",
    palette = "viridi",
    alpha=.7,
    na.value = "white",
    # trans="log", 
    # labels=function(x) round(x,0),
    # breaks=function(x) exp(seq(log(min(x)),log(max(x)),length.out=5))
    )+
  geom_spatvector(data=mundi)+
  theme_void()

bio_fig +
  ggtitle("Bioshifts study areas")

bio_gen_fig <- bio_fig +
  geom_spatvector(data=vect_div, alpha = .3)

bio_gen_fig +
  ggtitle("Genetic diversity data & BioShifts study areas")

ggsave(filename = here::here("Figs/Map_sp-level_GD.pdf"), 
       plot = bio_gen_fig, 
       width = 12,
       height = 5,
       device = "pdf", 
       units = "in")
bio_gen_fig2 <- ggpubr::ggarrange(gen_fig,
                                  bio_fig,
                                  labels = c("A", "B"),
                                  ncol=1)

bio_gen_fig2

ggsave(filename = here::here("Figs/Map_sp-level_GD2.pdf"), 
       plot = bio_gen_fig2, 
       width = 10,
       height = 8,
       device = "pdf", 
       units = "in")

10 Merge genetic and bioshifts data

11 Check if gen data overlap with study areas

12 Get distance of genetic data to the study area in BioShifts

12.1 Save data

gen_data_v1_avg <- read.csv(here::here("Data/gen_data_final_fonseca.csv"))

13 Examples species in and out of study areas

# Example of species with genetic data outside range shift study area
i=1
spp2go <- gen_data_v1_avg$spp[i]
spp2go
## [1] "Abax parallelus"
SA_spp <- gen_data_v1_avg$ID[i]
SA_spp
## [1] "A34_P1"
fgdb <- "C:/Users/brunn/ShadowDrive/CreateGeodatabaseBioShifts/Data/Study_Areas_v1/Study_Areas.gdb"

leaflet() %>% addTiles() %>%
  addPolygons(data = terra::vect(fgdb, layer = SA_spp), color = "red") %>%
  addCircles(data = terra::vect(gen_div %>%
                                  dplyr::filter(spp == spp2go),
                                geom=c("Longitude","Latitude"),crs=crs(mundi)), 
             color = "blue") %>%
  addLegend(colors = c("red","blue"),labels =c("Study area","Genetic data"), position = "bottomright")
## Warning: SpatVector layer has inconsistent datum (+proj=longlat +ellps=WGS84 +towgs84=0,0,0,0,0,0,0 +no_defs).
## Need '+proj=longlat +datum=WGS84'
## Warning: [vect] Z coordinates ignored
# Example of species with genetic data within range shift study area
i=2

spp2go <- gen_data_v1_avg$spp[i]
spp2go
## [1] "Abies balsamea"
SA_spp <- gen_data_v1_avg$ID[i]
SA_spp
## [1] "A40_P1"
leaflet() %>% addTiles() %>%
  addPolygons(data = terra::vect(fgdb, layer = SA_spp), color = "red") %>%
  addCircles(data = terra::vect(gen_div %>%
                                  dplyr::filter(spp == spp2go),
                                geom=c("Longitude","Latitude"),crs=crs(mundi)), 
             color = "blue") %>%
  addLegend(colors = c("red","blue"),labels =c("Study area","Genetic data"), position = "bottomright")
## Warning: SpatVector layer has inconsistent datum (+proj=longlat +ellps=WGS84 +towgs84=0,0,0,0,0,0,0 +no_defs).
## Need '+proj=longlat +datum=WGS84'

## Warning: [vect] Z coordinates ignored

14 Explore data

# n range shifts
nrow(gen_data_v1_avg)
## [1] 10679

14.1 Which species?

# N species
length(unique(gen_data_v1_avg$spp))
## [1] 2908
# Use only plus plus or minus minus
table(gen_data_v1_avg$shift_vel_sign)
## 
## negneg negpos posneg pospos 
##    190   3906    283   6300
# N shifts and species
gen_data_v1_avg %>% 
  filter(shift_vel_sign == "pospos" | shift_vel_sign == "negneg") %>%
  summarise(N_Shift = length(spp),
            N_Species = length(unique(spp)))
##   N_Shift N_Species
## 1    6490      2408
toplot_ <- gen_data_v1_avg %>%
  filter(shift_vel_sign == "pospos" | shift_vel_sign == "negneg") %>%
  group_by(Group) %>%
  dplyr::summarise(N_Shift = length(spp),
                   Species = length(unique(spp))) %>%
  gather(var, N, -c(Group))

ggplot(toplot_, aes(x = Group, y = N))+
  geom_bar(stat="identity")+
  geom_text(aes(y=N, label=N, 
                hjust = ifelse(N<1000,-.1,1.1)), vjust=0.2, size=3, 
            position = position_dodge(0.9))+
  theme_classic()+
  coord_flip()+
  facet_wrap(.~var, scales = "free", ncol=1)

14.2 Compare shift values of the full bioshifts vs the subset (genetic) dataset

toplot <- rbind(
  data.frame(dataset = "Full", 
             lags = biov1$lag2, 
             shifts = biov1$SHIFT),
  data.frame(dataset = "Subset", 
             lags = gen_data_v1_avg$lag2, 
             shifts = gen_data_v1_avg$SHIFT))

# lags
ggplot(toplot, aes(x = dataset, y=lags, fill = dataset, color = dataset))+
  geom_boxplot(alpha = .5, outlier.shape = NA)+
  scale_y_continuous(limits = quantile(toplot$lags, c(0.1, 0.9), na.rm = T))
## Warning: Removed 7888 rows containing non-finite values (`stat_boxplot()`).

tapply(toplot$lags, toplot$dataset, summary)
## $Full
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -145.1653   -0.4375    0.6237    0.3779    2.1868   13.1259 
## 
## $Subset
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -145.16531   -0.97034    0.51824    0.09531    2.15833   13.12588
mod1 <- aov(lags~dataset, data = toplot)
summary(mod1)
##                Df Sum Sq Mean Sq F value   Pr(>F)    
## dataset         1    628   628.0   27.55 1.53e-07 ***
## Residuals   40490 922790    22.8                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# shift
ggplot(toplot, aes(x = dataset, y=shifts, fill = dataset, color = dataset))+
  geom_boxplot(alpha = .5, outlier.shape = NA)+
  scale_y_continuous(limits = quantile(toplot$shifts, c(0.1, 0.9), na.rm = T))
## Warning: Removed 8093 rows containing non-finite values (`stat_boxplot()`).

tapply(toplot$shifts, toplot$dataset, summary)
## $Full
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -119.1696   -0.4468    0.3056    1.1671    2.4345  146.3000 
## 
## $Subset
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -94.6628  -0.3001   0.3848   1.3481   2.7390 146.3000
mod1 <- aov(shifts~dataset, data = toplot)
summary(mod1)
##                Df  Sum Sq Mean Sq F value  Pr(>F)   
## dataset         1     258  257.53   8.538 0.00348 **
## Residuals   40490 1221333   30.16                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

There are difference in mean values of lags, but no difference in mean shift values, between the full and subset datasets.

15 General pattern

# gen_data_v1_avg <- gen_data_v1_avg_fon
# gen_data_v1_avg$Nucleotide_diversity <- gen_data_v1_avg$He

gen_data_v1_avg %>%
  dplyr::filter(Type == "LAT",
                shift_vel_sign == "pospos" | shift_vel_sign == "negneg") %>%
  ggplot(aes(x = Nucleotide_diversity, y = log(abs(SHIFT)), color = lag))+
  ggtitle("Latitude")+
  xlab("Genetic diversity")+
  ylab("Range shift (km/year)") +
  scale_color_viridis_c(direction = -1)+
  geom_point(alpha = .5, size = 3)+
  theme_bw()+
  geom_smooth(method = "lm", color = "black", size=1)+
  stat_smooth(geom="line", method = "lm", alpha=0.3, se = FALSE, 
              aes(by=as.factor(Article_ID)), color = "black")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in stat_smooth(geom = "line", method = "lm", alpha = 0.3, se = FALSE, :
## Ignoring unknown aesthetics: by
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).

gen_data_v1_avg %>%
  dplyr::filter(Type == "ELE",
                shift_vel_sign == "pospos" | shift_vel_sign == "negneg") %>%
  ggplot(aes(x = Nucleotide_diversity, y = log(abs(SHIFT)), color = lag))+
  ggtitle("Elevation")+
  xlab("Genetic diversity")+
  ylab("Range shift (km/year)") +
  scale_color_viridis_c(direction = -1)+
  geom_point(alpha = .5, size = 3)+
  theme_bw()+
  geom_smooth(method = "lm", color = "black", size=1)+
  stat_smooth(geom="line", method = "lm", alpha=0.3, se = FALSE, 
              aes(by=as.factor(Article_ID)), color = "black")
## Warning in stat_smooth(geom = "line", method = "lm", alpha = 0.3, se = FALSE, :
## Ignoring unknown aesthetics: by
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).

16 By Param

gen_data_v1_avg %>%
  dplyr::filter(Type == "LAT",
                shift_vel_sign == "pospos" | shift_vel_sign == "negneg") %>%
  ggplot(aes(x = Nucleotide_diversity, y = log(abs(SHIFT)), color = lag))+
  ggtitle("Latitude")+
  xlab("Genetic diversity")+
  ylab("Range shift (km/year)") +
  scale_color_viridis_c(direction = -1)+
  geom_point(alpha = .5, size = 3)+
  theme_bw()+
  geom_smooth(method = "lm", color = "black", size=1)+
  stat_smooth(geom="line", method = "lm", alpha=0.3, se = FALSE, 
              aes(by=as.factor(Article_ID)), color = "black")+
  facet_wrap(.~Param, scales = "free", ncol = 3)
## Warning in stat_smooth(geom = "line", method = "lm", alpha = 0.3, se = FALSE, :
## Ignoring unknown aesthetics: by
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).

gen_data_v1_avg %>%
  dplyr::filter(Type == "ELE") %>%
  ggplot(aes(x = Nucleotide_diversity, y = log(abs(SHIFT)), color = lag))+
  ggtitle("Elevation")+
  xlab("Genetic diversity")+
  ylab("Range shift (km/year)") +
  scale_color_viridis_c(direction = -1)+
  geom_point(alpha = .5, size = 3)+
  theme_bw()+
  geom_smooth(method = "lm", color = "black", size=1)+
  stat_smooth(geom="line", method = "lm", alpha=0.3, se = FALSE, 
              aes(by=as.factor(Article_ID)), color = "black")+
  facet_wrap(.~Param, scales = "free", ncol = 3)
## Warning in stat_smooth(geom = "line", method = "lm", alpha = 0.3, se = FALSE, :
## Ignoring unknown aesthetics: by
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 222 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 222 rows containing non-finite values (`stat_smooth()`).

17 By Group

gen_data_v1_avg %>%
  dplyr::filter(Type == "LAT") %>%
  ggplot(aes(x = Nucleotide_diversity, y = log(abs(SHIFT)), color = lag))+
  ggtitle("Latitude")+
  xlab("Genetic diversity")+
  ylab("Range shift (km/year)") +
  scale_color_viridis_c(direction = -1)+
  geom_point(alpha = .5, size = 3)+
  theme_bw()+
  geom_smooth(method = "lm", color = "black", size=1)+
  stat_smooth(geom="line", method = "lm", alpha=0.3, se = FALSE, 
              aes(by=as.factor(Article_ID)), color = "black")+
  facet_wrap(.~Group, scales = "free", ncol = 3)
## Warning in stat_smooth(geom = "line", method = "lm", alpha = 0.3, se = FALSE, :
## Ignoring unknown aesthetics: by
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 408 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 408 rows containing non-finite values (`stat_smooth()`).

gen_data_v1_avg %>%
  dplyr::filter(Type == "ELE") %>%
  ggplot(aes(x = Nucleotide_diversity, y = log(abs(SHIFT)), color = lag))+
  ggtitle("Elevation")+
  xlab("Genetic diversity")+
  ylab("Range shift (km/year)") +
  scale_color_viridis_c(direction = -1)+
  geom_point(alpha = .5, size = 3)+
  theme_bw()+
  geom_smooth(method = "lm", color = "black", size=1)+
  stat_smooth(geom="line", method = "lm", alpha=0.3, se = FALSE, 
              aes(by=as.factor(Article_ID)), color = "black")+
  facet_wrap(.~Group, scales = "free", ncol = 3)
## Warning in stat_smooth(geom = "line", method = "lm", alpha = 0.3, se = FALSE, :
## Ignoring unknown aesthetics: by
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 222 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 222 rows containing non-finite values (`stat_smooth()`).

18 By Group / Param

gen_data_v1_avg %>%
  dplyr::filter(Type == "LAT") %>%
  ggplot(aes(x = Nucleotide_diversity, y = log(abs(SHIFT)), color = lag))+
  ggtitle("Latitude")+
  xlab("Genetic diversity")+
  ylab("Range shift (km/year)") +
  scale_color_viridis_c(direction = -1)+
  geom_point(alpha = .5, size = 3)+
  theme_bw()+
  geom_smooth(method = "lm", color = "black", size=1)+
  stat_smooth(geom="line", method = "lm", alpha=0.3, se = FALSE, 
              aes(by=as.factor(Article_ID)), color = "black")+
  facet_wrap(Group~Param, scales = "free")
## Warning in stat_smooth(geom = "line", method = "lm", alpha = 0.3, se = FALSE, :
## Ignoring unknown aesthetics: by
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 408 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 408 rows containing non-finite values (`stat_smooth()`).

gen_data_v1_avg %>%
  dplyr::filter(Type == "ELE") %>%
  ggplot(aes(x = Nucleotide_diversity, y = log(abs(SHIFT)), color = lag))+
  ggtitle("Elevation")+
  xlab("Genetic diversity")+
  ylab("Range shift (km/year)") +
  scale_color_viridis_c(direction = -1)+
  geom_point(alpha = .5, size = 3)+
  theme_bw()+
  geom_smooth(method = "lm", color = "black", size=1)+
  stat_smooth(geom="line", method = "lm", alpha=0.3, se = FALSE, 
              aes(by=as.factor(Article_ID)), color = "black")+
  facet_wrap(Param~Group, scales = "free")
## Warning in stat_smooth(geom = "line", method = "lm", alpha = 0.3, se = FALSE, :
## Ignoring unknown aesthetics: by
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 222 rows containing non-finite values (`stat_smooth()`).
## Warning in qt((1 - level)/2, df): NaNs produced
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 222 rows containing non-finite values (`stat_smooth()`).
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf