Correlates of cultural awareness in birds

Libraries

Various r libraries we need

library(tidyverse)
library(ggplot2)
library(plotly)
library(dplyr)
library(readxl)   
library(sjPlot)
library(data.table)
library(MuMIn)

Load datasets

Here we combine a number of datasets.

The first all_iucn_titles.csv is a combined data from onezoom which combines columns for iucn taxonid, binomial, wikidata id (from Millard et al)

#import dataset all icun titles
iucn_og <-read.csv("all_iucn_titles.csv")

# Subset iucn data for just Aves and only from English Wikipedia (for now)
iucn_bird_data = iucn_og %>% #neater version
  filter(class_name == "AVES")%>%
  filter(site == "enwiki")

Wikipedia data

The second is extracted monthly total page views from wikipedia from (from Millard et al)

# Total views (this adds a row for wikipedia view per month)
wiki_views = readRDS("total_monthly_views_10-languages.rds")

# Get just Aves from English wikipedia
wiki_views_birds_es = wiki_views[['^es_']]$aves
wiki_views_birds_es$language = "es"
wiki_views_birds_fr = wiki_views[["^fr_"]]$aves
wiki_views_birds_fr$language = "fr"
wiki_views_birds_de = wiki_views[['^de_']]$aves
wiki_views_birds_de$language = "de"
wiki_views_birds_ja = wiki_views[['^ja_']]$aves
wiki_views_birds_ja$language = "ja"
wiki_views_birds_it = wiki_views[['^it_']]$aves
wiki_views_birds_it$language = "it"
wiki_views_birds_ar = wiki_views[['^ar_']]$aves
wiki_views_birds_ar$language = "ar"
wiki_views_birds_ru = wiki_views[['^ru_']]$aves
wiki_views_birds_ru$language = "ru"
wiki_views_birds_pt = wiki_views[['^pt_']]$aves
wiki_views_birds_pt$language = "pt"
wiki_views_birds_zh = wiki_views[['^zh_']]$aves
wiki_views_birds_zh$language = "zh"
wiki_views_birds_en = wiki_views[['^en_']]$aves
wiki_views_birds_en$language = "en"

wiki_views_birds = rbind(wiki_views_birds_es,
                         wiki_views_birds_fr,
                         wiki_views_birds_de,
                         wiki_views_birds_ja,
                         wiki_views_birds_it,
                         wiki_views_birds_ar,
                         wiki_views_birds_ru,
                         wiki_views_birds_pt,
                         wiki_views_birds_zh,
                         wiki_views_birds_en)

Calcualte average monthtly page views

Distil monthly total page views (which were accidentally called ‘av_views’) into average total monthly pageviews (av_total_views) per species and the same but only for the summer months (April - Sep): av_summer_views

wiki_views_birds_monthly = wiki_views_birds %>% 
  group_by(q_wikidata, article, language) %>% 
  dplyr::summarise(av_total_views = mean(av_views), 
                   av_summer_views = mean(av_views[month %in% c("04", "05", "06", "07", "08", "09")]))
## `summarise()` has grouped output by 'q_wikidata', 'article'. You can override
## using the `.groups` argument.

Life history data

Next is trait data, here using AVONET (Tobias et al)

# Load life history data

#Life_hist_char = fread("Life-history characteristics of European birds.txt")

AVONET_data = fread("/Users/rob/Dropbox/R/british_birds_cultural_awareness_2022/AVONET/ELEData/TraitData/AVONET1_BirdLife.csv")


p = ggplot(AVONET_data, aes(x = log10(Mass), y = log10(Range.Size), color = Trophic.Level, text = Species1)) +
  geom_point() + theme_minimal() +
  scale_color_brewer(type = "div", palette = "Spectral") + 
  coord_equal() +
  xlab("Body Mass (log10))") + 
  ylab("Range Size (log10)") + 
  labs(title = "Trail Data from AVONET") + facet_wrap(~Habitat)

fig <- ggplotly(p, width = 1200, height = 1200)
fig

Color data

Then color data from Cooney et al (https://www.nature.com/articles/s41559-022-01714-1)

# Colour data
colour_data = read.csv("Colorfull_Birds_NatEcoEvo_CooneyEtal.csv")
colour_data$Binomial = gsub("_", " ", colour_data$Binomial)


p = ggplot(colour_data, aes(x = log10(VolumeVS), y = log10(VolumeUVS), color = log10(VolumeVS*VolumeUVS), text = Binomial)) +
  geom_point() + theme_minimal() +
  scale_color_viridis_c() + 
  coord_equal() +
  xlab("Visual spectrum volume (VolumeVS)") + 
  ylab("Ultraviolet spectrum volume (VolumeVS)") + 
  labs(title = "Color Volume vs UV Volume", subtitle = "Avian color data from Cooney et al")

fig <- ggplotly(p, width = 1200, height = 1200)
fig
p2 = ggplot(colour_data, aes(x = log10(LociVS), y = log10(LociUVS), color = log10(LociVS*LociUVS), text = Binomial)) +
  geom_point() + theme_minimal() +
  scale_color_viridis_c() + 
  coord_equal() +
  xlab("Visual spectrum volume (VolumeVS)") + 
  ylab("Ultraviolet spectrum volume (VolumeVS)") + 
  labs(title = "Color Loci vs UV Loci", subtitle = "Avian color data from Cooney et al")

fig2 <- ggplotly(p2, width = 1200, height = 1200)
fig2
# 
# 
# View(Life_hist_char)

Combine datasets

merge each of these datasets together, checking where we lose species

# Merge AVONET trait data with out list of species
lifehist_merge_global = dplyr::left_join(iucn_bird_data, AVONET_data, by = c("scientific_name" = "Species1"))

# Merge this with the colour data
lifehist_merge_col_global = left_join(lifehist_merge_global, colour_data, by = c("scientific_name" = "Binomial"))

# What's missing traits?
birds_with_no_trait_data = subset(lifehist_merge_col_global, is.na(Mass)) %>%
  select(scientific_name, title) %>%
  distinct()

cat("Number of birds with no trait data")
## Number of birds with no trait data
nrow(birds_with_no_trait_data)
## [1] 52
# What's missing color?
birds_with_no_colour_data = subset(lifehist_merge_col_global, is.na(LociUVS)) %>%
  select(scientific_name, title) %>%
  distinct()

cat("Number of birds with no colour data")
## Number of birds with no colour data
nrow(birds_with_no_colour_data)
## [1] 5507
# Merge wiki data with species with life history
merged_data_LH_col_global = left_join(lifehist_merge_col_global, wiki_views_birds_monthly, by = "q_wikidata")

Make model data

Make a dataset for modelling, transforming some variables and selecting just the columns we want.

As color data is available for fewer species than the trait data, make two datasets one with colour data and one without

model_data_col = merged_data_LH_col_global %>% mutate(
  log_av_total_views = log10(av_total_views + 1),
  log_av_summer_views = log10(av_summer_views + 1),
  log10_Range.size = log10(Range.Size + 1),
  log10_Mass = log10(Mass + 1),
  Migration = factor(Migration),
  
) %>% select(name, article, language,
             log_av_total_views, log10_Range.size, Habitat, Trophic.Level,
            log10_Mass, LociUVS, LociVS, VolumeUVS, VolumeVS,
            Migration, Min.Latitude, Max.Latitude)

model_data_nocol = model_data_col %>% select(name, article, language,
                                             log_av_total_views, log10_Range.size, 
                                             Habitat, Trophic.Level, log10_Mass,
                                             Migration, Min.Latitude, Max.Latitude)


cat("Number of birds for models with color data")
## Number of birds for models with color data
model_data_col = model_data_col %>% filter(language == "en") %>% drop_na()
nrow(model_data_col)
## [1] 6958
#[1] 6958
cat("Number of birds for models not including color data")
## Number of birds for models not including color data
model_data_nocol = model_data_nocol %>% filter(language == "en") %>% drop_na()
nrow(model_data_nocol)
## [1] 12358
#[1] 12368

Model exploration

Here, defining a ‘complete’ model with the various interactions we think are interesting, then dredge to explore sub models and their quality.

library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
model_data_col2 = subset(model_data_col, language %in% c("en"))

full_model_global = glm(log_av_total_views ~ scale(log10_Range.size)*scale(log10_Mass) + 
                                        Habitat*Trophic.Level + 
                                        scale(LociVS)*scale(VolumeVS) + 
                                        Migration + scale(Min.Latitude) + scale(Max.Latitude),  
                                     data = model_data_col, na.action = "na.fail")


md = dredge(full_model_global)
## Fixed term is "(Intercept)"
cat("Top 10 models from dredge")
## Top 10 models from dredge
head(md, 10)
## Global model call: glm(formula = log_av_total_views ~ scale(log10_Range.size) * 
##     scale(log10_Mass) + Habitat * Trophic.Level + scale(LociVS) * 
##     scale(VolumeVS) + Migration + scale(Min.Latitude) + scale(Max.Latitude), 
##     data = model_data_col, na.action = "na.fail")
## ---
## Model selection table 
##      (Int) Hbt Mgr   scl(LVS) scl(l10_Mss) scl(l10_Rng.siz) scl(Max.Ltt)
## 4096 2.679   +   +  0.0009678       0.1524         -0.04217       0.4399
## 3072 2.673   +   +  0.0143600       0.1507         -0.04257       0.4396
## 3068 2.672   +   +                  0.1499         -0.04216       0.4390
## 2048 2.671   +   +  0.0019550       0.1515         -0.04307       0.4418
## 2944 2.673   +   +  0.0407200       0.1507         -0.04284       0.4405
## 1024 2.665   +   +  0.0151000       0.1499         -0.04345       0.4416
## 1020 2.665   +   +                  0.1490         -0.04303       0.4410
## 896  2.665   +   +  0.0407300       0.1499         -0.04369       0.4424
## 3584 2.513   +   + -0.0004024       0.1513         -0.04072       0.4384
## 2032 2.718   +   +  0.0001145       0.1511                        0.3909
##      scl(Min.Ltt) scl(VVS) Trp.Lvl Hbt:Trp.Lvl scl(LVS):scl(VVS)
## 4096      -0.2430  0.07015       +           +         -0.005231
## 3072      -0.2428  0.03260       +           +                  
## 3068      -0.2423  0.04350       +           +                  
## 2048      -0.2441  0.06856       +           +         -0.005137
## 2944      -0.2438                +           +                  
## 1024      -0.2439  0.03169       +           +                  
## 1020      -0.2433  0.04315       +           +                  
## 896       -0.2449                +           +                  
## 3584      -0.2405  0.07072       +                     -0.005390
## 2032      -0.1987  0.07003       +           +         -0.005289
##      scl(l10_Mss):scl(l10_Rng.siz) df    logLik   AICc delta weight
## 4096                       0.02053 40 -3445.295 6971.1  0.00  0.978
## 3072                       0.02032 39 -3450.597 6979.6  8.58  0.013
## 3068                       0.02050 38 -3452.136 6980.7  9.64  0.008
## 2048                               39 -3454.547 6987.5 16.48  0.000
## 2944                       0.01980 38 -3459.037 6994.5 23.44  0.000
## 1024                               38 -3459.648 6995.7 24.66  0.000
## 1020                               37 -3461.347 6997.1 26.04  0.000
## 896                                37 -3467.611 7009.6 38.56  0.000
## 3584                       0.02331 23 -3482.674 7011.5 40.44  0.000
## 2032                               38 -3471.500 7019.4 48.36  0.000
## Models ranked by AICc(x)
#top_models <- get.models(md, subset = delta < 4)

Explore top models

Extract the top set of models (95% of the AICc) or the ‘best’ model. Here I chose the 2nd model (Which is the full model with all the fixed effects present to explore its coefficients.)

top_model = get.models(md, subset = 1)[[1]]

top_model2 = get.models(md, subset = 2)[[1]]
# 
plot_model(top_model2, type = "std") 
## Warning: Model matrix is rank deficient. Parameters
##   HabitatWoodland:Trophic.LevelHerbivore were not estimable.

plot_model(top_model2) 
## Warning: Model matrix is rank deficient. Parameters
##   HabitatWoodland:Trophic.LevelHerbivore were not estimable.

# plot_model(top_model, type = "pred", terms = c("log10_Range.size", "log10_Mass"))
# plot_model(top_model, type = "pred", terms = c("Habitat", "Trophic.Level"))
# plot_model(top_model, type = "pred", terms = c("Migration", "Min.Latitude", "Max.Latitude"))
# plot_model(top_model, type = "pred", terms = c("VolumeUVS", "VolumeVS"))
# plot_model(top_model, type = "pred", terms = c("LociVS", "LociUVS"))

plot_model(top_model2, type = "pred", terms = c("log10_Range.size", "log10_Mass"))

Larger birds are generally more popular, but this graph suggests large range-size are negatively associated… worried this might be confounded by min/max latitute below. ALso some evidence that large birds becoming more and more popular as their range size increases

plot_model(top_model2, type = "pred", terms = c("Habitat", "Trophic.Level")) + coord_flip()

Omnivores and Herbivores are generally more popular than carnivores (although perhaps reversed in Rock habitats).

plot_model(top_model2, type = "pred", terms = c("Migration", "Min.Latitude", "Max.Latitude"))

But confusing this one - looks like migratory birds (3) are a little more popular than sedentary (1), but birds with more northerly max latitudues are much more popular, but that’s more clear with bird with more southerly minimum latitudes - so perhaps this is just a proxy for range-size?

plot_model(top_model2, type = "pred", terms = c("VolumeVS"))

More colourful birds are more popular, there’s a weak interaction between UV/visible ‘volume’

plot_model(top_model2, type = "pred", terms = c("LociVS"))

#plot_model(top_model2, type = "resid")

Birds with more colours (LociUVS) don’t appear more popular - wonder why this isn’t lost in the dredge?

Big coefficient table from this model, R^2 is about 0.25

#plot_model(top_model, type = "int")
tab_model(top_model2, show.aicc = TRUE)
## Warning: Model matrix is rank deficient. Parameters
##   HabitatWoodland:Trophic.LevelHerbivore were not estimable.
  log_av_total_views
Predictors Estimates CI p
(Intercept) 2.67 2.28 – 3.07 <0.001
Habitat [Desert] -0.68 -1.11 – -0.26 0.001
Habitat [Forest] -0.79 -1.19 – -0.40 <0.001
Habitat [Grassland] -0.79 -1.18 – -0.39 <0.001
Habitat [Human Modified] -0.59 -1.00 – -0.17 0.005
Habitat [Riverine] -0.61 -1.02 – -0.21 0.003
Habitat [Rock] -0.64 -1.04 – -0.23 0.002
Habitat [Shrubland] -0.70 -1.09 – -0.30 0.001
Habitat [Wetland] -0.78 -1.18 – -0.38 <0.001
Habitat [Woodland] -0.63 -1.02 – -0.23 0.002
Migration [2] 0.12 0.08 – 0.15 <0.001
Migration [3] 0.35 0.32 – 0.39 <0.001
LociVS 0.01 -0.00 – 0.03 0.080
log10 Mass 0.15 0.14 – 0.16 <0.001
log10 Range size -0.04 -0.06 – -0.03 <0.001
Max Latitude 0.44 0.42 – 0.46 <0.001
Min Latitude -0.24 -0.27 – -0.22 <0.001
VolumeVS 0.03 0.02 – 0.05 <0.001
Trophic Level [Herbivore] 0.27 0.19 – 0.36 <0.001
Trophic Level [Omnivore] -0.23 -0.78 – 0.33 0.423
Habitat [Desert] *
Trophic Level [Herbivore]
0.01 -0.24 – 0.26 0.922
Habitat [Forest] *
Trophic Level [Herbivore]
-0.15 -0.24 – -0.06 0.001
Habitat [Grassland] *
Trophic Level [Herbivore]
-0.08 -0.19 – 0.04 0.205
Habitat [Human Modified]
* Trophic Level
[Herbivore]
0.00 -0.24 – 0.24 0.999
Habitat [Riverine] *
Trophic Level [Herbivore]
-0.42 -0.83 – -0.00 0.048
Habitat [Rock] * Trophic
Level [Herbivore]
-0.36 -0.57 – -0.15 0.001
Habitat [Shrubland] *
Trophic Level [Herbivore]
0.01 -0.09 – 0.11 0.849
Habitat [Wetland] *
Trophic Level [Herbivore]
-0.01 -0.17 – 0.16 0.932
Habitat [Desert] *
Trophic Level [Omnivore]
0.32 -0.29 – 0.93 0.299
Habitat [Forest] *
Trophic Level [Omnivore]
0.32 -0.23 – 0.88 0.257
Habitat [Grassland] *
Trophic Level [Omnivore]
0.40 -0.16 – 0.96 0.164
Habitat [Human Modified]
* Trophic Level
[Omnivore]
0.78 0.19 – 1.37 0.010
Habitat [Riverine] *
Trophic Level [Omnivore]
-0.33 -1.12 – 0.46 0.417
Habitat [Rock] * Trophic
Level [Omnivore]
0.27 -0.32 – 0.85 0.369
Habitat [Shrubland] *
Trophic Level [Omnivore]
0.34 -0.22 – 0.90 0.231
Habitat [Wetland] *
Trophic Level [Omnivore]
0.45 -0.13 – 1.03 0.130
Habitat [Woodland] *
Trophic Level [Omnivore]
0.31 -0.25 – 0.87 0.281
log10 Mass * log10 Range
size
0.02 0.01 – 0.03 <0.001
Observations 6958
R2 0.477
AICc 6979.645
# # Average of 95% confidence set:
# avgmod.95p <- model.avg(md, cumsum(weight) <= .95, fit = TRUE)
# #confint(avgmod.95p)
# tab_model(avgmod.95p, show.aicc = TRUE)

So perhaps conspicuousness/noticability drives popularity in global birds… which is interesting and feels like it’s obvious and makes sense. If these features of ‘conspicuousness’ drive the overall popularity, it might be interesting to look at the residuals from this..

So we can extract the residuals and plot the top and bottom 100 species in terms of their residuals. So the 100 most unexpectedly popular and 100 most unexpectedly 100 unpopular birds.

model_data_col_sorted = model_data_col %>%
  mutate(global_resid = resid(top_model2),
         sorted_article = fct_reorder(article, global_resid, .desc = TRUE),
  )

ggplot(rbind(head(model_data_col_sorted, 100), tail(model_data_col_sorted, 100)), aes(x = sorted_article, y = global_resid, fill = sign(global_resid))) + 
        geom_bar(stat = "identity") + 
        theme_minimal() + 
        coord_flip()

library(performance)
check_model(top_model2)

UK Species

Restict species down to just those in UK list

# Get UK bird list
birdlist_rsbp_og <- read.csv("birdlist.csv")

# Restrict UK bird list for only categories A/AC2  # ? Is this ok?
birdlist_rsbp = birdlist_rsbp_og %>%
  filter(category == "A" | category == "AC2")

merged_data_LH_col_uk = merged_data_LH_col_global %>%
  filter(name %in% birdlist_rsbp$name)

UK Species model data

Get a dataset for UK species (with and without color)

uk_model_data_col = merged_data_LH_col_uk %>% mutate(
  log_av_total_views = log10(av_total_views + 1),
  log_av_summer_views = log10(av_summer_views + 1),
  log10_Range.size = log10(Range.Size + 1),
  log10_Mass = log10(Mass + 1),
  Migration = factor(Migration)
) %>% select(name, article, log_av_total_views, language, log10_Range.size, Habitat, Trophic.Level,
            log10_Mass, LociUVS, LociVS, VolumeUVS, VolumeVS,
            Migration, Min.Latitude, Max.Latitude)

uk_model_data_nocol = uk_model_data_col %>% select(name, article, language, log_av_total_views, log10_Range.size, 
                                             Habitat, Trophic.Level, log10_Mass,
                                             Migration, Min.Latitude, Max.Latitude)



uk_model_data_col = uk_model_data_col  %>% filter(language == "en") %>% drop_na()
nrow(uk_model_data_col)
## [1] 350
#[1] 12368
uk_model_data_nocol = uk_model_data_nocol  %>% filter(language == "en") %>% drop_na()
nrow(uk_model_data_nocol)
## [1] 570
#[1] 12368

Model exploration

Define and dredge model for UK species

full_model_uk = glm(log_av_total_views ~ scale(log10_Range.size)*scale(log10_Mass) + 
                                        Habitat*Trophic.Level + 
                                        scale(LociVS)*scale(VolumeVS) + 
                                        Migration + scale(Min.Latitude) + scale(Max.Latitude),  
                                     data = uk_model_data_col, na.action = "na.fail")



md_uk = dredge(full_model_uk)
## Fixed term is "(Intercept)"
head(md_uk, 10)
## Global model call: glm(formula = log_av_total_views ~ scale(log10_Range.size) * 
##     scale(log10_Mass) + Habitat * Trophic.Level + scale(LociVS) * 
##     scale(VolumeVS) + Migration + scale(Min.Latitude) + scale(Max.Latitude), 
##     data = uk_model_data_col, na.action = "na.fail")
## ---
## Model selection table 
##      (Int) Hbt Mgr scl(LVS) scl(l10_Mss) scl(l10_Rng.siz) scl(Max.Ltt)
## 128  2.462   +   +  0.09167       0.1785          0.08698       0.1380
## 2176 2.448   +   +  0.09093       0.1875          0.08725       0.1343
## 256  2.467   +   +  0.10380       0.1770          0.08765       0.1386
## 2304 2.452   +   +  0.10390       0.1860          0.08796       0.1349
## 1280 2.458   +   +  0.09751       0.1715          0.08855       0.1391
## 384  2.460   +   +  0.09102       0.1792          0.08719       0.1375
## 112  2.569   +   +  0.09712       0.1844                        0.1968
## 3328 2.444   +   +  0.09766       0.1805          0.08885       0.1354
## 126  2.436   +      0.09715       0.1934          0.09186       0.1339
## 2432 2.448   +   +  0.09075       0.1902          0.08696       0.1348
##      scl(Min.Ltt) scl(VVS) Trp.Lvl scl(LVS):scl(VVS)
## 128       -0.1591                                   
## 2176      -0.1601                                   
## 256       -0.1583 -0.01476                          
## 2304      -0.1593 -0.01579                          
## 1280      -0.1575 -0.02491                  0.008481
## 384       -0.1595                +                  
## 112       -0.1964                                   
## 3328      -0.1584 -0.02584                  0.008407
## 126       -0.1721                                   
## 2432      -0.1606                +                  
##      scl(l10_Mss):scl(l10_Rng.siz) df   logLik  AICc delta weight
## 128                                17 -157.536 350.9  0.00  0.379
## 2176                      -0.02021 18 -157.046 352.2  1.24  0.204
## 256                                18 -157.463 353.0  2.08  0.134
## 2304                      -0.02044 19 -156.962 354.2  3.31  0.072
## 1280                               19 -157.206 354.7  3.80  0.057
## 384                                19 -157.525 355.4  4.44  0.041
## 112                                16 -161.113 355.9  4.94  0.032
## 3328                      -0.02034 20 -156.709 356.0  5.06  0.030
## 126                                15 -162.343 356.1  5.21  0.028
## 2432                      -0.02108 20 -157.015 356.6  5.67  0.022
## Models ranked by AICc(x)

Color doesn’t seem to be as important for these UK species (maybe as there’s not such a range).

Best models (UK Species)

top_model_uk = get.models(md_uk, subset = 1)[[1]]

plot_model(top_model_uk)

plot_model(top_model_uk, type = "pred", terms = c("log10_Range.size", "log10_Mass"))

Larger birds, with greater range sizes are generally more popular

plot_model(top_model_uk, type = "pred", terms = c("Habitat"))

Species from human modified habitats are more popular (there don’t appear to be any ‘coastal’ birds??? The intercept has switched to ‘Desert’)

plot_model(top_model_uk, type = "pred", terms = c("Migration", "Min.Latitude", "Max.Latitude"))

But confusing this one - looks like migratory birds (3) are a little less popular than sedentary (1), but birds with more northerly max latitudues are more popular. Again, possibly complicated with range-size..

plot_model(top_model_uk, type = "pred", terms = c("LociVS"))

Birds with more colors present (higher UV Loci) appear to be more popular

Big coefficient table from this model, R^2 is about 0.25

#plot_model(top_model, type = "int")
tab_model(top_model_uk, show.aicc = TRUE)
  log_av_total_views
Predictors Estimates CI p
(Intercept) 2.46 1.90 – 3.02 <0.001
Habitat [Forest] 0.84 0.27 – 1.42 0.004
Habitat [Grassland] 0.54 -0.05 – 1.12 0.072
Habitat [Human Modified] 1.02 0.42 – 1.63 0.001
Habitat [Riverine] 0.65 -0.01 – 1.30 0.054
Habitat [Rock] 0.58 -0.02 – 1.17 0.059
Habitat [Shrubland] 0.59 0.02 – 1.17 0.043
Habitat [Wetland] 0.61 0.02 – 1.21 0.042
Habitat [Woodland] 0.82 0.23 – 1.40 0.006
Migration [2] -0.17 -0.33 – -0.01 0.039
Migration [3] -0.21 -0.35 – -0.08 0.002
LociVS 0.09 0.05 – 0.13 <0.001
log10 Mass 0.18 0.13 – 0.23 <0.001
log10 Range size 0.09 0.02 – 0.15 0.009
Max Latitude 0.14 0.07 – 0.20 <0.001
Min Latitude -0.16 -0.21 – -0.11 <0.001
Observations 350
R2 0.542
AICc 350.915

We can also look at the average of the top models, and explore the beta-coefficients to get some sense of the most influential effects in the model,

# # Average of 95% confidence set:
# avgmod.95p_uk <- model.avg(md_uk, cumsum(weight) <= .95, fit = TRUE)
# 
# tab_model(avgmod.95p_uk, show.aicc = TRUE)

Again, we can extract the residuals and plot the top and bottom 100 species in terms of their residuals. So the 100 most unexpectedly popular and 100 most unexpectedly 100 unpopular birds.

uk_model_data_col_sorted = uk_model_data_col %>%
  mutate(global_resid = resid(top_model_uk),
         sorted_article = fct_reorder(article, global_resid, .desc = TRUE),
  )

ggplot(rbind(head(uk_model_data_col_sorted, 100), tail(uk_model_data_col_sorted, 100)), aes(x = sorted_article, y = global_resid, fill = sign(global_resid))) + 
        geom_bar(stat = "identity") + 
        theme_minimal() + 
        coord_flip()

library(performance)
check_model(top_model_uk)