---
title: "Mojave desert hydric physiology"
subtitle: "Technical supplement"
author: "Alvaro Ramos and Trevor Ruiz"
author-title: "AUTHORS"
date: today
published-title: "UPDATED"
format:
html:
code-fold: false
code-tools: true
callout-appearance: simple
math: mathjax
toc: false
execute:
warning: false
message: false
echo: false
cache: true
---
```{r}
library(tidyverse)
library(magrittr)
library(ggridges)
library(patchwork)
library(ape)
library(geiger)
library(phytools)
library(nlme)
library(lme4)
library(brms)
library(emmeans)
library(pander)
library(broom)
library(sf)
library(bayesplot)
# read in data
data_dir <- 'data/processed/'
here::here(data_dir, 'mojave.RData') |> load()
# color palettes
cb_pal <- c(BG = "#FF00B2", DI = "#ECC901", CW = "#696969", LB = "#0072B2",
SB = "#56B4E9", TW = "#D52C00", ZT = "#009E73", MF = "#E67E00",
LN = "#5600B9")
mh_pal <- c(Floor = "orange", Road = "gray", Rock = "saddlebrown",
Sand = "wheat2", Veg = "forestgreen")
```
This is the analysis notebook for the working paper *Phylogeny vs. physiology: how a guild of desert lizards maintain water balance*. The project examines cutaneous evaporative water loss (CEWL) among a sample of lizards of 9 distinct species in the Mojave desert before and after acclimation to a controlled environment.
The primary research question is:
> To what extent do osmolality, ambient temperature, vapor pressure deficit, body mass, species differences, and shared phylogeny explain variation in cutaneous evaporative water loss at capture and after acclimation?
::: callout-note
Comments appear like this.
:::
::: {.callout-caution appearance='simple'}
To-do's and placeholders for missing content appear like this.
:::
# Descriptive summaries
Primary measurements taken both at capture and after acclimation (2 days after capture) are:
- `cewl`: mean cutaneous evaporative water loss (in $g/m^2h$) measured in technical replicates
- `osml`: mean blood plasma osmolality ($mmol/kg$), or the concentration of solutes in the blood (proxy for hydraton)
- `mass`: mass ($g$) at time of measurement
- `clo.temp`: cloacal temperature ($^\circ C$)
- `temp`: ambient temperature ($^\circ C$) at time of measurement
- `vpd`: ambient vapor pressure deficit ($kPa$) at time of measurement
<!-- - `rh`: ambient relative humidity (%) at time of measurement -->
<!-- - `hema.pct`: hematocrit (%) -->
Example rows are shown below.
```{r}
# example rows
raw |>
arrange(desc(id), date) |>
head() |>
select(-ends_with('.sd'), -hema.pct, -rh) |>
pander(caption = 'Table: example data rows')
```
For most species, about ten individuals were captured for the study, but sample sizes range from 2 to 24 individuals.
```{r}
# sample sizes
raw |>
distinct(id, species) |>
count(species) %>%
left_join(species, ., join_by(sp.abbrv == species)) |>
select(-activity, -diet) |>
pander(caption = 'Table: sample sizes by species')
```
## Capture sites
Individuals were captured from 9 locales in the Mojave desert.
```{r}
#| fig-width: 6
#| fig-height: 6
#| fig-cap: "Figure: Map of study sites (blue) and capture locations (inset)"
# read in shape files
shape_california_sf <- here::here("data/_raw/map_shapefiles/California_State_Boundary.geojson") |>
st_read(quiet = TRUE)
shape_capture_sf <- here::here("data/_raw/map_shapefiles/capture_sites.gpkg") |>
st_read(quiet = TRUE)
shape_rectangle_sf <- here::here("data/_raw/map_shapefiles/custom_moja_rectangle.shp") |>
st_read(quiet = TRUE)
st_crs(shape_california_sf) <- 4326
st_crs(shape_capture_sf) <- 4326
st_crs(shape_rectangle_sf) <- 4326
target_crs <- st_crs(shape_california_sf)
shape_capture_sf <- st_transform(shape_capture_sf, target_crs)
shape_rectangle_sf <- st_transform(shape_rectangle_sf, target_crs)
pts_sf <- st_as_sf(metadata, coords = c('lon', 'lat'), crs = 4326) |>
st_transform(target_crs)
# plot CA map
label_col <- 'id'
p1 <- ggplot() +
geom_sf(data = shape_california_sf, fill = "gray90", color = "gray60") +
geom_sf(data = shape_rectangle_sf, fill = NA, color = "darkblue", linewidth = 0.7) +
geom_sf(data = shape_capture_sf, color = "steelblue", size = 2, alpha = 0.8) +
theme_minimal(base_size = 12) +
coord_sf(expand = FALSE) +
labs(x = "Longitude", y = "Latitude") +
theme(panel.grid = element_blank())
# Get bounding box of the rectangle
bbox_rect <- st_bbox(shape_rectangle_sf)
xpad <- 0.02 * (bbox_rect["xmax"] - bbox_rect["xmin"])
ypad <- 0.02 * (bbox_rect["ymax"] - bbox_rect["ymin"])
xlim <- c(bbox_rect["xmin"] - xpad, bbox_rect["xmax"] + xpad)
ylim <- c(bbox_rect["ymin"] - ypad, bbox_rect["ymax"] + ypad)
label_col <- 'id'
p2 <- ggplot() +
geom_sf(data = shape_california_sf, fill = "gray95", color = "gray80") +
geom_sf(data = shape_rectangle_sf, fill = NA, color = "darkblue", linewidth = 0.8) +
geom_sf(data = shape_capture_sf, color = "steelblue", size = 15, alpha = 0.2) +
geom_sf(data = pts_sf, color = "darkred", size = 2, alpha = 0.7) +
theme_minimal(base_size = 12) +
coord_sf(xlim = xlim, ylim = ylim, expand = FALSE) +
labs(x = NULL,
y = NULL) +
ggrepel::geom_text_repel(data = shape_capture_sf,
aes(label = locale, geometry = geom),
stat = 'sf_coordinates',
size = 3) +
theme(panel.grid = element_blank())
p1 + inset_element(p2 + theme_void(),
left = 0.20, bottom = 0.40,
right = 0.98, top = 0.98,
align_to = 'panel')
ggsave(filename = here::here('img/fig1.png'), dpi = 400, width = 5, height = 5, units = 'in')
```
Capture locations were classified by microhabitat type. The capture site for individual TW10 was not classified.
```{r}
metadata |>
count(locale, mh) |>
spread(mh, n) |>
pander(caption = 'Table: sample sizes by locale and microhabitat type',
missing = '-')
```
The microhabitat distribution of species sampled for the study is shown below. Most species were sampled from 1 or 2 primary microhabitats.
```{r}
#| fig-width: 6
#| fig-height: 4
#| fig-cap: "Figure: Microhabitat allocation of individuals sampled by species"
# microhabitat allocation
metadata |>
select(species, sp.common, mh) |>
mutate(species = toupper(species),
mh = str_to_title(mh)) |>
filter(is.na(mh) == FALSE) |>
group_by(mh, species) |>
summarize(count = n()) |>
group_by(species) |>
mutate(prop = count / sum(count)) |>
ggplot(aes(x = species,
y = prop,
fill = mh)) +
geom_bar(stat = "identity",
color = "black",
linewidth = 0.2,
alpha = 0.7) +
scale_fill_manual(values = mh_pal, labels = c("Floor", "Road", "Rock", "Sand", "Vegetation")) +
geom_text(aes(label = str_glue("{100*round(prop, 2)}% ({count})")),
position = position_stack(vjust = 0.5),
color = "black",
size = 2) +
labs(x = "Species",
y = "Microhabitat allocation",
fill = NULL) +
theme_minimal() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggsave(filename = here::here('img/sfig1.png'), dpi = 400, width = 6, height = 5, units = 'in')
```
Microhabitats are not considered in subsequent analysis because, although there are apparent marginal differences in CEWL and osmolality at capture when grouped by microhabitat, species is a confounding factor and after accounting for species the apparent marginal differences vanish.
```{r}
#| fig-width: 6
#| fig-height: 6
#| fig-cap: "Figure: Distributions of CEWL and osmolality by microhabitat (top); Distributions of residual CEWL and osmolality by microhabitat after accounting for species (bottom)."
# capture data
capture <- raw |>
filter(day == "capture") |>
select(id, species, mass, osml, cewl, temp, vpd) |>
left_join(select(species, sp.abbrv, sp.common), join_by(species == sp.abbrv)) |>
rename(common.name = sp.common) |>
mutate(spp = toupper(species) |> fct_reorder(cewl, median),
name = str_to_title(common.name) |> fct_reorder(cewl, median))
# cewl by microhabitat
p1 <- capture |>
left_join(metadata, by = join_by(id)) |>
filter(is.na(mh) == FALSE) |>
mutate(mh = as.factor(str_to_title(mh))) |>
ggplot(aes(y = fct_reorder(mh, cewl, median), # order species based on median CEEWL
x = cewl,
fill = mh)) +
geom_density_ridges(scale = 1, alpha = 0.8, show.legend = F) +
geom_boxplot(width = 0.25, alpha = 0.8, show.legend = F) +
geom_vline(xintercept = median(capture$cewl),
linetype = 5,
alpha = 0.5) +
theme_bw() +
labs(y = "Microhabitat",
x = NULL,
title = "CEWL") +
geom_label(data = subset(capture |>
left_join(metadata, by = join_by(id)) |>
filter(is.na(mh) == FALSE) |>
mutate(mh = as.factor(str_to_sentence(mh))),
(cewl > 20 & mh == "Sand") |
(cewl > 15 & mh == "Floor")),
aes(label = id),
show.legend = FALSE,
vjust = -0.2,
size = 3,
alpha = 0.8) +
scale_fill_manual(values = mh_pal) +
theme(panel.grid.minor = element_blank())
# osml by microhabitat
p2 <- capture |>
left_join(metadata, by = join_by(id)) |>
filter(is.na(mh) == FALSE) |>
mutate(mh = as.factor(str_to_title(mh))) |>
ggplot(aes(y = fct_reorder(mh, cewl, median),
x = osml,
fill = mh)) +
geom_density_ridges(scale = 1, alpha = 0.8, show.legend = F) +
geom_boxplot(width = 0.25, alpha = 0.8, show.legend = F,
data = subset(capture |>
left_join(metadata, by = join_by(id)) |>
filter(is.na(mh) == FALSE) |>
mutate(mh = as.factor(str_to_sentence(mh))),
mh != "Road")) +
geom_point(data = subset(capture |>
left_join(metadata, by = join_by(id)) |>
filter(is.na(mh) == FALSE) |>
mutate(mh = as.factor(str_to_sentence(mh))),
mh == "Road"),
show.legend = FALSE) +
geom_vline(xintercept = median(capture$osml, na.rm = T),
linetype = 5,
alpha = 0.5) +
theme_bw() +
labs(y = NULL,
x = NULL,
title = "Osmolality") +
geom_label(data = subset(capture |>
left_join(metadata, by = join_by(id)) |>
filter(is.na(mh) == FALSE) |>
mutate(mh = as.factor(str_to_sentence(mh))),
(osml < 290 & mh == "Floor") |
(osml < 400 & mh == "Road")),
aes(label = id),
show.legend = FALSE,
vjust = -0.2,
size = 3,
alpha = 0.8) +
scale_fill_manual(values = mh_pal) +
theme(panel.grid.minor = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank())
# cewl residuals by mh -- add outlier annotations?
capture_cres <- capture %>%
modelr::add_residuals(., lm(cewl ~ species, data = .), var = 'cewl.res')
p3 <- capture_cres |>
left_join(metadata, by = join_by(id)) |>
filter(is.na(mh) == FALSE) |>
mutate(mh = as.factor(str_to_title(mh))) |>
ggplot(aes(y = mh,
x = cewl.res,
fill = mh)) +
geom_density_ridges(scale = 1, alpha = 0.8, show.legend = F) +
geom_boxplot(width = 0.25, alpha = 0.8, show.legend = F) +
geom_vline(xintercept = 0,
linetype = 5,
alpha = 0.5) +
theme_bw() +
labs(y = "Microhabitat",
x = NULL,
title = "Residual CEWL") +
scale_fill_manual(values = mh_pal) +
theme(panel.grid.minor = element_blank())
# osml residuals by mh -- add outlier annotations?
capture_ores <- capture %>%
drop_na(osml) %>%
modelr::add_residuals(., lm(osml ~ species, data = .), var = 'osml.res') %>%
modelr::add_residuals(., lm(cewl ~ species, data = .), var = 'cewl.res')
p4 <- capture_ores |>
left_join(metadata, by = join_by(id)) |>
filter(is.na(mh) == FALSE) |>
mutate(mh = as.factor(str_to_title(mh))) |>
ggplot(aes(y = mh,
x = osml.res,
fill = mh)) +
geom_density_ridges(scale = 1, alpha = 0.8, show.legend = F) +
geom_boxplot(width = 0.25, alpha = 0.8, show.legend = F,
data = subset(capture_ores |>
left_join(metadata, by = join_by(id)) |>
filter(is.na(mh) == FALSE) |>
mutate(mh = as.factor(str_to_sentence(mh))),
mh != "Road")) +
geom_point(data = subset(capture_ores |>
left_join(metadata, by = join_by(id)) |>
filter(is.na(mh) == FALSE) |>
mutate(mh = as.factor(str_to_sentence(mh))),
mh == "Road"),
show.legend = FALSE) +
geom_vline(xintercept = 0,
linetype = 5,
alpha = 0.5) +
theme_bw() +
labs(y = NULL,
x = NULL,
title = "Residual osmolality") +
scale_fill_manual(values = mh_pal) +
theme(panel.grid.minor = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank())
(p1 + p2)/(p3 + p4) + plot_layout(nrow = 2)
ggsave(filename = here::here('img/sfig2.png'), dpi = 400, width = 6, height = 6, units = 'in')
```
Indeed, microhabitat does not explain a significant amount of variation in CEWL after adjusting for species.
```{r}
lm(cewl ~ species*mh,
data = left_join(capture, metadata, by = join_by(id, species))) |>
anova() |>
pander(caption = "Table: ANOVA of CEWL by species and microhabitat.")
```
Moreover, there's no obvious strong relationship between microhabitat temperatures and CEWL or osmolality after capture.
```{r}
#| fig-width: 6
#| fig-height: 3
#| fig-cap: "Figure: Microhabitat ambient temperature at capture site and CEWL and osmolality values after capture"
# microhabitat temperature
left_join(capture, metadata, join_by(id, species)) |>
rename(CEWL = cewl,
Osmolality = osml) |>
pivot_longer(c(CEWL, Osmolality), names_to = 'yvar') |>
ggplot(aes(x = mh.temp, y = value)) +
facet_wrap(~yvar, scales = 'free_y') +
geom_point() +
theme_bw() +
labs(x = 'Microhabitat temperature', y = NULL, color = 'Species')
ggsave(filename = here::here('img/sfig3.png'), dpi = 400, width = 6, height = 3, units = 'in')
```
## Missing values
Several osmolality values are missing both at capture and after acclimation.
```{r}
# missing values
raw |>
pivot_longer(c(osml, cewl, mass, clo.temp, temp, vpd)) |>
group_by(day, name) |>
summarize(n.missing = sum(is.na(value))) |>
spread(day, n.missing) |>
pander(caption = 'Table: counts of missing values')
```
::: callout-note
The missing cloacal temperature value is from individual LB02. We'll ignore this value since it was agreed on 8/20/25 that we'd use ambient temperatures rather than cloacal temperatures in statistical models.
:::
The missing osmolality values are distributed across species as follows:
```{r}
raw |>
pivot_longer(c(osml, cewl, mass, clo.temp, temp, vpd)) |>
group_by(species, day, name) |>
summarize(n.missing = sum(is.na(value))) |>
spread(species, n.missing) |>
filter(name == 'osml') |>
rename(variable = name) |>
pander(caption = 'Table: counts of missing osmolality values by species')
```
Here is a list of the individuals with missing values:
- acclimation only: CW01, CW11, CW12, CW13, CW14, DI18, LB04
- capture only: CW02, SB01, SB02, SB08
- both: LB05, LB06, LB07, LB08, LB09, LB10, LB11, SB10, SB11, SB13, SB14, SB15, SB16, SB17, SB18
::: callout-note
Missing osmolality measurements may become available for Chuckwallas at a later date per discussion on 8/20/25.
:::
Sample sizes after removing missing values are rendered below.
```{r}
# tabulate missing osmolality
raw |>
select(id, day, species, osml) |>
spread(day, osml) |>
mutate(missing = case_when(
is.na(acclimation) | is.na(capture) ~ F, # either column missing
TRUE ~ T)) |>
group_by(na.osml = factor(missing,
levels = c(T, F),
labels = c('osml missing', 'osml recorded'))) |>
count(species) |>
spread(na.osml, n) |>
mutate(across(everything(), ~replace_na(.x, 0))) |>
mutate(n = `osml missing` + `osml recorded`) |>
select(species, n, starts_with('osml')) |>
janitor::adorn_totals() |>
pander(caption = 'Table: sample sizes by species after removing missing osmolality values')
```
## Which temperature to use?
It is worth noting that cloacal and ambient temperatures are related. Some exploration was carried out in consideration of whether both should factor in the analysis.
```{r}
#| fig-width: 6
#| fig-height: 6
#| fig-cap: "Figure: Cloacal and ambient temperatures at capture and after acclimation (top); changes in value after acclimation (bottom)"
# cloacal and ambient temperature correlations
p1 <- raw |>
drop_na(clo.temp) |>
group_by(day) |>
summarize(r = cor(temp, clo.temp)) %>%
left_join(raw, ., join_by(day)) |>
mutate(day = paste(str_to_title(day), ' (r = ', round(r, 3), ')', sep = '')) |>
ggplot(aes(x = temp, y = clo.temp)) +
facet_wrap(~fct_rev(day)) +
geom_point() +
geom_abline(slope = 1, intercept = 0, color = 'grey', alpha = 0.6) +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
labs(x = 'Ambient temperature', y = 'Cloacal temperature')
# pairwise differences
p2 <- raw |>
mutate(temp.diff = temp - clo.temp) |>
ggplot(aes(x = temp.diff)) +
facet_wrap(~str_to_title(day) |> fct_rev(), scales = 'free_x') +
geom_vline(xintercept = 0, linetype = 2, color = "gray") +
geom_histogram(bins = 15, color = "black", fill = "white") +
theme_bw() +
labs(x = "Pairwise differences (Ambient - Internal)", y = 'Count') +
theme(panel.grid.minor = element_blank())
# change after acclimation
p3 <- raw |>
select(id, day, clo.temp, temp) |>
drop_na(clo.temp) |>
pivot_wider(id_cols = id, names_from = day, values_from = c(clo.temp, temp)) |>
transmute(`Ambient temperature` = temp_acclimation - temp_capture,
`Cloacal temperature` = clo.temp_acclimation - clo.temp_capture) |>
pivot_longer(everything()) |>
ggplot(aes(x = value)) +
facet_wrap(~name, nrow = 1, scales = 'free_x') +
geom_vline(xintercept = 0, linetype = 2, color = "gray") +
geom_histogram(bins = 15, color = "black", fill = "white") +
theme_bw() +
labs(x = "Change in value (acclimation - capture)", y = 'Count') +
theme(panel.grid.minor = element_blank())
# need to fix layout
p1 + p3 + plot_layout(ncol = 1)
ggsave(filename = here::here('img/sfig4.png'), dpi = 400, width = 6, height = 6, units = 'in')
```
Contrary to expectation: (a) the two are only moderately correlated and (b) the correlation is weaker after acclimation, as shown in the top panel. This also makes visible that the ordering of temperature measurements reverses after acclimation -- on average, cloacal temperatures are higher than ambient temperatures at capture but lower than ambient temperatures after acclimation. The bottom panels show that while ambient temperatures did not appear to change systematically after acclimation, cloacal temperatures decreased on average.
::: callout-note
Per discussion on 8/20/25, ambient temperature is of greater interest as a possible predictor of cutaneous evaporative water loss. We may wish to revisit this at a later date.
:::
## Distributions of primary measurements
```{r}
# capture data
capture <- raw |>
filter(day == "capture") |>
select(id, species, mass, osml, cewl, temp, vpd) |>
left_join(select(species, sp.abbrv, sp.common), join_by(species == sp.abbrv)) |>
rename(common.name = sp.common) |>
mutate(spp = toupper(species) |> fct_reorder(cewl, median),
name = str_to_title(common.name) |> fct_reorder(cewl, median))
# add species annotations to change data
change_ann <- change |>
left_join(select(species, sp.abbrv, sp.common), join_by(species == sp.abbrv)) |>
rename(common.name = sp.common) |>
mutate(spp = toupper(species) |> fct_reorder(cewl, ~median(.x, na.rm = T)),
name = str_to_title(common.name) |> fct_reorder(cewl, ~median(.x, na.rm = T)))
```
The marginal distributions of individual measurements by species at capture and changes after acclimation are shown below.
```{r}
#| fig-width: 8
#| fig-height: 5
#| fig-cap: "Figure: Distributions of CEWL, osmolality, and mass by species and ambient measurement conditions at capture (top); distributions of changes after acclimation (bottom)."
# # cewl
# p1 <- capture |>
# ggplot(aes(y = name,
# x = cewl,
# fill = spp)) +
# geom_density_ridges(scale = 1, alpha = 0.8,
# show.legend = F) +
# geom_boxplot(width = 0.25, alpha = 0.8,
# show.legend = FALSE,
# data = capture |> filter(spp != "LN")) +
# geom_point(data = capture |> filter(spp == "LN"),
# show.legend = FALSE) +
# geom_vline(xintercept = median(capture$cewl),
# linetype = 5,
# alpha = 0.5) +
# scale_fill_manual(values = cb_pal) +
# theme_bw() +
# labs(y = NULL,
# x = NULL,
# title = "CEWL") +
# geom_label(data = subset(capture,
# (cewl > 20 & spp == "SB") |
# (cewl > 13 & spp == "MF") |
# (cewl < 5 & spp == "MF")),
# aes(label = id),
# show.legend = FALSE,
# vjust = -0.2,
# size = 3,
# alpha = 0.8,
# color = "white") +
# theme(panel.grid.minor = element_blank())
#
# # osmolality
# p2 <- capture |>
# ggplot(aes(y = spp,
# x = osml,
# fill = spp)) +
# geom_density_ridges(scale = 1, alpha = 0.8,
# show.legend = F) +
# geom_boxplot(width = 0.25, alpha = 0.8,
# show.legend = FALSE,
# data = capture |> filter(spp != "LN")) +
# geom_point(data = capture |> filter(spp == "LN"),
# show.legend = FALSE) +
# geom_vline(xintercept = median(capture$osml, na.rm = T),
# linetype = 5,
# alpha = 0.5) +
# scale_fill_manual(values = cb_pal) +
# theme_bw() +
# labs(y = NULL,
# x = NULL,
# title = "Osmolality") +
# geom_label(data = subset(capture,
# (osml < 350 & spp == "TW") |
# (osml > 375 & spp == "ZT")),
# aes(label = id),
# show.legend = FALSE,
# vjust = -0.2,
# size = 3,
# alpha = 0.8,
# color = "white") +
# theme(axis.text.y = element_blank(),
# axis.ticks.y = element_blank(),
# panel.grid.minor = element_blank())
#
# # mass (add outlier labels?)
# p3 <- capture |>
# ggplot(aes(y = spp,
# x = mass,
# fill = spp)) +
# geom_density_ridges(scale = 1, alpha = 0.8,
# show.legend = F) +
# geom_boxplot(width = 0.25, alpha = 0.8,
# show.legend = FALSE,
# data = capture |> filter(spp != "LN")) +
# geom_point(data = capture |> filter(spp == "LN"),
# show.legend = FALSE) +
# geom_vline(xintercept = median(capture$mass, na.rm = T),
# linetype = 5,
# alpha = 0.5) +
# scale_fill_manual(values = cb_pal) +
# theme_bw() +
# labs(y = NULL,
# x = NULL,
# title = 'Mass (log scale)') +
# scale_x_log10() +
# theme(axis.text.y = element_blank(),
# axis.ticks.y = element_blank(),
# panel.grid.minor = element_blank())
# individual attributes at capture
p1 <- capture |>
mutate(log.mass = log(mass)) |>
rename(CEWL = cewl,
Osmolality = osml,
`log(Mass)` = log.mass) |>
pivot_longer(c(CEWL, Osmolality, `log(Mass)`), names_to = 'variable') |>
ggplot(aes(y = name,
x = value,
color = spp)) +
facet_grid(~variable, scales = 'free_x') +
geom_boxplot() +
scale_color_manual(values = cb_pal) +
labs(x = 'Value at capture',
y = NULL,
color = NULL) +
theme_bw() +
guides(color = guide_none()) +
theme(panel.grid.minor = element_blank())
# ambient measurement conditions at capture
p2 <- capture |>
select(temp, vpd) |>
rename(`Temperature (C)` = temp,
`VPD (kPa)` = vpd) |>
pivot_longer(everything()) |>
ggplot(aes(x = value, y = ..density..)) +
facet_wrap(~name, scales = 'free', nrow = 2) +
geom_histogram(bins = 12) +
theme_bw() +
labs(x = NULL, y = NULL) +
theme(panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank())
# change (after - before) in individual attributes
p3 <- change_ann |>
rename(`log(CEWL)` = log.cewl,
Osmolality = osml,
`log(Mass)` = log.mass) |>
pivot_longer(c(`log(CEWL)`, Osmolality, `log(Mass)`), names_to = 'variable') |>
ggplot(aes(y = name,
x = value,
color = spp)) +
facet_grid(~variable, scales = 'free_x') +
geom_vline(xintercept = 0,
linetype = 1,
alpha = 0.4) +
geom_boxplot() +
scale_color_manual(values = cb_pal) +
labs(x = 'Change in value (acclimation - capture)',
y = NULL,
color = NULL) +
theme_bw() +
guides(color = guide_none()) +
theme(panel.grid.minor = element_blank())
# change (after - before) in ambient measurement conditions
p4 <- change_ann |>
rename(Temperature = temp, VPD = vpd) |>
pivot_longer(c(Temperature, VPD), names_to = 'variable') |>
ggplot(aes(x = value)) +
facet_wrap(~variable, scales = 'free_x', nrow = 2) +
geom_histogram(bins = 10) +
theme_bw() +
theme(panel.grid.minor = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank()) +
labs(x = NULL, y = NULL)
# panel layout
p_dist_cap <- (p1 | p2) + plot_layout(widths = c(3, 1))
p_dist_acc <- (p3 | p4) + plot_layout(widths = c(3, 1))
p_dist_cap / p_dist_acc
ggsave(here::here("img/fig2.png"), width = 8, height = 5, units = 'in', dpi = 400)
```
At a glance, only Chuckwallas showed a dramatic change (increase, in this case) in CEWL due to acclimation, yet they experienced little change in mass or osmolality (though the latter changes exhibit a relatively large variance). Owing to the difference in scale of change among Chuckwallas compared with other species, change in CEWL is shown above (and later modeled) on the log scale.
Other observations regarding marginal distributions of primary measruements:
- ambient conditions at times of measurement are typically 24-26 degrees C and 2.0-2.4 kPa
- a few species (LN, TW, LT, ZT) exhibited changes in osmolality, but they were of relatively small magnitude (20-40 mmol/kg)
- ambient temperatures fluctuated by 1-2 degrees between measurements taken after capture and measurements taken after acclimation
- vapor pressure deficits fluctuated by 0.2-0.3 kPa between measurements taken after capture and measurements taken after acclimation
- changes in mass are negligible
# Statistical modeling
The framework for analysis is:
$$
\text{CEWL} \longleftarrow \text{osml} + \text{mass} + \text{temp} + \text{vpd}
$$
Marginal relationships between the primary covariates and CEWL are shown below. Mass is log-transformed owing to large species differences in typical body size.
```{r}
#| fig-width: 8
#| fig-height: 6
#| fig-cap: "Figure: Marginal relationships between change in CEWL and changes in other primary measurements; logratio changes in CEWL shown at top; and arithmetic difference in CEWL shown at bottom"
# cewl at capture
p1 <- capture |>
mutate(mass = log(mass)) |>
pivot_longer(cols = c("osml", "mass", "temp", "vpd"),
names_to = "var",
values_to = "value") |>
mutate(var = fct_recode(as.factor(var),
"Osmolality" = "osml",
"log(Mass)" = "mass",
"Temperature" = "temp",
"VPD" = "vpd")) |>
ggplot(aes(x = value,
y = cewl)) +
geom_point() +
facet_grid(~ var, scales = "free_x") +
theme_bw() +
labs(x = "Value at capture", y = 'CEWL at capture', color = NULL)
# change in cewl
p2 <- change_ann |>
pivot_longer(cols = c("osml", "mass", "temp", "vpd"),
names_to = "var",
values_to = "value") |>
mutate(var = fct_recode(as.factor(var),
"Osmolality" = "osml",
"log(Mass)" = "mass",
"Temperature" = "temp",
"VPD" = "vpd")) |>
ggplot(aes(x = value,
y = log.cewl)) +
geom_point() +
facet_grid(~ var, scales = "free_x") +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
labs(x = "Change in value (acclimation - capture)",
y = "Change in log(CEWL)")
p1 / p2
ggsave(filename = here::here('img/sfig5.png'), dpi = 400, width = 6, height = 4, units = 'in')
```
::: callout-note
Considering that changes in mass are negligible, do we want to consider instead compare change in CEWL directly with mass at capture?
:::
Individuals with missing osmolality measurements (discussed above) are removed from analysis, and covariates are centered for ease of interpretation when fitting models.
```{r}
# analysis-ready capture data
cdata <- capture |>
drop_na(osml) |>
mutate(log.mass = log(mass)) |>
select(id, species, cewl, log.mass, osml, temp, vpd) |>
mutate(across(c(log.mass, osml, temp, vpd), ~scale(.x, scale = F)))
# analysis-ready change data
ddata <- change |>
drop_na(osml) |>
select(id, species, log.cewl, log.mass, osml, temp, vpd) |>
mutate(across(c(log.mass, osml, temp, vpd), ~scale(.x, scale = F)))
```
## Bayesian phylogenetic mixed model
Denote the vector of CEWL measurements (or change in CEWL measurements) by $Y$ and the covariate measurements (or change in covariate measurements) by:
$$
X = (1 \; \log(\text{mass}) \; \text{osml} \; \text{temp} \; \text{vpd})
$$
To enter species into the model, encode the species membership of each individual by the $n \times S$ matrix:
$$
Z = \text{diag}(\mathbf{1}_{n_1}, \mathbf{1}_{n_2}, \dots, \mathbf{1}_{n_S})
$$
Here $\mathbf{1}_{n_s}$ denotes a vector of ones of length $n_s$, where $n_s$ denotes the sample size for species $s = 1, \dots, S$ (note $\sum_s n_s = n$).
Assuming the relationship between CEWL and each covariate does not depend on species yields the linear mixed model:
$$
Y = X\beta + Z \gamma + \epsilon
$$
Here the random vector $\gamma$ captures species differences in mean CEWL. Species-specific intercepts are $\beta_0 + \gamma_s$ for species $s = 1, \dots, S$. The random vector $\epsilon \sim N(0, \sigma^2 I_n)$ represents individual variation, and it is assumed that $\epsilon \perp \gamma$.
::: callout-note
Preliminary analysis using a model with uncorrelated random effects was used to test for species/covariate interactions; no such interactions were significant. These results are not shown here, but codes to reproduce the inference are included in the source code for this notebook.
```{r, include = F}
## ---- at capture ----
# fit model 1
fit1_cap <- lmer(cewl ~ log.mass + osml + temp + vpd + (1 | species),
data = cdata)
# fit model 2
fit2_cap <- lmer(cewl ~ log.mass + osml + temp + vpd +
(1 + log.mass + osml + vpd || species),
data = cdata,
control = lmerControl(optimizer = 'bobyqa',
optCtrl = list(maxfun = 1e6)))
# test for interactions
anova(fit1_cap, fit2_cap, type = 'LRT')
## ---- after acclimation ----
# fit model 1
fit1_acc <- lmer(log.cewl ~ log.mass + osml + temp + vpd + (1 | species),
data = ddata)
# fit model 2 -- note singular fit with log.mass
fit2_acc <- lmer(log.cewl ~ log.mass + osml + temp + vpd +
(1 + osml + temp + vpd || species),
data = ddata,
control = lmerControl(optimizer = 'bobyqa',
optCtrl = list(maxfun = 1e6)))
# test for interactions
anova(fit1_acc, fit2_acc, type = 'LRT')
```
:::
If $\gamma \sim N(0, \sigma^2_\gamma I_S)$ species random effects are uncorrelated, but phylogenetic relationships may induce trait dependence between species in proportion to the length of shared evolutionary history. For the species in this study, an ultrametric (*i.e.*, time-calibrated) tree was constructed from the phylogeny in Pyron *et al.* (2013) and is shown below.
```{r}
# use abbreviations for tip labels
tree_relabeled <- tree
tree_relabeled$tip.label <- pull(species, sp.abbrv, name = sp.sci)[tree$tip.label]
# Plot the tree
par(mar = c(4, 4, 5, 4), xpd = NA)
plot(tree_relabeled, show.tip.label = TRUE, cex = 1)
# Add branch length annotations
edgelabels(
text = round(tree$edge.length, 3), # label text (rounded lengths)
frame = "none", # no box around labels
adj = c(0.5, -0.5), # adjust label position
cex = 0.8, # text size
col = "blue" # text color
)
```
Under a Brownian motion model of evolutionary drift, this structure induces the following expected correlations among an arbitrary continuous trait:
```{r, results='asis'}
# trait covariance under bm
v_mx <- tree_relabeled |> drop.tip('bg') |> vcv(model = 'Brownian', corr = T)
# Convert to LaTeX bmatrix format (from GPT)
latex_matrix_named <- function(M, digits = 3, fmt = "fg", na_label = "") {
stopifnot(is.matrix(M))
M <- round(M, digits)
rn <- rownames(M); cn <- colnames(M)
if (is.null(rn)) rn <- as.character(seq_len(nrow(M)))
if (is.null(cn)) cn <- as.character(seq_len(ncol(M)))
# Escape LaTeX special chars in labels
esc <- function(x) {
x <- gsub("\\\\", "\\\\textbackslash{}", x)
x <- gsub("([#%&_{}$])", "\\\\\\1", x, perl = TRUE)
x <- gsub("\\^", "\\\\textasciicircum{}", x)
x <- gsub("~", "\\\\textasciitilde{}", x)
x
}
rn <- esc(rn); cn <- esc(cn)
# Build header and body
header <- paste0("& ", paste(cn, collapse = " & "), " \\\\")
num2tex <- function(v) paste(formatC(v, format = fmt, digits = digits), collapse = " & ")
body <- paste(
mapply(function(lbl, row) paste0(lbl, " & ", num2tex(row)), rn, split(M, row(M))),
collapse = " \\\\ \n"
)
# Array spec: one left label col + n centered numeric cols
spec <- paste0("l|", paste(rep("c", ncol(M)), collapse = ""))
out <- paste0(
"$$\n",
"V = \\quad \\begin{array}{", spec, "}\n",
header, "\n\\hline\n",
body, "\n",
"\\end{array}\n",
"$$"
)
knitr::asis_output(out)
}
cat(latex_matrix_named(v_mx, 3))
```
```{r, results = 'hide'}
# combine above for paper figure
labels <- species |>
select(sp.sci, sp.abbrv) |>
filter(sp.abbrv %in% tree_relabeled$tip.label) |>
mutate(sp.abbrv2 = paste('(', sp.abbrv, ')', sep = '')) |>
unite('label', sp.sci, sp.abbrv2, sep = ' ')
tree_fig <- tree_relabeled
tree_fig$tip.label <- labels$label[match(tree_fig$tip.label, labels$sp.abbrv)]
png(filename = here::here('img/fig3.png'), res = 400, width = 8, height = 3, units = 'in')
# Split screen: 1 row, 2 columns (left matrix, right plot)
layout(matrix(c(2, 1), nrow = 1), widths = c(2, 1.5))
# ---- Left: draw matrix as text ----
par(mar = c(1, 0.5, 1, 0.5))
plot.new()
# Define the plotting area for the matrix
M <- v_mx[8:1, 8:1] |> round(3)
n_row <- nrow(M)
n_col <- ncol(M)
# Create coordinate system for text placement
plot.window(xlim = c(0, n_col + 1), ylim = c(0, n_row + 1))
# Draw column and row labels
text(1:n_col, rep(n_row + 0.5, n_col), colnames(M), font = 2, cex = 0.9)
text(rep(0, n_row), rev(1:n_row), rownames(M), font = 2, cex = 0.9)
# Print each matrix element
for (i in 1:n_row) {
for (j in 1:n_col) {
text(j, n_row - i + 1, sprintf("%0.2f", M[i, j]), cex = 0.8)
}
}
# Plot the tree
par(mar = c(2, 0.5, 2, 0.5), xpd = NA)
# Get the tree height (max distance from root to a tip)
tree_width <- max(nodeHeights(tree_fig))
# Plot using x.lim to control right-side space
plot(tree_fig,
show.tip.label = TRUE,
cex = 1,
direction = "rightwards",
x.lim = c(0, tree_width * 1.8)) # 1.05 = only 5% extra space for labels
# Add branch length annotations
edgelabels(
text = round(tree_fig$edge.length, 3), # label text (rounded lengths)
frame = "none", # no box around labels
adj = c(0.5, -0.5), # adjust label position
cex = 0.6, # text size
col = "blue" # text color
)
dev.off()
```
To account for phylogeny, the covariance structure of the random effects is specified with the phylogenetic correlation matrix $V$ shown above so that $\gamma \sim N(0, \sigma^2_\gamma V_\lambda)$ where $V_\lambda = (1 - \lambda)I_S + \lambda V$. This gives the marginal covariance structure:
$$
\Sigma = \text{var}Y = \sigma^2_\gamma Z V_\lambda Z^T + \sigma^2 I_n
$$
The model is estimated in a Bayesian framework with normal priors for regression coefficients and exponential priors for variance parameters. To estimate $\sigma_\gamma$ and $\lambda$, the variance structure was reparametrized:
$$
\Sigma = \text{var}Y = \sigma^2_S ZZ^T + \sigma^2_P Z V Z^T + \sigma^2 I_n
$$
Tighter priors were needed to ensure that $\sigma^2_S, \sigma^2_P$ were well-identified by the data; a weak prior was used for $\sigma^2$.
```{r, results = 'hide'}
## ---- fit to capture data ----
# specify priors for bpmm
priors <- c(
# fixed terms
prior(normal(0, 10), class = "Intercept"),
prior(normal(0, 10), class = "b"),
# variance terms
prior(exponential(5), class = "sd", group = "spp"),
prior(exponential(5), class = "sd", group = "phy"),
prior(exponential(1), class = "sigma")
)
# fit model
fit_phy_cap <- cdata |>
mutate(spp = factor(species),
phy = factor(species)) %>%
brm(cewl ~ log.mass + osml + temp + vpd +
(1 | spp) +
(1 | gr(phy, cov = V)),
data = .,
data2 = list(V = v_mx),
prior = priors,
chains = 6,
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.99,
max_treedepth = 15),
seed = 102625,
family = gaussian())
## ---- fit to change data ----
# specify priors for model
priors <- c(
# fixed terms
prior(normal(0, 10), class = "Intercept"),
prior(normal(0, 10), class = "b"),
# variance terms
prior(exponential(5), class = "sd", group = "spp"),
prior(exponential(5), class = "sd", group = "phy"),
prior(exponential(1), class = "sigma")
)
# fit model with phylogenetic covariance
fit_phy_acc <- ddata |>
mutate(spp = factor(species),
phy = factor(species)) %>%
brm(log.cewl ~ log.mass + osml + temp + vpd +
(1 | spp) +
(1 | gr(phy, cov = V)),
data = .,
data2 = list(V = v_mx),
prior = priors,
chains = 6,
iter = 4000,
warmup = 2000,
control = list(adapt_delta = 0.9995,
max_treedepth = 15),
seed = 102625,
family = gaussian())
```
## Model fit to capture data
```{r}
# table for variance parameters
draws <- as_draws_df(fit_phy_cap)
sd_spp <- draws$`sd_spp__Intercept`
sd_phy <- draws$`sd_phy__Intercept`
lambda <- (sd_phy^2) / (sd_spp^2 + sd_phy^2)
sigma_g <- sqrt(sd_spp^2 + sd_phy^2)
sigma <- draws$sigma
rand_tbl <- tibble(
term = c("lambda", "sigma_g", "sigma"),
estimate = c(mean(lambda), mean(sigma_g), mean(sigma)),
lwr = c(quantile(lambda, 0.025), quantile(sigma_g, 0.025), quantile(sigma, 0.025)),
upr = c(quantile(lambda, 0.975), quantile(sigma_g, 0.975), quantile(sigma, 0.975))
)
# table for regression coefficients
fixed_tbl <- fixef(fit_phy_cap, probs = c(0.025, 0.975)) %>%
as.data.frame() %>%
rownames_to_column("term") %>%
rename(estimate = Estimate,
lwr = Q2.5,
upr = Q97.5) |>
select(-Est.Error)
# table of parameter estimates
coef_tbl_cap <- bind_rows(fixed_tbl, rand_tbl)
coef_tbl_cap |>
pander(caption = 'Table: parameter estimates and 95% credible intervals')
```
```{r}
#| fig-width: 8
#| fig-height: 4
#| fig-cap: "Figure: Posterior densities (left); observed vs. predicted CEWL values (right)."
## posterior densities
# ---- Extract and transform ----
draws <- as_draws_df(fit_phy_cap) %>%
mutate(
sigma_gamma = sqrt(sd_phy__Intercept^2 + sd_spp__Intercept^2),
lambda = sd_phy__Intercept^2 / (sd_phy__Intercept^2 + sd_spp__Intercept^2)
)
# ---- Select parameters ----
keep <- c(
colnames(draws)[1:5],
c("sigma", "sigma_gamma", "lambda")
)
long <- draws %>%
select(all_of(keep)) %>%
pivot_longer(everything(), names_to = "parameter", values_to = "value")
# ---- Compute intervals ----
alpha_lo <- 0.05; alpha_hi <- 0.95
ints <- long %>%
group_by(parameter) %>%
summarise(
lo = quantile(value, alpha_lo),
hi = quantile(value, alpha_hi),
med = median(value),
.groups = "drop"
)
# ---- Create clean labels ----
labels <- c(
"b_Intercept" = "Intercept~(beta[0])",
"b_log.mass" = "log(Mass)~(beta[1])",
"b_osml" = "Osmolality~(beta[2])",
"b_temp" = "Temperature~(beta[3])",
"b_vpd" = "VPD~(beta[4])",
"sigma" = "Residual~variance~(sigma)",
"sigma_gamma" = "Species~variance~(sigma[gamma])",
"lambda" = "Phylogenetic~signal~(lambda)"
)
facet_labeller <- labeller(parameter = as_labeller(labels, default = label_parsed))
# ---- Plot ----
p_dens_cap <- ggplot(long, aes(x = value)) +
geom_density(fill = "grey85", color = "grey30", linewidth = 0.4) +
geom_vline(data = ints, aes(xintercept = lo), linetype = 2, color = "firebrick") +
geom_vline(data = ints, aes(xintercept = hi), linetype = 2, color = "firebrick") +
geom_vline(data = ints, aes(xintercept = med), linetype = 2, color = "black") +
facet_wrap(~ parameter, scales = "free", labeller = facet_labeller, nrow = 4) +
labs(
x = "Parameter value",
y = "Posterior density"
) +
theme_minimal(base_size = 11) +
theme(
panel.background = element_rect(fill = "white", colour = NA),
strip.background = element_rect(fill = "white", colour = "white"),
panel.grid = element_blank(),
axis.text.y = element_blank(),
)
## posterior predictive check
p_pp_cap <- pp_check(fit_phy_cap, ndraws = 100, type = 'scatter_avg') +
guides(color = guide_none()) +
labs(y = 'Observed CEWL values', x = 'Predicted CEWL values') +
theme_minimal(base_family = "Helvetica", base_size = 11) +
theme(
panel.grid.minor = element_blank(),
plot.background = element_rect(fill = "white", color = NA), # white background
panel.background = element_rect(fill = "white", color = NA) # white plotting area
)
p_dens_cap + p_pp_cap
```
Findings at capture:
- There are significant positive associations between CEWL and each of (log) mass and ambient temperature
- There is a significant negative association between CEWL and vapor pressure deficit
- There is a significant species effect
- There is roughly as much estimated interspcies as intraspecies variation in CEWL
- Phylogenetic signal is negligible ($\lambda \in (0, 0.12)$)
Owing to the random intercepts, each species has a distinct estimated baseline CEWL when covariates are fixed at their means. The table below shows the species-specific estimates.
```{r}
# coefficient estimates
coef(fit_phy_cap)$spp[, 1, ] |>
pander(caption = 'Table: species-specific relationships after adjusting for phylogeny')
```
The plot below shows population-level marginal trends in each variable according to the model.
```{r}
#| fig-width: 6
#| fig-height: 2.5
#| fig-cap: "Figure: Posterior means with 95% credible intervals in each variable while holding other terms fixed at population averages."
## posterior densities
# prediction grid
library(modelr)
pred_grid <- bdiag(seq_range(cdata$log.mass, n = 100),
seq_range(cdata$osml, n = 100),
seq_range(cdata$temp, n = 100),
seq_range(cdata$vpd, n = 100)) |>
as.matrix() |>
as_tibble() |>
rename(log.mass = V1,
osml = V2,
temp = V3,
vpd = V4)
# predictions
preds <- fitted(fit_phy_cap, newdata = pred_grid, re_formula = NA, summary = T)
# variable means
means <- capture |>
mutate(log.mass = log(mass)) |>
drop_na(osml) |>
summarize(across(c(log.mass, osml, temp, vpd), mean)) |>
gather(name, mean)
# combine for plotting
pred_df <- bind_cols(pred_grid, preds) |>
mutate(across(log.mass:vpd, ~na_if(.x, 0))) |>
pivot_longer(log.mass:vpd) |>
drop_na(value) |>
rename(cewl = Estimate,
lwr = Q2.5,
upr = Q97.5,
err = Est.Error) |>
left_join(means, join_by(name)) |>
mutate(value = value + mean) |>
select(name, value, cewl, err, lwr, upr) |>
mutate(name = fct_recode(as.factor(name),
"Osmolality" = "osml",
"log(Mass)" = "log.mass",
"Temperature" = "temp",
"VPD" = "vpd")) |>
rename(var = name) |>
arrange(var, value)
# plot
p_scatter_cap <- capture |>
drop_na(osml) |>
mutate(mass = log(mass)) |>
pivot_longer(cols = c("osml", "mass", "temp", "vpd"),
names_to = "var",
values_to = "value") |>
mutate(var = fct_recode(as.factor(var),
"Osmolality" = "osml",
"log(Mass)" = "mass",
"Temperature" = "temp",
"VPD" = "vpd")) |>
ggplot(aes(x = value,
y = cewl)) +
geom_point() +
geom_path(data = pred_df, color = 'blue', linewidth = 1) +
geom_ribbon(data = pred_df, aes(ymin = lwr, ymax = upr),
fill = 'grey', alpha = 0.4) +
facet_grid(~ var, scales = "free_x") +
theme_bw() +
labs(x = "Value at capture", y = 'CEWL at capture', color = NULL)
p_scatter_cap
```
Diagnostic MCMC trace and density plots show good chain mixing and consistent posteriors, indicating successful convergence and reliable parameter estimates.
```{r}
#| fig-width: 8
#| fig-height: 6
#| fig-cap: "Figure: Trace (left) and posterior density (right) plots for each model parameter."
# label customization
labels <- c(
"b_Intercept" = "Intercept~(beta[0])",
"b_log.mass" = "log(Mass)~(beta[1])",
"b_osml" = "Osmolality~(beta[2])",
"b_temp" = "Temperature~(beta[3])",
"b_vpd" = "VPD~(beta[4])",
"sigma" = "Residual~variance~(sigma)",
"sd_spp__Intercept" = "Reparametrized~species~(sigma[S])",
"sd_phy__Intercept" = "Reparametrized~phylo~(sigma[P])"
)
facet_labeller <- labeller(parameter = as_labeller(labels, default = label_parsed))
bayesplot::color_scheme_set("blue") # same scheme mcmc_plot uses by default
cols <- bayesplot::color_scheme_get() |> as_vector()
names(cols) <- 6:1
# mcmc trace plots
p_trace <- mcmc_plot(fit_phy_cap,
type = "trace",
facet_args = list(nrow = 4, ncol = 2)) +
guides(color = guide_none()) +
scale_colour_manual(values = cols, name = "Chain") +
facet_wrap(~ parameter, nrow = 4, ncol = 2, labeller = facet_labeller, scales = 'free_y') +
theme_minimal(base_family = "Helvetica", base_size = 11) +
theme(
panel.grid.minor = element_blank(),
axis.text.x = element_blank(),
plot.background = element_rect(fill = "white", color = NA), # white background
panel.background = element_rect(fill = "white", color = NA) # white plotting area
) +
labs(x = 'Iteration', y = 'Draw')
# posterior densities
plot_pars <- names(labels)
df <- as_draws_df(fit_phy_cap) %>%
select(.chain, all_of(plot_pars)) %>%
pivot_longer(-.chain,
names_to = "parameter",
values_to = "value") %>%
mutate(
parameter = factor(parameter, levels = names(labels), labels = labels),
.chain = factor(.chain)
)
p_dens <- ggplot(df, aes(x = value, colour = .chain)) +
geom_line(stat = 'density', linewidth = 0.6, alpha = 0.7) +
scale_colour_manual(values = cols, name = "Chain") +
facet_wrap(
~ parameter,
labeller = label_parsed,
nrow = 4, ncol = 2, scales = "free"
) +
theme_minimal(base_family = "Helvetica", base_size = 11) +
theme(
panel.grid = element_blank(),
axis.text.y = element_blank(),
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA)
) +
labs(x = "Parameter value", y = "Density")
# panel layout
(p_trace | p_dens)
# export
ggsave(here::here("img/sfig6.png"), width = 10, height = 6, dpi = 400, units = 'in')
```
Pairs plots of the posterior draws for the variance parameters indicate that the phylogenetic signal is reasonably well-identified by the data, though there may still be a slight ridge.
```{r}
#| fig-width: 6
#| fig-height: 6
#| fig-cap: "Figure: Pairs plots for variance parameters."
# pairs plots for variance components
bayesplot::bayesplot_theme_set(theme_minimal(base_size = 11))
bayesplot::bayesplot_theme_update(
panel.background = element_rect(fill = "white", color = NA),
plot.background = element_rect(fill = "white", color = NA),
panel.grid.minor = element_blank()
)
p <- pairs(fit_phy_cap, variable = c("sd_spp__Intercept",
"sd_phy__Intercept",
"sigma"))
p
ggsave(here::here("img/sfig7.png"), p, width = 5, height = 5, dpi = 400, units = 'in')
```
## Model fit to change after acclimation
::: callout-caution
Under development
:::
```{r}
# table for variance parameters
draws <- as_draws_df(fit_phy_acc)
sd_spp <- draws$`sd_spp__Intercept`
sd_phy <- draws$`sd_phy__Intercept`
lambda <- (sd_phy^2) / (sd_spp^2 + sd_phy^2)
sigma_g <- sqrt(sd_spp^2 + sd_phy^2)
sigma <- draws$sigma
rand_tbl <- tibble(
term = c("lambda", "sigma_g", "sigma"),
estimate = c(mean(lambda), mean(sigma_g), mean(sigma)),
lwr = c(quantile(lambda, 0.025), quantile(sigma_g, 0.025), quantile(sigma, 0.025)),
upr = c(quantile(lambda, 0.975), quantile(sigma_g, 0.975), quantile(sigma, 0.975))
)
# table for regression coefficients
fixed_tbl <- fixef(fit_phy_acc, probs = c(0.025, 0.975)) %>%
as.data.frame() %>%
rownames_to_column("term") %>%
rename(estimate = Estimate,
lwr = Q2.5,
upr = Q97.5) |>
select(-Est.Error)
# table of parameter estimates
coef_tbl_acc <- bind_rows(fixed_tbl, rand_tbl)
coef_tbl_acc |>
pander(caption = 'Table: parameter estimates and 95% credible intervals')
```
```{r}
#| fig-width: 8
#| fig-height: 4
#| fig-cap: "Figure: Posterior densities (left); observed vs. predicted CEWL values (right)."
## posterior densities
# ---- Extract and transform ----
draws <- as_draws_df(fit_phy_acc) %>%
mutate(
sigma_gamma = sqrt(sd_phy__Intercept^2 + sd_spp__Intercept^2),
lambda = sd_phy__Intercept^2 / (sd_phy__Intercept^2 + sd_spp__Intercept^2)
)
# ---- Select parameters ----
keep <- c(
colnames(draws)[1:5],
c("sigma", "sigma_gamma", "lambda")
)
long <- draws %>%
select(all_of(keep)) %>%
pivot_longer(everything(), names_to = "parameter", values_to = "value")
# ---- Compute intervals ----
alpha_lo <- 0.05; alpha_hi <- 0.95
ints <- long %>%
group_by(parameter) %>%
summarise(
lo = quantile(value, alpha_lo),
hi = quantile(value, alpha_hi),
med = median(value),
.groups = "drop"
)
# ---- Create clean labels ----
labels <- c(
"b_Intercept" = "Intercept~(beta[0])",
"b_log.mass" = "log(Mass)~(beta[1])",
"b_osml" = "Osmolality~(beta[2])",
"b_temp" = "Temperature~(beta[3])",
"b_vpd" = "VPD~(beta[4])",
"sigma" = "Residual~variance~(sigma)",
"sigma_gamma" = "Species~variance~(sigma[gamma])",
"lambda" = "Phylogenetic~signal~(lambda)"
)
facet_labeller <- labeller(parameter = as_labeller(labels, default = label_parsed))
# ---- Plot ----
p_dens_acc <- ggplot(long, aes(x = value)) +
geom_density(fill = "grey85", color = "grey30", linewidth = 0.4) +
geom_vline(data = ints, aes(xintercept = lo), linetype = 2, color = "firebrick") +
geom_vline(data = ints, aes(xintercept = hi), linetype = 2, color = "firebrick") +
geom_vline(data = ints, aes(xintercept = med), linetype = 2, color = "black") +
facet_wrap(~ parameter, scales = "free", labeller = facet_labeller, nrow = 4) +
labs(
x = "Parameter value",
y = "Posterior density"
) +
theme_minimal(base_size = 11) +
theme(
panel.background = element_rect(fill = "white", colour = NA),
strip.background = element_rect(fill = "white", colour = "white"),
panel.grid = element_blank(),
axis.text.y = element_blank(),
)
## posterior predictive check
p_pp_acc <- pp_check(fit_phy_acc, ndraws = 100, type = 'scatter_avg') +
guides(color = guide_none()) +
labs(y = 'Observed change in log-CEWL', x = 'Predicted change in log-CEWL') +
theme_minimal(base_family = "Helvetica", base_size = 11) +
theme(
panel.grid.minor = element_blank(),
plot.background = element_rect(fill = "white", color = NA), # white background
panel.background = element_rect(fill = "white", color = NA) # white plotting area
)
p_dens_acc + p_pp_acc
```
Findings: ...
Owing to the random intercepts, each species has a distinct estimated baseline CEWL when covariates are fixed at their means. The table below shows the species-specific estimates.
```{r}
# coefficient estimates
coef(fit_phy_acc)$spp[, 1, ] |>
pander(caption = 'Table: species-specific relationships after adjusting for phylogeny')
```
The plot below shows population-level marginal trends according to the model.
```{r}
#| fig-width: 6
#| fig-height: 2.5
#| fig-cap: "Figure: Posterior means with 95% credible intervals in each variable while holding other terms fixed at population averages."
# same as above but for change
pred_grid <- bdiag(seq_range(ddata$log.mass, n = 100),
seq_range(ddata$osml, n = 100),
seq_range(ddata$temp, n = 100),
seq_range(ddata$vpd, n = 100)) |>
as.matrix() |>
as_tibble() |>
rename(log.mass = V1,
osml = V2,
temp = V3,
vpd = V4)
# predictions
preds <- fitted(fit_phy_acc, newdata = pred_grid, re_formula = NA, summary = T)
# variable means
means <- change |>
drop_na(osml) |>
summarize(across(c(log.mass, osml, temp, vpd), mean)) |>
gather(name, mean)
# combine for plotting
pred_df <- bind_cols(pred_grid, preds) |>
mutate(across(log.mass:vpd, ~na_if(.x, 0))) |>
pivot_longer(log.mass:vpd) |>
drop_na(value) |>
rename(log.cewl = Estimate,
lwr = Q2.5,
upr = Q97.5,
err = Est.Error) |>
left_join(means, join_by(name)) |>
mutate(value = value + mean) |>
select(name, value, log.cewl, err, lwr, upr) |>
mutate(name = fct_recode(as.factor(name),
"Osmolality" = "osml",
"log(Mass)" = "log.mass",
"Temperature" = "temp",
"VPD" = "vpd")) |>
rename(var = name) |>
arrange(var, value)
# plot
p_scatter_acc <- change_ann |>
drop_na(osml) |>
pivot_longer(cols = c("osml", "log.mass", "temp", "vpd"),
names_to = "var",
values_to = "value") |>
mutate(var = fct_recode(as.factor(var),
"Osmolality" = "osml",
"log(Mass)" = "log.mass",
"Temperature" = "temp",
"VPD" = "vpd")) |>
ggplot(aes(x = value,
y = log.cewl)) +
geom_point() +
geom_path(data = pred_df, color = 'blue', linewidth = 1) +
geom_ribbon(data = pred_df, aes(ymin = lwr, ymax = upr),
fill = 'grey', alpha = 0.4) +
facet_grid(~ var, scales = "free_x") +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
labs(x = "Change in value (acclimation - capture)",
y = "Change in log(CEWL)")
p_scatter_acc
```
Diagnostic MCMC trace and density plots show...
```{r}
#| fig-width: 8
#| fig-height: 6
#| fig-cap: "Figure: Trace (left) and posterior density (right) plots for each model parameter."
# label customization
labels <- c(
"b_Intercept" = "Intercept~(beta[0])",
"b_log.mass" = "log(Mass)~(beta[1])",
"b_osml" = "Osmolality~(beta[2])",
"b_temp" = "Temperature~(beta[3])",
"b_vpd" = "VPD~(beta[4])",
"sigma" = "Residual~variance~(sigma)",
"sd_spp__Intercept" = "Reparametrized~species~(sigma[S])",
"sd_phy__Intercept" = "Reparametrized~phylo~(sigma[P])"
)
facet_labeller <- labeller(parameter = as_labeller(labels, default = label_parsed))
bayesplot::color_scheme_set("blue") # same scheme mcmc_plot uses by default
cols <- bayesplot::color_scheme_get() |> as_vector()
names(cols) <- 6:1
# mcmc trace plots
p_trace <- mcmc_plot(fit_phy_acc,
type = "trace",
facet_args = list(nrow = 4, ncol = 2)) +
guides(color = guide_none()) +
scale_colour_manual(values = cols, name = "Chain") +
facet_wrap(~ parameter, nrow = 4, ncol = 2, labeller = facet_labeller, scales = 'free_y') +
theme_minimal(base_family = "Helvetica", base_size = 11) +
theme(
panel.grid.minor = element_blank(),
axis.text.x = element_blank(),
plot.background = element_rect(fill = "white", color = NA), # white background
panel.background = element_rect(fill = "white", color = NA) # white plotting area
) +
labs(x = 'Iteration', y = 'Draw')
# posterior densities
plot_pars <- names(labels)
df <- as_draws_df(fit_phy_acc) %>%
select(.chain, all_of(plot_pars)) %>%
pivot_longer(-.chain,
names_to = "parameter",
values_to = "value") %>%
mutate(
parameter = factor(parameter, levels = names(labels), labels = labels),
.chain = factor(.chain)
)
p_dens <- ggplot(df, aes(x = value, colour = .chain)) +
geom_line(stat = 'density', linewidth = 0.6, alpha = 0.7) +
scale_colour_manual(values = cols, name = "Chain") +
facet_wrap(
~ parameter,
labeller = label_parsed,
nrow = 4, ncol = 2, scales = "free"
) +
theme_minimal(base_family = "Helvetica", base_size = 11) +
theme(
panel.grid = element_blank(),
axis.text.y = element_blank(),
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA)
) +
labs(x = "Parameter value", y = "Density")
# panel layout
(p_trace | p_dens)
# export
ggsave(here::here("img/sfig8.png"), width = 10, height = 6, dpi = 400, units = 'in')
```
Pairs plots of the posterior draws for the variance parameters suggest...
```{r}
#| fig-width: 6
#| fig-height: 6
#| fig-cap: "Figure: Pairs plots for variance parameters."
# pairs plots for variance components
bayesplot::bayesplot_theme_set(theme_minimal(base_size = 11))
bayesplot::bayesplot_theme_update(
panel.background = element_rect(fill = "white", color = NA),
plot.background = element_rect(fill = "white", color = NA),
panel.grid.minor = element_blank()
)
p <- pairs(fit_phy_acc, variable = c("sd_spp__Intercept",
"sd_phy__Intercept",
"sigma"))
p
ggsave(here::here("img/sfig9.png"), p, width = 5, height = 5, dpi = 400, units = 'in')
```
```{r}
# combined figures for paper
p <- ((p_dens_cap / p_pp_cap) | (p_dens_acc / p_pp_acc)) +
plot_annotation(tag_levels = 'A', tag_prefix = '(', tag_suffix = ')') +
plot_layout(widths = c(1, 1), heights = c(2, 1))
ggsave(here::here("img/fig4.png"), p, width = 8, height = 8, dpi = 400, units = 'in')
p <- p_scatter_cap / p_scatter_acc
ggsave(here::here("img/fig5.png"), p, width = 6, height = 4, dpi = 400, units = 'in')
```