library(tidyverse)
library(broom)
knitr::opts_chunk$set(comment="", cache=T, warning = F, message = F)

plot_beta <- function(beta) {
  beta %>% mutate(trait = fct_reorder(trait, B)) %>% 
    ggplot(aes(y=trait, x = B, xmin=B-SE, xmax=B+SE)) + 
    geom_vline(xintercept=0)+ geom_pointrange() + 
    labs(x = "Selection gradient (beta) estimated from gradients on PCs", y = "Trait")
}

Formulas

Following method of Chong et al. 2018 for reconstructing selection estimates from estimates of selection on PCs: Beta = E * A, where:

  • E is the matrix of eigenvectors from the PCA (traits in rows, PCs in columns)
  • A is the vector of regression coefficients for each trait (multiple regression of relative fitness vs. PC scores across individuals)
  • Beta is the vector of reconstituted selection gradients on the traits
  • “The variance in reconstituted regression coefficients is estimated by replacing the elements of E with their squares, and the elements of A with the square of the standard error” Lafi and Kaneene (1992), and “Standard errors for reconstituted regression coefficients are estimated as the square root of the variance.” So Beta_SE = sqrt(E^2 * A_SE^2).
  • “If one wishes to perform hypothesis testing on these reconstituted selection gradients, parameter estimates and SEs could be used to estimate t-statistics and P-values.”
  • “Using a subset of PCs can reduce multicollinearity”, but “elements of Beta represent selection within the subspace represented by the columns of E”
calculate_beta <- function(E, slopes) {
  data.frame(B = E %*% slopes$A,
             SE = sqrt(E ^ 2 %*% slopes$SE ^ 2)) %>% rownames_to_column("trait")
}

Parachnowtisch et al. 2012 floral volatiles

(para_E <- read_tsv("Parachnowitsch2012_PCs.csv") %>% column_to_rownames("Trait") %>% as.matrix())
                    PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8
flower size        0.00 -0.03  0.12 -0.06  0.03 -0.12  0.14  0.47
corolla pigment   -0.03 -0.02 -0.06 -0.04 -0.07  0.18 -0.02  0.37
first flower date  0.01 -0.03  0.05 -0.10  0.06  0.01  0.32 -0.22
daily display      0.04  0.02  0.13  0.07 -0.20 -0.05 -0.34 -0.16
days flowering    -0.02 -0.01 -0.11  0.35  0.09  0.03  0.24  0.01
plant height       0.03 -0.07 -0.03  0.36  0.02 -0.04 -0.07  0.02
flower number     -0.02  0.03 -0.03  0.34 -0.07  0.08 -0.10 -0.12
trans-b-ocimene   -0.09  0.22  0.05 -0.06 -0.06  0.15 -0.05 -0.20
cis-b-ocimene     -0.04  0.36 -0.06  0.00  0.00 -0.08 -0.05 -0.05
linalool           0.07  0.06  0.23 -0.12  0.05 -0.11 -0.04 -0.10
b-citronellol     -0.02  0.32 -0.09  0.06  0.03 -0.14  0.03  0.10
cis-jasmone        0.07  0.02 -0.06  0.04 -0.09  0.11  0.02  0.04
a-copaene         -0.01  0.06  0.12 -0.04 -0.08  0.24  0.17  0.03
a-cedrene          0.17 -0.04  0.01  0.05 -0.02 -0.11  0.09 -0.05
bergamotene        0.10 -0.04  0.00  0.09  0.13 -0.06  0.02  0.12
b-cedrene          0.19 -0.04 -0.01  0.00 -0.05 -0.10 -0.02  0.01
caryophyllene      0.19  0.00 -0.03  0.01 -0.04 -0.17  0.01  0.06
germacrene D      -0.03 -0.02  0.06 -0.02 -0.08  0.38  0.14 -0.06
a-farnasene        0.13  0.03 -0.12  0.04 -0.05 -0.05  0.02  0.17
thujol            -0.03  0.33 -0.10 -0.08  0.07 -0.04  0.12  0.11
nerolidol          0.05 -0.01 -0.04 -0.08  0.09  0.20 -0.04 -0.05
methyl benzoate    0.18 -0.14  0.13 -0.09 -0.02 -0.03 -0.07 -0.18
methyl salicylate  0.14 -0.11  0.00  0.01  0.04  0.09 -0.12 -0.12
methyl cinnamate   0.00 -0.01 -0.08  0.09  0.45 -0.13 -0.04  0.03
trans-2-octenal   -0.09 -0.06  0.15  0.05  0.17  0.13 -0.10  0.12
cis-6-nonenal     -0.02 -0.06  0.36 -0.06  0.07  0.05  0.16  0.03
unknown-1          0.01 -0.05  0.34 -0.02 -0.10 -0.12  0.08  0.08
unknown-2         -0.05 -0.09 -0.12  0.08  0.02  0.42 -0.21  0.02
unknown-3          0.02  0.02  0.11  0.07 -0.15 -0.07  0.38  0.08
unknown-4         -0.03  0.05  0.05 -0.09  0.38  0.04  0.06 -0.08
(para_slopes <- read_tsv("Parachnowitsch2012_slopes.csv") %>% column_to_rownames("PC"))
        A   SE
PC1  0.01 0.04
PC2  0.09 0.04
PC3  0.35 0.04
PC4  0.33 0.04
PC5 -0.18 0.05
PC6 -0.02 0.04
PC7 -0.19 0.04
PC8  0.01 0.04
(para_beta <- calculate_beta(para_E, para_slopes))
               trait       B          SE
1        flower size -0.0054 0.020984042
2    corolla pigment -0.0198 0.017151385
3  first flower date -0.0921 0.016493635
4      daily display  0.1708 0.019183326
5     days flowering  0.0136 0.018170581
6       plant height  0.1130 0.015169707
7      flower number  0.1330 0.015811705
8    trans-b-ocimene  0.0319 0.014600000
9      cis-b-ocimene  0.0216 0.015294443
10          linalool  0.0468 0.012862737
11     b-citronellol  0.0096 0.015305228
12       cis-jasmone  0.0053 0.007720751
13         a-copaene  0.0117 0.013687951
14         a-cedrene  0.0063 0.009501579
15       bergamotene  0.0023 0.010159232
16         b-cedrene  0.0097 0.009139475
17     caryophyllene  0.0040 0.010748023
18      germacrene D -0.0081 0.017106724
19       a-farnasene -0.0169 0.010545615
20            thujol -0.0655 0.016097515
21         nerolidol -0.0539 0.010381233
22   methyl benzoate  0.0207 0.013613229
23 methyl salicylate  0.0074 0.010673331
24  methyl cinnamate -0.0697 0.023678049
25   trans-2-octenal  0.0497 0.014037450
26     cis-6-nonenal  0.0569 0.016678429
27         unknown-1  0.1140 0.016071092
28         unknown-2  0.0039 0.020116660
29         unknown-3  0.0206 0.018271563
30         unknown-4 -0.0894 0.020048940
plot_beta(para_beta)

Chong et al. 2018 Arabidopsis

Trait correlations and selection

# Arabidopsis trait and fitness data
arab_pheno <- read_tsv("Chong2018_Arabidopsis_pheno.csv")

# Take means 
arab_geno <- arab_pheno %>% group_by(Genetic_Line) %>% summarize(across(everything(), mean, na.rm=T))

# Standardize traits
arab_geno_std <- arab_geno %>% 
  mutate(across(Flowering:Branches, ~ (.x-mean(.x))/sd(.x))) %>% 
  mutate(Relative_Fitness = Fruit_Number/mean(Fruit_Number), .keep="unused")

#Plot trait correlations
library(GGally)
ggpairs(arab_geno_std %>% select(Flowering:Branches))

# Plot fitness vs. traits
ggplot(arab_geno_std %>% pivot_longer(Flowering:Branches, names_to="Trait"), aes(x=value, y=Relative_Fitness)) +
  facet_wrap(vars(Trait), scales="free_x") + geom_point() + geom_smooth(method="lm", se=F)

Selection on PCs

# Run PCA on traits
arab_PCA <- arab_geno_std %>% column_to_rownames("Genetic_Line") %>% select(-Relative_Fitness) %>% 
  prcomp(center=F)
arab_geno_std_PCA <- bind_cols(arab_geno_std, as.data.frame(arab_PCA$x))
(arab_E <- arab_PCA$rotation[,1:4]) # only use first four PCs. flip sign to match Chong et al.
                         PC1         PC2         PC3        PC4
Flowering        -0.52252388  0.21508610 -0.15430751  0.3523835
Duration          0.52385695 -0.15698932  0.24369889 -0.4258117
Rosette_Leaves   -0.47336269 -0.30621520 -0.42547006 -0.7079019
Rosette_Diameter -0.04481703 -0.91377705  0.04720716  0.3971144
Branches          0.47588012  0.01833222 -0.85647344  0.1889054
# Regress relative fitness on PCs
arab_PCA_regression <- lm(Relative_Fitness ~ PC1 + PC2 + PC3 + PC4, data = arab_geno_std_PCA)
(arab_slopes <- tidy(arab_PCA_regression) %>% filter(term != "(Intercept)") %>% rename(A=estimate, SE=std.error))
# A tibble: 4 x 5
  term        A     SE statistic      p.value
  <chr>   <dbl>  <dbl>     <dbl>        <dbl>
1 PC1    0.189  0.0297     6.37  0.0000000892
2 PC2   -0.143  0.0500    -2.87  0.00631     
3 PC3   -0.0331 0.0911    -0.363 0.719       
4 PC4   -0.131  0.121     -1.09  0.284       
(arab_beta <- calculate_beta(arab_E, arab_slopes))
             trait           B         SE
1        Flowering -0.17080034 0.04862920
2         Duration  0.16938818 0.05865431
3   Rosette_Leaves  0.06107613 0.09612699
4 Rosette_Diameter  0.06891479 0.06639600
5         Branches  0.09102895 0.08255735
plot_beta(arab_beta)