Describing floral traits of nominally [functionally?] male flowers

Load packages we need.

library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0       ✔ purrr   0.3.1  
## ✔ tibble  2.0.1       ✔ dplyr   0.8.0.1
## ✔ tidyr   0.8.3       ✔ stringr 1.4.0  
## ✔ readr   1.3.1       ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(readxl)
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-4

Import data, and look at it, and clean it.

szukis <- read_excel("Clean_Szukis_Data20190407b.xlsx", .name_repair = "universal", col_types = c("text", "text", "text", rep("numeric",17))) 
## Warning in read_fun(path = enc2native(normalizePath(path)), sheet_i =
## sheet, : Expecting numeric in L99 / R99C12: got 'NA'
## Warning in read_fun(path = enc2native(normalizePath(path)), sheet_i =
## sheet, : Expecting numeric in S99 / R99C19: got 'n/a'
## New names:
## * `corolla L` -> corolla.L
## * `corolla W` -> corolla.W
## * `W lobe tip` -> W.lobe.tip
## * `W mouth` -> W.mouth
## * `petals number` -> petals.number
## * … and 12 more
# What is the structure?
str(szukis)
## Classes 'tbl_df', 'tbl' and 'data.frame':    103 obs. of  20 variables:
##  $ TreeID                 : chr  "Szukis7_6" "Szukis7_6" "Szukis7_6" "Szukis7_6" ...
##  $ flowerID               : chr  "2" "3" "4" "5" ...
##  $ rootstock              : chr  "wild" "wild" "wild" "wild" ...
##  $ corolla.L              : num  7.06 7.94 10.74 7.24 6.92 ...
##  $ corolla.W              : num  6.18 5.76 7.22 5.5 5.46 5.68 5.58 6.46 5.48 5.24 ...
##  $ W.lobe.tip             : num  9.98 10.82 10.26 10.34 10.24 ...
##  $ W.mouth                : num  3.4 3.76 3.36 3.54 3.46 3.24 3.08 3.08 3.14 3.22 ...
##  $ petals.number          : num  4 4 4 4 4 4 4 4 4 4 ...
##  $ stamen.number          : num  15 8 5 8 7 10 8 8 9 8 ...
##  $ staminodes.number      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ number.stigma.lobes    : num  3 4 3 3 3 3 2 3 4 3 ...
##  $ ovary.W                : num  0.31 2.68 4.42 1.22 3.32 0.54 0.64 0.64 1.86 1.4 ...
##  $ ovary.radius           : num  0.155 1.34 2.21 0.61 1.66 0.27 0.32 0.32 0.93 0.7 ...
##  $ number.ovules          : num  0 1 3 0 3 0 0 0 0 0 ...
##  $ pistil.length..ovary.L.: num  2.4 7.2 10.62 2.42 7.48 ...
##  $ lobe.tip..lobe.length. : num  1.78 4.5 4.94 1.88 3.38 0.82 1.6 1.52 2.24 1.86 ...
##  $ pistil.lobe            : num  1.35 1.6 2.15 1.29 2.21 ...
##  $ anther.length.sum      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ female.INV             : num  0.744 19.296 46.94 2.952 24.834 ...
##  $ pistil.volume          : num  0.121 27.077 108.634 1.886 43.169 ...
# count the number of missing data in each column
apply(X=szukis, MARGIN = 2, FUN = function(x) {sum(is.na(x))} )
##                  TreeID                flowerID               rootstock 
##                       0                       0                       0 
##               corolla.L               corolla.W              W.lobe.tip 
##                       0                       0                       0 
##                 W.mouth           petals.number           stamen.number 
##                       0                       0                       0 
##       staminodes.number     number.stigma.lobes                 ovary.W 
##                       0                       0                       1 
##            ovary.radius           number.ovules pistil.length..ovary.L. 
##                       1                       0                       0 
##  lobe.tip..lobe.length.             pistil.lobe       anther.length.sum 
##                       0                       0                      19 
##              female.INV           pistil.volume 
##                       1                       1
# FILTER (keep) the rows in which ovary.W is not missing
# ! means "not", and is.na(x) tells us which elements of x are missing.
szukis <- filter(szukis, !is.na(ovary.W))

# Old code
# szukis <- mutate( szukis, 
#                   PL.ratio =  log(pistil.length..ovary.L./lobe.tip..lobe.length.),
#                   ovulesPA = as.numeric(number.ovules>0),
#                   ovule01 = number.ovules/max(number.ovules),
#                   volume = 2/3 * pi * (ovary.W/2)^2 * pistil.length..ovary.L.,
#                   female = PL.ratio + volume/max(volume))

# calculate some descritptive stats
# summary(szukis) 
# not run

Creating maleness and femaleness indices. These should probably be combinations of fitness-enhancing traits. Here are some ideas for male traits: more stamens, longer stamen (ave, max), less variation in stamen length. Female traits may include lobe:pistil length ratio, number of ovules. Given the relation between lobe:pistil and no. of ovules, it seems as though there is a minimum ratio required to produce an ovule, and then there is great variation in ovule number above that minimum ratio.

An approximation of volume is \[V = \frac{2}{3} \pi r^2 h\] where \(r\) is the radius of the ovary.

How do the trees differ?

Is wild different than cultivated? Does root stock make a difference on cultivated trees?

Select only the variables that were measured directly.

# separate just the measurements, and transform each column 
Y <- sqrt( szukis[,4:17] )

# create a data set of just  cultivars ("not equal to 139")
szukis.NoWild <- filter(szukis, TreeID != "139")
Ynw <- sqrt( szukis.NoWild[,4:17] )

Perform PERMANOVA and make a picture.

# all trees
adonis2(Y ~ TreeID, data=szukis, by=NULL)
# just the cultivars
adonis2(Ynw ~ TreeID, data=szukis.NoWild, by=NULL)

These tests suggest that the wild type differs from the others, and with the wild type in the data, TreeID explains about 14% of the variation in floower structure.

The picture.

#
mds1 <- metaMDS(Y)
## Run 0 stress 0.1030214 
## Run 1 stress 0.1058896 
## Run 2 stress 0.1135132 
## Run 3 stress 0.102733 
## ... New best solution
## ... Procrustes: rmse 0.01499833  max resid 0.07102893 
## Run 4 stress 0.113772 
## Run 5 stress 0.1030338 
## ... Procrustes: rmse 0.01453097  max resid 0.06902143 
## Run 6 stress 0.1043017 
## Run 7 stress 0.1033934 
## Run 8 stress 0.1236346 
## Run 9 stress 0.1038994 
## Run 10 stress 0.1034034 
## Run 11 stress 0.1031763 
## ... Procrustes: rmse 0.009523246  max resid 0.06066815 
## Run 12 stress 0.1027338 
## ... Procrustes: rmse 7.21916e-05  max resid 0.0005363478 
## ... Similar to previous best
## Run 13 stress 0.1237123 
## Run 14 stress 0.1034046 
## Run 15 stress 0.1137748 
## Run 16 stress 0.1039354 
## Run 17 stress 0.1040763 
## Run 18 stress 0.1030801 
## ... Procrustes: rmse 0.01539115  max resid 0.07101134 
## Run 19 stress 0.1376851 
## Run 20 stress 0.1043038 
## *** Solution reached
{#set up the figure
  plot(mds1, type='n',  display='sites')
  # plot convex hulls of the groups (boundaries of flower data in NMDS space) 
  ordihull(mds1, szukis$TreeID, label=TRUE, draw="polygon", lwd=3, col=1:4)
  # plot confidence intervals for the centroids of the groups
  ordiellipse(mds1, szukis$TreeID, draw="polygon", lwd=3, col=1:4,
              kind="se")
}

Allocation to reproductive structures

H1: There is a relatively fixed investment in sexual expression and reproduction. If so, we expect a trade-off between female and male expression.

H2: Plants vary markedly in their investment in sexual expression and reproduction. If so, we expect a positive correlation between female and male expression.

szukis <- mutate(szukis,
                 sn.w = stamen.number/corolla.W, 
                 pv.w = pistil.volume/corolla.W
)
ggplot(data=szukis,  aes(sn.w, pv.w,colour=TreeID) ) + 
  geom_jitter(width=.01, height=0) + geom_smooth(se=FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(data=szukis,  aes(TreeID, log(pv.w),colour=TreeID) ) + 
  geom_boxplot() + 
  geom_jitter(width=.1, height=0) 

ggplot(data=szukis,  aes(TreeID, log(pistil.volume),colour=TreeID) ) + 
  geom_boxplot() + 
  geom_jitter(width=.1, height=0) 

Analyses

  • approach 1 (Simple, and conceptually clear): make subsets of data, and perform a Spearman rtank correlation on each subset. Use functions subset, or filter (dplyr package), and then cor.test(x, y, method='spearman').
  • approach 2 (Complicated): fit a generalized linear model to the entire data set using an appropriate error distribution, and test differences among trees.

Approach 1 The direct approach.

wildtype <- subset(szukis, TreeID=="139") 
# or with the dplyr package, filter(szukis, TreeID=="139")
with(wildtype, cor.test(sn.w, pv.w, method='spearman') )
## 
##  Spearman's rank correlation rho
## 
## data:  sn.w and pv.w
## S = 1860, p-value = 0.08287
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.3984962
S47 <- subset(szukis, TreeID=="Szukis4_7")
with(S47, cor.test(sn.w, pv.w, method='spearman') )
## Warning in cor.test.default(sn.w, pv.w, method = "spearman"): Cannot
## compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  sn.w and pv.w
## S = 13622, p-value = 0.001767
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.4905619
S76 <- subset(szukis, TreeID=="Szukis7_6")
with(S76, cor.test(sn.w, pv.w, method='spearman') )
## 
##  Spearman's rank correlation rho
## 
## data:  sn.w and pv.w
## S = 3216, p-value = 0.003692
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.5889328
S56 <- subset(szukis, TreeID=="Szukis5_6")
with(S56, cor.test(sn.w, pv.w, method='spearman') )
## 
##  Spearman's rank correlation rho
## 
## data:  sn.w and pv.w
## S = 2188, p-value = 0.05876
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.4207792

Another way to do Approach 1. (Make sure the dplyr and tidyr packages are loaded, with, e.g., library(dplyr)).

szukis %>% group_by(TreeID) %>% summarize(
  rho = cor.test(sn.w, pv.w, method='spearman')$estimate,
  P = cor.test(sn.w, pv.w, method='spearman')$p.value
)
## Warning in cor.test.default(sn.w, pv.w, method = "spearman"): Cannot
## compute exact p-value with ties

## Warning in cor.test.default(sn.w, pv.w, method = "spearman"): Cannot
## compute exact p-value with ties

Approach 2 This is not ready for prime time.

library(rstanarm)
options(mc.cores = parallel::detectCores())
post0 <- stan_glm(pv.w ~ 1, data = szukis, 
                  family = Gamma, 
                  chains = 3, cores = 3, seed = 1, iter = 3000)
post1 <- stan_glm(pv.w ~ sn.w, data = szukis, 
                  family = Gamma, 
                  chains = 3, cores = 3, seed = 1, iter = 3000)
post1b <- stan_glm(pv.w ~ TreeID, data = szukis, 
                  family = Gamma, 
                  chains = 3, cores = 3, seed = 1, iter = 3000)
post2 <- update(post1, formula = . ~ . + TreeID)
post3 <- update(post2, formula = . ~ . + TreeID:sn.w)
l0 <- loo(post0)
l1 <- loo(post1)
l1b <- loo(post1b)
l2 <- loo(post2)
l3 <- loo(post3)
compare_models(l0,l1, l1b, l2, l3)
tr <- ggplot(data=szukis,  aes(sn.w, 1/pv.w,colour=TreeID) ) + 
  geom_jitter(width=.01, height=0) + geom_smooth()
tr

draws <- as.data.frame(post1)
colnames(draws)[1:2] <- c("a", "b")
tr + 
  geom_abline(data = draws, aes(intercept = a, slope = b), 
              color = "skyblue", size = 0.2, alpha = 0.25) + 
  geom_abline(intercept = coef(post1)[1], slope = coef(post1)[2], 
              color = "skyblue4", size = 1)