Load packages we need.
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.3.1
## ✔ tibble 2.0.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(readxl)
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-4
Import data, and look at it, and clean it.
szukis <- read_excel("Clean_Szukis_Data20190407b.xlsx", .name_repair = "universal", col_types = c("text", "text", "text", rep("numeric",17)))
## Warning in read_fun(path = enc2native(normalizePath(path)), sheet_i =
## sheet, : Expecting numeric in L99 / R99C12: got 'NA'
## Warning in read_fun(path = enc2native(normalizePath(path)), sheet_i =
## sheet, : Expecting numeric in S99 / R99C19: got 'n/a'
## New names:
## * `corolla L` -> corolla.L
## * `corolla W` -> corolla.W
## * `W lobe tip` -> W.lobe.tip
## * `W mouth` -> W.mouth
## * `petals number` -> petals.number
## * … and 12 more
# What is the structure?
str(szukis)
## Classes 'tbl_df', 'tbl' and 'data.frame': 103 obs. of 20 variables:
## $ TreeID : chr "Szukis7_6" "Szukis7_6" "Szukis7_6" "Szukis7_6" ...
## $ flowerID : chr "2" "3" "4" "5" ...
## $ rootstock : chr "wild" "wild" "wild" "wild" ...
## $ corolla.L : num 7.06 7.94 10.74 7.24 6.92 ...
## $ corolla.W : num 6.18 5.76 7.22 5.5 5.46 5.68 5.58 6.46 5.48 5.24 ...
## $ W.lobe.tip : num 9.98 10.82 10.26 10.34 10.24 ...
## $ W.mouth : num 3.4 3.76 3.36 3.54 3.46 3.24 3.08 3.08 3.14 3.22 ...
## $ petals.number : num 4 4 4 4 4 4 4 4 4 4 ...
## $ stamen.number : num 15 8 5 8 7 10 8 8 9 8 ...
## $ staminodes.number : num 0 0 0 0 0 0 0 0 0 0 ...
## $ number.stigma.lobes : num 3 4 3 3 3 3 2 3 4 3 ...
## $ ovary.W : num 0.31 2.68 4.42 1.22 3.32 0.54 0.64 0.64 1.86 1.4 ...
## $ ovary.radius : num 0.155 1.34 2.21 0.61 1.66 0.27 0.32 0.32 0.93 0.7 ...
## $ number.ovules : num 0 1 3 0 3 0 0 0 0 0 ...
## $ pistil.length..ovary.L.: num 2.4 7.2 10.62 2.42 7.48 ...
## $ lobe.tip..lobe.length. : num 1.78 4.5 4.94 1.88 3.38 0.82 1.6 1.52 2.24 1.86 ...
## $ pistil.lobe : num 1.35 1.6 2.15 1.29 2.21 ...
## $ anther.length.sum : num NA NA NA NA NA NA NA NA NA NA ...
## $ female.INV : num 0.744 19.296 46.94 2.952 24.834 ...
## $ pistil.volume : num 0.121 27.077 108.634 1.886 43.169 ...
# count the number of missing data in each column
apply(X=szukis, MARGIN = 2, FUN = function(x) {sum(is.na(x))} )
## TreeID flowerID rootstock
## 0 0 0
## corolla.L corolla.W W.lobe.tip
## 0 0 0
## W.mouth petals.number stamen.number
## 0 0 0
## staminodes.number number.stigma.lobes ovary.W
## 0 0 1
## ovary.radius number.ovules pistil.length..ovary.L.
## 1 0 0
## lobe.tip..lobe.length. pistil.lobe anther.length.sum
## 0 0 19
## female.INV pistil.volume
## 1 1
# FILTER (keep) the rows in which ovary.W is not missing
# ! means "not", and is.na(x) tells us which elements of x are missing.
szukis <- filter(szukis, !is.na(ovary.W))
# Old code
# szukis <- mutate( szukis,
# PL.ratio = log(pistil.length..ovary.L./lobe.tip..lobe.length.),
# ovulesPA = as.numeric(number.ovules>0),
# ovule01 = number.ovules/max(number.ovules),
# volume = 2/3 * pi * (ovary.W/2)^2 * pistil.length..ovary.L.,
# female = PL.ratio + volume/max(volume))
# calculate some descritptive stats
# summary(szukis)
# not run
Creating maleness and femaleness indices. These should probably be combinations of fitness-enhancing traits. Here are some ideas for male traits: more stamens, longer stamen (ave, max), less variation in stamen length. Female traits may include lobe:pistil length ratio, number of ovules. Given the relation between lobe:pistil and no. of ovules, it seems as though there is a minimum ratio required to produce an ovule, and then there is great variation in ovule number above that minimum ratio.
An approximation of volume is \[V = \frac{2}{3} \pi r^2 h\] where \(r\) is the radius of the ovary.
Is wild different than cultivated? Does root stock make a difference on cultivated trees?
Select only the variables that were measured directly.
# separate just the measurements, and transform each column
Y <- sqrt( szukis[,4:17] )
# create a data set of just cultivars ("not equal to 139")
szukis.NoWild <- filter(szukis, TreeID != "139")
Ynw <- sqrt( szukis.NoWild[,4:17] )
Perform PERMANOVA and make a picture.
# all trees
adonis2(Y ~ TreeID, data=szukis, by=NULL)
# just the cultivars
adonis2(Ynw ~ TreeID, data=szukis.NoWild, by=NULL)
These tests suggest that the wild type differs from the others, and with the wild type in the data, TreeID explains about 14% of the variation in floower structure.
The picture.
#
mds1 <- metaMDS(Y)
## Run 0 stress 0.1030214
## Run 1 stress 0.1058896
## Run 2 stress 0.1135132
## Run 3 stress 0.102733
## ... New best solution
## ... Procrustes: rmse 0.01499833 max resid 0.07102893
## Run 4 stress 0.113772
## Run 5 stress 0.1030338
## ... Procrustes: rmse 0.01453097 max resid 0.06902143
## Run 6 stress 0.1043017
## Run 7 stress 0.1033934
## Run 8 stress 0.1236346
## Run 9 stress 0.1038994
## Run 10 stress 0.1034034
## Run 11 stress 0.1031763
## ... Procrustes: rmse 0.009523246 max resid 0.06066815
## Run 12 stress 0.1027338
## ... Procrustes: rmse 7.21916e-05 max resid 0.0005363478
## ... Similar to previous best
## Run 13 stress 0.1237123
## Run 14 stress 0.1034046
## Run 15 stress 0.1137748
## Run 16 stress 0.1039354
## Run 17 stress 0.1040763
## Run 18 stress 0.1030801
## ... Procrustes: rmse 0.01539115 max resid 0.07101134
## Run 19 stress 0.1376851
## Run 20 stress 0.1043038
## *** Solution reached
{#set up the figure
plot(mds1, type='n', display='sites')
# plot convex hulls of the groups (boundaries of flower data in NMDS space)
ordihull(mds1, szukis$TreeID, label=TRUE, draw="polygon", lwd=3, col=1:4)
# plot confidence intervals for the centroids of the groups
ordiellipse(mds1, szukis$TreeID, draw="polygon", lwd=3, col=1:4,
kind="se")
}
H1: There is a relatively fixed investment in sexual expression and reproduction. If so, we expect a trade-off between female and male expression.
H2: Plants vary markedly in their investment in sexual expression and reproduction. If so, we expect a positive correlation between female and male expression.
szukis <- mutate(szukis,
sn.w = stamen.number/corolla.W,
pv.w = pistil.volume/corolla.W
)
ggplot(data=szukis, aes(sn.w, pv.w,colour=TreeID) ) +
geom_jitter(width=.01, height=0) + geom_smooth(se=FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(data=szukis, aes(TreeID, log(pv.w),colour=TreeID) ) +
geom_boxplot() +
geom_jitter(width=.1, height=0)
ggplot(data=szukis, aes(TreeID, log(pistil.volume),colour=TreeID) ) +
geom_boxplot() +
geom_jitter(width=.1, height=0)
subset, or filter (dplyr package), and then cor.test(x, y, method='spearman').Approach 1 The direct approach.
wildtype <- subset(szukis, TreeID=="139")
# or with the dplyr package, filter(szukis, TreeID=="139")
with(wildtype, cor.test(sn.w, pv.w, method='spearman') )
##
## Spearman's rank correlation rho
##
## data: sn.w and pv.w
## S = 1860, p-value = 0.08287
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.3984962
S47 <- subset(szukis, TreeID=="Szukis4_7")
with(S47, cor.test(sn.w, pv.w, method='spearman') )
## Warning in cor.test.default(sn.w, pv.w, method = "spearman"): Cannot
## compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: sn.w and pv.w
## S = 13622, p-value = 0.001767
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.4905619
S76 <- subset(szukis, TreeID=="Szukis7_6")
with(S76, cor.test(sn.w, pv.w, method='spearman') )
##
## Spearman's rank correlation rho
##
## data: sn.w and pv.w
## S = 3216, p-value = 0.003692
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.5889328
S56 <- subset(szukis, TreeID=="Szukis5_6")
with(S56, cor.test(sn.w, pv.w, method='spearman') )
##
## Spearman's rank correlation rho
##
## data: sn.w and pv.w
## S = 2188, p-value = 0.05876
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.4207792
Another way to do Approach 1. (Make sure the dplyr and tidyr packages are loaded, with, e.g., library(dplyr)).
szukis %>% group_by(TreeID) %>% summarize(
rho = cor.test(sn.w, pv.w, method='spearman')$estimate,
P = cor.test(sn.w, pv.w, method='spearman')$p.value
)
## Warning in cor.test.default(sn.w, pv.w, method = "spearman"): Cannot
## compute exact p-value with ties
## Warning in cor.test.default(sn.w, pv.w, method = "spearman"): Cannot
## compute exact p-value with ties
Approach 2 This is not ready for prime time.
library(rstanarm)
options(mc.cores = parallel::detectCores())
post0 <- stan_glm(pv.w ~ 1, data = szukis,
family = Gamma,
chains = 3, cores = 3, seed = 1, iter = 3000)
post1 <- stan_glm(pv.w ~ sn.w, data = szukis,
family = Gamma,
chains = 3, cores = 3, seed = 1, iter = 3000)
post1b <- stan_glm(pv.w ~ TreeID, data = szukis,
family = Gamma,
chains = 3, cores = 3, seed = 1, iter = 3000)
post2 <- update(post1, formula = . ~ . + TreeID)
post3 <- update(post2, formula = . ~ . + TreeID:sn.w)
l0 <- loo(post0)
l1 <- loo(post1)
l1b <- loo(post1b)
l2 <- loo(post2)
l3 <- loo(post3)
compare_models(l0,l1, l1b, l2, l3)
tr <- ggplot(data=szukis, aes(sn.w, 1/pv.w,colour=TreeID) ) +
geom_jitter(width=.01, height=0) + geom_smooth()
tr
draws <- as.data.frame(post1)
colnames(draws)[1:2] <- c("a", "b")
tr +
geom_abline(data = draws, aes(intercept = a, slope = b),
color = "skyblue", size = 0.2, alpha = 0.25) +
geom_abline(intercept = coef(post1)[1], slope = coef(post1)[2],
color = "skyblue4", size = 1)