Data Samples Size and Wrangling

# Count the rows of es_doc in each grouping category.
# Rows with a missing category are dropped first. is.na() is the explicit test
# for missingness; comparing against the string "NA" only worked because
# filter() discards rows whose condition evaluates to NA.
brk_es_doc <- es_doc %>%
  filter(!is.na(categories)) %>%
  count(categories)

# Render the category counts as a striped, left-aligned HTML table inside a
# scrollable box.
brk_es_doc %>%
  kable(format = "html") %>%
  kable_styling(bootstrap_options = "striped", position = "left") %>%
  scroll_box(width = "50%", height = "500px")
categories n
1 clutch 41
cannibal non-diet condition 11
cannibal phenology 18
cannibal satiated 53
cannibal size comparison 6
cannibal starved 56
courting behaviour 11
diet comparison 52
experiment set-up 9
insertion behaviour 12
mating and diet interaction 20
mating status 4
more than 1 clutch 42
multiple mates 29
one male 21
species recognition 5
SSD comparison 25
victim mating status 12
victim non mating status condition 11
# Keep only the listed columns and convert every character column to a factor.
es_general <- es_doc %>%
  select(
    ID, year, es_ID, female_ID, male_ID, treatment_ID,
    spcs, cannibal, mate_status, behaviour, categories,
    general_response2, exprt, habitat, data, occur,
    yi, vi, adaptive, aggressive, parental, choice,
    es_type, precision
  ) %>%
  mutate_if(is.character, as.factor)

# Inspect the structure of the cleaned data frame.
str(es_general)
## 'data.frame':    940 obs. of  24 variables:
##  $ ID               : Factor w/ 111 levels "ft001","ft003",..: 10 10 10 10 10 10 10 10 107 1 ...
##  $ year             : int  2012 2012 2012 2012 2012 2012 2012 2012 2019 2019 ...
##  $ es_ID            : Factor w/ 911 levels "es_1","es_10",..: 5 6 7 3 911 4 910 909 871 112 ...
##  $ female_ID        : Factor w/ 348 levels "f_1","f_10","f_100",..: 110 110 110 110 110 110 110 110 255 1 ...
##  $ male_ID          : Factor w/ 381 levels "m_1","m_10","m_100",..: 143 153 164 132 132 132 132 132 289 1 ...
##  $ treatment_ID     : Factor w/ 829 levels "trt_1","trt_10",..: 822 823 824 820 818 821 817 816 772 111 ...
##  $ spcs             : Factor w/ 50 levels "Agelenopsis pennsylvanica",..: 1 1 1 1 1 1 1 1 2 2 ...
##  $ cannibal         : Factor w/ 2 levels "female","male": 1 1 1 1 1 1 1 1 1 1 ...
##  $ mate_status      : Factor w/ 3 levels "mated","unknown",..: 3 3 3 3 3 3 3 3 1 3 ...
##  $ behaviour        : Factor w/ 2 levels "attack","cannibal": 2 2 2 2 2 2 2 2 1 2 ...
##  $ categories       : Factor w/ 19 levels "1 clutch","cannibal non-diet condition",..: NA NA NA NA NA NA NA NA 17 NA ...
##  $ general_response2: Factor w/ 19 levels "age and SSD",..: 4 4 4 5 6 10 10 10 2 7 ...
##  $ exprt            : Factor w/ 3 levels "field","lab",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ habitat          : Factor w/ 3 levels "field","lab",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ data             : Factor w/ 4 levels "graph","raw",..: 1 1 1 1 1 1 1 1 4 4 ...
##  $ occur            : Factor w/ 3 levels "post","pre","unknown": 2 2 2 2 2 2 2 2 2 1 ...
##  $ yi               : num  -0.6848 -0.341 0.0254 -1.0066 -0.0307 ...
##  $ vi               : num  0.1728 0.1602 0.1551 0.037 0.0332 ...
##  $ adaptive         : Factor w/ 2 levels "N","Y": 2 2 2 1 2 1 1 1 2 1 ...
##  $ aggressive       : Factor w/ 2 levels "N","Y": 2 2 2 2 2 1 1 1 2 1 ...
##  $ parental         : Factor w/ 2 levels "N","Y": 1 1 1 1 1 2 2 2 1 1 ...
##  $ choice           : Factor w/ 2 levels "N","Y": 1 1 1 1 1 1 1 1 1 2 ...
##  $ es_type          : Factor w/ 940 levels "anova_hedge_g01",..: 168 169 170 435 436 470 437 438 10 6 ...
##  $ precision        : num  2.41 2.5 2.54 5.2 5.49 ...

Distribution of Effect Sizes

A few extreme values are visible and must be treated with caution: * es < -20 * -20 < es < -10 * es > 30 * 10 < es < 20

# Histogram of the effect sizes (yi) in bins of width 1.
ggplot(data = es_general, mapping = aes(x = yi)) +
  geom_histogram(binwidth = 1)

Exploring extreme values

  • -20 < es < -10
    • ID ft035 (es_218): low precision (0.44) - multiple es values taken from this paper that seemed fine - KEEP
    • ID ft103 (es_617): low precision (0.88) for this value - mistyped
  • es > 30
    • ID ft149 check chi sq - KEEP
  • 10 < es < 20
    • ID ft003 (es_16) - mistyped but still high, KEEP
    • ID ft112 (es_673) - KEEP
    • ID ft035 (es_221) - KEEP
    • ID ft135 (es_750) - KEEP
# Rows whose effect size lies strictly between -20 and -10.
es_20_10 <- filter(es_general, yi > -20, yi < -10)

# Rows whose effect size is above 30.
es_30 <- filter(es_general, yi > 30)

# Rows whose effect size lies strictly between 10 and 20.
es_10_20 <- filter(es_general, yi > 10, yi < 20)

Phylogenetic Tree

Getting Taxonomic Data

Notes

# Match every species level in the data against the Open Tree of Life names.
# Note: Trichonephila edulis, Trichonephila fenestrata and Trichonephila
# senegalensis could not be found by the tree because the tree of life still
# lists them under "nephila".
resolved_name <- tnrs_match_names(
  names = levels(es_general$spcs),
  context_name = "Animals"
)

# Names whose number of matches in rotl is not exactly one.
# Hogna Helluo is ok.
matches.2 <- filter(resolved_name, number_matches != 1)

# Fetch the induced subtree for the matched taxa, labelling tips by name.
tree_es <- tol_induced_subtree(
  ott_ids = resolved_name$ott_id,
  label_format = "name"
)

# Clean the tip labels in place. The phylo element is `tip.label`; assigning to
# `tip_label` only created an unused extra element and left the real tip labels
# unchanged, so the underscores were never removed.
tree_es$tip.label <- strip_ott_ids(
  tree_es$tip.label,
  remove_underscores = TRUE
)

# Check whether the tree is binary.
is.binary(tree_es)
## [1] FALSE
# Resolve the polytomies.
tree_es_2 <- multi2di(phy = tree_es)

# Confirm that the resolved tree is binary.
is.binary(tree_es_2)
## [1] TRUE
# Remove the node labels from the resolved tree.
tree_es_2$node.label <- NULL

#Computing branch lengths & Final Tree

# Compute branch lengths with Grafen's method (power = 1).
branches2 <- compute.brlen(tree_es_2, method = "Grafen", power = 1)

is.ultrametric(branches2)
## [1] TRUE
# Save the phylogenetic matrix; corr = TRUE requests the correlation matrix.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
phylo_vcv2 <- vcv.phylo(branches2, corr = TRUE)

# Tibble form of the tree, used to join the effect sizes with the phylogenetic
# data.
tree_tib2 <- as_tibble(branches2)

# Add label2: the tree label with underscores replaced by spaces, so that it
# matches the species names in the data frame.
tree_tib3 <- tree_tib2 %>%
  mutate(label2 = str_replace_all(label, "_", " "))

# Join on label2 rather than label: per the original note, the full join did
# not work on the names as returned by rotl because they differ from the
# species names in the data frame.
joining_traits2 <- full_join(tree_tib3, es_general, by = c("label2" = "spcs"))

# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned.
write.csv(joining_traits2, "full_model_es.csv", row.names = FALSE)

# Convert the labelled tibble back into a treedata object.
tree_es <- as.treedata(tree_tib3)

# Plot the tree with branch lengths, using the space-separated labels (label2)
# as aligned tip labels.
ggtree(tree_es) +
  geom_tiplab(aes(label = label2), align = TRUE) +
  xlim(NA, 6)