Date rendered: Thu May 7 11:51:34 2020

Objectives

The goal of this project is to measure the effect of resource dispersion on individual social network metrics (strength, betweenness, and clustering coefficient). This script generates a null distribution of coefficients from models that use social network metrics calculated from permuted datasets. To permute the datasets and calculate individual social network metrics from each dataset, use th e “Permutation Beetlearies 2018” and “Permuted Beetleary Social Networks 2018” scripts. After generating a null distribution of coefficients, this script then compares the coefficients from the model that uses observed social network metrics and calculates p-values.

Outstanding Questions

-Should I include number of scans seen, period, sex, or elytra in models? -What model assumptions need to be addressed when testing significance by permuting data? I don’t love any of these models. Do I need to run better models? -What type of network do I want to analyze? Male only? Female only? Include mating interactions? Proximity only? -The social network metrics are calcualted from social networks that include beetles that died and moved (3MZ) during the experiment. Should I remove beetles that died (and 3MZ) from these analyses? Should they be removed from the permutations as well as observed data? Seems strange because these dead beetles weren’t treated as dead during the permutations. But if they aren’t removed from the permutations, then we compare model coefficients built from datasets that are larger than the observed dataset.

Set-Up and Data

#clear memory
rm(list=ls())

#libraries
library(lme4)

## Loading required package: Matrix

library(car)

## Loading required package: carData

## Registered S3 methods overwritten by 'car':
##   method                          from
##   influence.merMod                lme4
##   cooks.distance.influence.merMod lme4
##   dfbeta.influence.merMod         lme4
##   dfbetas.influence.merMod        lme4

library(glmmTMB)
library(DHARMa)

## Registered S3 method overwritten by 'DHARMa':
##   method        from   
##   refit.glmmTMB glmmTMB

library(effects)

## lattice theme set by effectsTheme()
## See ?effectsTheme for details.

library(tidyverse)

## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.3
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0

## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x tidyr::pack()   masks Matrix::pack()
## x dplyr::recode() masks car::recode()
## x purrr::some()   masks car::some()
## x tidyr::unpack() masks Matrix::unpack()

library(foreach)

## 
## Attaching package: 'foreach'

## The following objects are masked from 'package:purrr':
## 
##     accumulate, when

library(doParallel)

## Loading required package: iterators

## Loading required package: parallel

#function that excludes
`%sans%` <- Negate(`%in%`)

#Change R's defult to contrasts 
options(contrasts=c("contr.sum", "contr.poly")) 

#FUNCTION: check.assumptions
check.assumptions <- function(mod){
  # Multiple panels
  par(mfrow=c(2,2))
  
  # Normality
  hist(resid(mod))
  qqnorm(resid(mod))
  qqline(resid(mod))
  
  # Linearity
  # --> Plot residual versus fitted values with loess line
  plot(fitted(mod), resid(mod), main = "Residual vs fitted values")
  fr.fit <- loess(resid(mod)~fitted(mod))
  fr.vec <- seq(min(fitted(mod)), max(fitted(mod)), length.out=100)
  lines(fr.vec,predict(fr.fit,fr.vec), col="red")
  
  # Homoscedasticity
  # --> Plot the absolute value of the residuals against the fitted values
  plot(fitted(mod), sqrt(abs(resid(mod))), main="Scale-location")
  sl.fit <- loess(sqrt(abs(resid(mod)))~fitted(mod))
  sl.vec <- seq(min(fitted(mod)), max(fitted(mod)), length.out=100)
  lines(sl.vec,predict(sl.fit,sl.vec), col="red")
}

#upload observed individual-level social network data
ObsIndividSN<-read.csv("observedSN_20200406.csv")

#upload permuted individual-level social network data
PermIndividSN<-readRDS("permutedSN20200406_100.RDS")

#Dead Beetles
DeadBeetles <- c("2BO", "2BX", "2H2", "2M6", "2MS", "2N6", "2N7", "2OH", "2OW", "2PK", "2PW", "2T9", "2TS", "2VB", "2VO", "2WV", "3A6", "3AB", "3AW", "3HK", "3HX", "3KA", "3LV", "3LX", "3MN", "3NH", "3ON", "3VO", "3W9", "3XM", "3XN", "2FF", "2HF", "3F3", "3KL", "3YF")

#Treatments
Dispersed <- c("1A_1", "2A_1", "3A_1", "4A_1", "5A_1", "6A_1", "1B_2", "2B_2", "3B_2", "4B_2", "5B_2", "6B_2")
Clumped <- c("1B_1", "2B_1", "3B_1", "4B_1", "5B_1", "6B_1", "1A_2", "2A_2", "3A_2", "4A_2", "5A_2", "6A_2")

CWW Function to Calculate P-Values

# FUNCTION: CWW (2015) Calculates one- and two-tailed p-values from a permuted distribution  
# Author Corlett Wolfe Wood

# One-tailed: The proportion of permuted values that are GREATER THAN the observed value
# Two-tailed: The proportion of permuted values that are MORE EXTREME than the observed

# NB: Calculation of two-tailed p-values on asymmetric null distributions
# On symmetric distributions, one-tailed p-values can be doubled to obtained two-tailed ones
# This doesn't work with asymmetric distributions because of skewness
# --> SOLUTION: This function below calculates two-tailed p-values as the proportion of
#     permuted.values that have a probability <= the probability of the observed value
# --> SOURCE: Pratt, J. W. and J. D. Gibbons. 1981. Concepts in Nonparametric Theory. 
#     Springer-Verlag, New York, NY pp. 29-32

# NB: p-values should NEVER be zero in a permutation test
# --> the observed data is one possible permutation, so at least one value
#     in the permuted data can be equal to the observed data (P>0)
# --> SOURCE: North, B. V., D. Curtis, and P. C. Sham. 2002. A note on the calculation of empirical 
#     p values from Monte Carlo procedures. Am. J. Hum. Genet. 71:439-441.
# --> SOURCE: Davison, A. C. and D. V. Hinkley. 1997. Bootstrap methods and their application. 
#     Cambridge University Press, Cambridge, United Kingdom
# --> SOLUTION: add 1 to the numerator and the denominator

pvals = function(observed.value, permuted.values){  
  # ONE-TAILED
  # Proportion of permuted values >= (if in upper tail) OR <= (if in lower tail) the observed value
  k.greater = length(which(permuted.values>=observed.value))
  k.less = length(which(permuted.values<=observed.value))
  n = length(permuted.values)
  # One-tailed p-value
  pval.onetail = (min(k.greater, k.less)+1)/(n+1) # See NB above for explanation of +1  
  
  # TWO-TAILED
  # Proportion of permuted values with PROBABILITIES <= the observed value
  df <- data.frame(permuted.values=permuted.values, prob=NA)
  names(df) <- c("permuted.values", "prob")
  for(p in 1:nrow(df)){
    # Calculate probability of each permuted value
    # Proportion of permuted values >= (if in upper tail) OR <= (if in lower tail) each permuted value
    k.greater = length(which(df$permuted.values>=df$permuted.values[p]))
    k.less = length(which(df$permuted.values<=df$permuted.values[p]))
    n = nrow(df)
    df$prob[p] = (min(k.greater, k.less)+1)/(n+1) # See NB above for explanation of +1
  }
  
  # Proportion of permuted.values with a have a probability <= the probability of the observed value
  prob.less <- length(which(df$prob<=pval.onetail))
  pval.twotail <- (prob.less+1)/(n+1) # See NB above for explanation of +1
  
  return(c(one.tailed=pval.onetail, 
           two.tailed=pval.twotail))
}

Observed SN

Clean Up Observed SN Dataset

Separate out columns to create Condo, Period, ID columns

Social network metrics were not calculated for UK beetles.

ObsIndividSN %>%
  #create Condo column
  mutate(Condo = as.factor(substr(Condo_Period, 1, 2))) %>%
  #create Period column
  mutate(Period = as.factor(substr(Condo_Period, 10, 10))) %>%
  #edit Condo_Period column
  mutate(Condo_Period = as.factor(paste(Condo, Period, sep="_"))) %>%
  #create ID column
  mutate(ID = as.factor(substr(ids, 1, 3))) -> ObsIndividSN

Treatment

Assign each Condo in each Period to a Dispersed or Clumped Treatment

ObsIndividSN %>%
  mutate(Treatment = ifelse(Condo_Period %in% Dispersed, "Dispersed", "Clumped")) %>%
  mutate(Treatment = as.factor(Treatment)) -> ObsIndividSN

Remove beetles that died or moved (3MZ/3XY) from the analyses

These beetles are currently included in the social networks, just removed from the analyses

3MZ seen in 1B starting on 20180702_A. It is supposed to be in 6A. We know 3MZ moved to 1B. We collected 3MZ from 1B on 20180706. 3XY moved from 1A to 1B. 3XY was seen in both 1A and 1B on 20180715_C. 3XY was only in 1B for 4 scans. Moved 3XY back to 1A.

ObsIndividSN %>%
  #remove dead beetles
  filter(ID %sans% DeadBeetles) %>%
  #remove 3MZ
  filter(ID != "3MZ") %>%
  #remove 3XY from 1B
  filter(ids != "3XY_1B") -> ObsIndividSN

Strength

This model is zero-inflated and glmmTMB does not fix the zero inflation issue.

obs.strength <- glmmTMB(alpha~Treatment+(1|Condo/ID), data=ObsIndividSN)
summary(obs.strength)

##  Family: gaussian  ( identity )
## Formula:          alpha ~ Treatment + (1 | Condo/ID)
## Data: ObsIndividSN
## 
##      AIC      BIC   logLik deviance df.resid 
##   2045.1   2068.5  -1017.5   2035.1      795 
## 
## Random effects:
## 
## Conditional model:
##  Groups   Name        Variance Std.Dev.
##  ID:Condo (Intercept) 0.248518 0.49852 
##  Condo    (Intercept) 0.002868 0.05356 
##  Residual             0.535297 0.73164 
## Number of obs: 800, groups:  ID:Condo, 400; Condo, 12
## 
## Dispersion estimate for gaussian family (sigma^2): 0.535 
## 
## Conditional model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  2.01076    0.03911   51.41   <2e-16 ***
## Treatment1   0.03027    0.02587    1.17    0.242    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Anova(obs.strength, type=3)

## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: alpha
##                 Chisq Df Pr(>Chisq)    
## (Intercept) 2642.8742  1     <2e-16 ***
## Treatment      1.3694  1     0.2419    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plot(allEffects(obs.strength))

check.assumptions(obs.strength)

#test for zero inflation and overdispersion with DHARMa
obs.strength.resid<-simulateResiduals(obs.strength, n=250, integerResponse=F)

## It seems you are diagnosing a glmmTMB model. There are still a few minor limitations associated with this package. The most important is that glmmTMB doesn't implement an option to create unconditional predictions from the model, which means that predicted values (in res ~ pred) plots include the random effects. With strong random effects, this can sometimes create diagonal patterns from bottom left to top right in the res ~ pred plot. All other tests and plots should work as desired. Please see https://github.com/florianhartig/DHARMa/issues/16 for further details.

## Model family was recognized or set as continuous, but duplicate values were detected in the response. Consider if you are fitting an appropriate model.

plot(obs.strength.resid)

testDispersion(obs.strength.resid)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## ratioObsSim = 0.66969, p-value < 2.2e-16
## alternative hypothesis: two.sided

testZeroInflation(obs.strength.resid)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = Inf, p-value < 2.2e-16
## alternative hypothesis: two.sided

#the model is VERY zero-inflated 
#BUT I am shocked that this is zero-inflated. What is going on here??
#when I create residuals, I get the message: Model family was recognized or set as continuous, but duplicate values were detected in the response. Consider if you are fitting an appropriate model. WHAT DOES THIS ERROR MESSAGE MEAN? It means that there are duplicate values in observed data.

#specify 0 inflation with glmmTMB
obs.strength.0inflation <- glmmTMB(alpha~Treatment+(1|Condo/ID), data=ObsIndividSN, ziformula=~1)

## Warning in nlminb(start = par, objective = fn, gradient = gr, control =
## control$optCtrl): NA/NaN function evaluation

## Warning in nlminb(start = par, objective = fn, gradient = gr, control =
## control$optCtrl): NA/NaN function evaluation

## Warning in nlminb(start = par, objective = fn, gradient = gr, control =
## control$optCtrl): NA/NaN function evaluation

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; false convergence (8).
## See vignette('troubleshooting')

summary(obs.strength.0inflation)

##  Family: gaussian  ( identity )
## Formula:          alpha ~ Treatment + (1 | Condo/ID)
## Zero inflation:         ~1
## Data: ObsIndividSN
## 
##      AIC      BIC   logLik deviance df.resid 
##       NA       NA       NA       NA      794 
## 
## Random effects:
## 
## Conditional model:
##  Groups   Name        Variance Std.Dev.
##  ID:Condo (Intercept) 0.312339 0.55887 
##  Condo    (Intercept) 0.000603 0.02456 
##  Residual             0.471096 0.68636 
## Number of obs: 800, groups:  ID:Condo, 400; Condo, 12
## 
## Dispersion estimate for gaussian family (sigma^2): 0.471 
## 
## Conditional model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  2.02494    0.03678   55.05   <2e-16 ***
## Treatment1   0.03682    0.02179    1.69   0.0911 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Zero-inflation model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -6.19397    0.01604  -386.2   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Anova(obs.strength.0inflation, type=3)

## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: alpha
##                 Chisq Df Pr(>Chisq)    
## (Intercept) 3031.0047  1    < 2e-16 ***
## Treatment      2.8553  1    0.09107 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plot(allEffects(obs.strength.0inflation))
check.assumptions(obs.strength.0inflation)

#test for zero inflation and overdispersion with DHARMa
obs.strength.0inflation.resid<-simulateResiduals(obs.strength.0inflation, n=250, integerResponse=F)

## It seems you are diagnosing a glmmTMB model. There are still a few minor limitations associated with this package. The most important is that glmmTMB doesn't implement an option to create unconditional predictions from the model, which means that predicted values (in res ~ pred) plots include the random effects. With strong random effects, this can sometimes create diagonal patterns from bottom left to top right in the res ~ pred plot. All other tests and plots should work as desired. Please see https://github.com/florianhartig/DHARMa/issues/16 for further details.
## Model family was recognized or set as continuous, but duplicate values were detected in the response. Consider if you are fitting an appropriate model.

## Model family was recognized or set as continuous, but duplicate values were detected in the simulation - changing to integer residuals (see ?simulateResiduals for details)

plot(obs.strength.0inflation.resid)

testDispersion(obs.strength.0inflation.resid)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## ratioObsSim = 0.61743, p-value < 2.2e-16
## alternative hypothesis: two.sided

testZeroInflation(obs.strength.0inflation.resid)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 3.5377, p-value = 0.008
## alternative hypothesis: two.sided

#now the model is underdispersed and still very 0 inflated
#Not sure glmmTMB helped
#when I create residuals, I get the message: Model family was recognized or set as continuous, but duplicate values were detected in the response. Consider if you are fitting an appropriate model. WHAT DOES THIS ERROR MESSAGE MEAN? It means that there are duplicate values in observed data.

#extract estimate for treatment
obs.strength.treatment.coef<-summary(obs.strength.0inflation)$coefficients$cond[2,1]

Betweenness

How best deal with betweenness data? QQ Plots look awful.

obs.betweenness <- glmmTMB(betweenness~Treatment+(1|Condo/ID), data=ObsIndividSN)
summary(obs.betweenness)

##  Family: gaussian  ( identity )
## Formula:          betweenness ~ Treatment + (1 | Condo/ID)
## Data: ObsIndividSN
## 
##      AIC      BIC   logLik deviance df.resid 
##   7581.0   7604.4  -3785.5   7571.0      795 
## 
## Random effects:
## 
## Conditional model:
##  Groups   Name        Variance  Std.Dev. 
##  ID:Condo (Intercept) 1.051e+02 1.025e+01
##  Condo    (Intercept) 8.787e-07 9.374e-04
##  Residual             6.566e+02 2.562e+01
## Number of obs: 800, groups:  ID:Condo, 400; Condo, 12
## 
## Dispersion estimate for gaussian family (sigma^2):  657 
## 
## Conditional model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  19.8288     1.0409  19.050   <2e-16 ***
## Treatment1   -0.5505     0.9059  -0.608    0.543    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Anova(obs.betweenness, type=3)

## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: betweenness
##                Chisq Df Pr(>Chisq)    
## (Intercept) 362.9065  1     <2e-16 ***
## Treatment     0.3693  1     0.5434    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plot(allEffects(obs.betweenness))

check.assumptions(obs.betweenness)

#error: boundary (singular) fit. betweenness is extremely 0-inflated and non-normal. 

#specify 0-inflation and negative binomial family with glmmTMB
obs.betweenness.0inflation.nbinom2<-glmmTMB(betweenness~Treatment+(1|Condo/ID), data=ObsIndividSN, family=nbinom2, ziformula=~1)
summary(obs.betweenness.0inflation.nbinom2)

##  Family: nbinom2  ( log )
## Formula:          betweenness ~ Treatment + (1 | Condo/ID)
## Zero inflation:               ~1
## Data: ObsIndividSN
## 
##      AIC      BIC   logLik deviance df.resid 
##   6004.9   6033.0  -2996.5   5992.9      794 
## 
## Random effects:
## 
## Conditional model:
##  Groups   Name        Variance  Std.Dev. 
##  ID:Condo (Intercept) 2.942e-07 5.424e-04
##  Condo    (Intercept) 2.896e-27 5.381e-14
## Number of obs: 800, groups:  ID:Condo, 400; Condo, 12
## 
## Overdispersion parameter for nbinom2 family (): 0.676 
## 
## Conditional model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.17732    0.05203   61.07   <2e-16 ***
## Treatment1  -0.03006    0.04899   -0.61    0.539    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Zero-inflation model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.5610     0.1489  -10.48   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Anova(obs.betweenness.0inflation.nbinom2, type=3)

## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: betweenness
##                 Chisq Df Pr(>Chisq)    
## (Intercept) 3729.4635  1     <2e-16 ***
## Treatment      0.3766  1     0.5394    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plot(allEffects(obs.betweenness.0inflation.nbinom2))

## Warning in Effect.glmmTMB(predictors, mod, vcov. = vcov., ...): overriding
## variance function for effects: computed variances may be incorrect

check.assumptions(obs.betweenness.0inflation.nbinom2)

#test for zero inflation and overdispersion with DHARMa
obs.betweenness.0inflation.nbinom2.resid<-simulateResiduals(obs.betweenness.0inflation.nbinom2, n=250)

## It seems you are diagnosing a glmmTMB model. There are still a few minor limitations associated with this package. The most important is that glmmTMB doesn't implement an option to create unconditional predictions from the model, which means that predicted values (in res ~ pred) plots include the random effects. With strong random effects, this can sometimes create diagonal patterns from bottom left to top right in the res ~ pred plot. All other tests and plots should work as desired. Please see https://github.com/florianhartig/DHARMa/issues/16 for further details.

plot(obs.betweenness.0inflation.nbinom2.resid)

testDispersion(obs.betweenness.0inflation.nbinom2.resid)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## ratioObsSim = 0.97704, p-value = 0.72
## alternative hypothesis: two.sided

testZeroInflation(obs.betweenness.0inflation.nbinom2.resid)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0.99828, p-value = 1
## alternative hypothesis: two.sided

#not zero inflated. BUT QQ plot residuals look awful

#extract estimate for treatment
obs.betweenness.treatment.coef<-summary(obs.betweenness.0inflation.nbinom2)$coefficients$cond[2,1]

Clustering Coefficient

obs.cc <- glmmTMB(am~Treatment+(1|Condo/ID), data=ObsIndividSN, na.action=na.omit)
summary(obs.cc)

##  Family: gaussian  ( identity )
## Formula:          am ~ Treatment + (1 | Condo/ID)
## Data: ObsIndividSN
## 
##      AIC      BIC   logLik deviance df.resid 
##  -1035.0  -1011.7    522.5  -1045.0      778 
## 
## Random effects:
## 
## Conditional model:
##  Groups   Name        Variance  Std.Dev. 
##  ID:Condo (Intercept) 5.106e-11 7.146e-06
##  Condo    (Intercept) 4.412e-04 2.101e-02
##  Residual             1.516e-02 1.231e-01
## Number of obs: 783, groups:  ID:Condo, 399; Condo, 12
## 
## Dispersion estimate for gaussian family (sigma^2): 0.0152 
## 
## Conditional model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) 0.475218   0.007494   63.41   <2e-16 ***
## Treatment1  0.003732   0.004401    0.85    0.396    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Anova(obs.cc, type=3)

## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: am
##                 Chisq Df Pr(>Chisq)    
## (Intercept) 4020.8626  1     <2e-16 ***
## Treatment      0.7191  1     0.3964    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plot(allEffects(obs.cc))

check.assumptions(obs.cc)

#error: boundary (singular) fit. WHY? I removed individuals that did not have a clustering coefficient metric. 

#specify 0inflation with glmmTMB
obs.cc.0inflation<-glmmTMB(am~Treatment+(1|Condo/ID), data=ObsIndividSN, na.action=na.omit, ziformula=~1)
summary(obs.cc.0inflation)

##  Family: gaussian  ( identity )
## Formula:          am ~ Treatment + (1 | Condo/ID)
## Zero inflation:      ~1
## Data: ObsIndividSN
## 
##      AIC      BIC   logLik deviance df.resid 
##  -1044.3  -1016.3    528.1  -1056.3      777 
## 
## Random effects:
## 
## Conditional model:
##  Groups   Name        Variance  Std.Dev. 
##  ID:Condo (Intercept) 4.198e-11 6.479e-06
##  Condo    (Intercept) 3.482e-04 1.866e-02
##  Residual             1.361e-02 1.167e-01
## Number of obs: 783, groups:  ID:Condo, 399; Condo, 12
## 
## Dispersion estimate for gaussian family (sigma^2): 0.0136 
## 
## Conditional model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) 0.478799   0.006851   69.89   <2e-16 ***
## Treatment1  0.002362   0.004225    0.56    0.576    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Zero-inflation model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -4.8521     0.4427  -10.96   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Anova(obs.cc.0inflation, type=3)

## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: am
##                 Chisq Df Pr(>Chisq)    
## (Intercept) 4884.2268  1     <2e-16 ***
## Treatment      0.3124  1     0.5762    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plot(allEffects(obs.cc.0inflation))

check.assumptions(obs.cc.0inflation)

#test for zero inflation and overdispersion with DHARMa
obs.cc.0inflation.resid<-simulateResiduals(obs.cc.0inflation, n=250, integerResponse=FALSE)

## It seems you are diagnosing a glmmTMB model. There are still a few minor limitations associated with this package. The most important is that glmmTMB doesn't implement an option to create unconditional predictions from the model, which means that predicted values (in res ~ pred) plots include the random effects. With strong random effects, this can sometimes create diagonal patterns from bottom left to top right in the res ~ pred plot. All other tests and plots should work as desired. Please see https://github.com/florianhartig/DHARMa/issues/16 for further details.

## Model family was recognized or set as continuous, but duplicate values were detected in the response. Consider if you are fitting an appropriate model.

## Model family was recognized or set as continuous, but duplicate values were detected in the simulation - changing to integer residuals (see ?simulateResiduals for details)

plot(obs.cc.0inflation.resid)

testDispersion(obs.cc.0inflation.resid)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## ratioObsSim = 0.97802, p-value = 0.52
## alternative hypothesis: two.sided

testZeroInflation(obs.cc.0inflation.resid)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 1.1513, p-value = 0.864
## alternative hypothesis: two.sided

#not underdispersed nor zero inflated! QQ plot is a bit weird
#when I create residuals, I get the message: Model family was recognized or set as continuous, but duplicate values were detected in the response. Consider if you are fitting an appropriate model. - changing to integer residuals. WHAT IS THIS? It is changing the integerResponse to TRUE. But this is weird given that am is not an integer.

#extract estimate for treatment
obs.cc.treatment.coef<-summary(obs.cc.0inflation)$coefficients$cond[2,1]

Permuted SN

Clean Up Pemuted SN Datasets

Separate out columns to create Condo, Period, ID columns

#create Condo column
PermIndividSN<-lapply(PermIndividSN, function(x) {mutate(x, Condo=as.factor(substr(Condo_Period, 1, 2)))})

#create Period column
PermIndividSN<-lapply(PermIndividSN, function(x) {mutate(x, Period=as.factor(substr(Condo_Period, 4, 4)))})

#create ID column
PermIndividSN<-lapply(PermIndividSN, function(x) {mutate(x, ID=as.factor(substr(ids, 1, 3)))})

Treatment

Assign each Condo in each Period to a Dispersed or Clumped Treatment

PermIndividSN<-lapply(PermIndividSN, function(x) {mutate(x, Treatment=as.factor(ifelse(Condo_Period %in% Dispersed, "Dispersed", "Clumped")))})

Remove beetles that died or moved (3MZ/3XY) from the analyses

These beetles are included in the permutations. For period 1, all beetles are included in the permutation. For period 2, all beetles alive at the start of period 2 are included in the permutation. This means that these beetles are also included in the permtued social networks.

#remove dead beetles
PermIndividSN<-lapply(PermIndividSN, function(x) {filter(x, ID %sans% DeadBeetles)})

#remove 3MZ
PermIndividSN<-lapply(PermIndividSN, function(x) {filter(x, ID != "3MZ")})

#remove 3XY from 1B
PermIndividSN<-lapply(PermIndividSN, function(x) {filter(x, ids != "3XY_1B")})

Run Parallel Processing

For now, parallel process with multiple cores on local computer. In future, figure out how to send this script to Rivana.

#detect number of cores
detectCores()

## [1] 8

#set number of cores on computer to 3, that way have a 4th core free to use computer with
registerDoParallel(cores=3)

Strength Model

#write a function that runs the model
perm.strength = function(PermIndividSN){
  perm.strength.mod<-glmmTMB(alpha~Treatment+(1|Condo/ID), data=PermIndividSN, ziformula=~1)
  return(perm.strength.mod)
}

#run the model on each permutation
perm.strength.results<-lapply(PermIndividSN, FUN=perm.strength)

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

#error: Model convergence problem; non-positive-definite Hessian matrix.

Strength Model Estimates

#write a function that extracts the type 3 sum of squares (written by VAF)
AnovaT3=function(x){
  setTimeLimit(cpu = Inf,  transient = FALSE)
  return(tryCatch(Anova(x, type=3), error=function(e) NULL))
}

#write a function that extracts the model summary (written by VAF)
Model.Summary=function(x){
  setTimeLimit(cpu = Inf,  transient = FALSE)
  return(tryCatch(summary(x), error=function(e) NULL))
}

#extract type 3 sum of squares for each permutation
perm.strength.Anova<-lapply(perm.strength.results, FUN=AnovaT3)

#extract model summary for each permutation
perm.strength.summary<-lapply(perm.strength.results, FUN=Model.Summary)

#extract model estimate for treatment from model summary
perm.strength.treatment.coef<-foreach(i = 1:length(perm.strength.summary)) %dopar% {
  perm.strength.summary[[i]][["coefficients"]][["cond"]][[2]]
}

#flatten the list of permutations into a dataframe with one column that contains all the estimates from every permutation
perm.strength.treatment.coef.df<-do.call(rbind, perm.strength.treatment.coef)

Strength P-Values

#use CWW pval function to calculate pvalues
strength.pvals<-pvals(obs.strength.treatment.coef, perm.strength.treatment.coef.df)

Visualize Permuted Strength Estimates

#clean up dataframe to plot
perm.strength.treatment.coef.df<-as.data.frame(perm.strength.treatment.coef.df)
perm.strength.treatment.coef.df %>%
  dplyr::rename(PermutedEstimate = V1) -> perm.strength.treatment.coef.df

#set ggplot theme
theme_set(theme_classic(base_size=24)) 

#ggplot
#something is not quite working here
ggplot(perm.strength.treatment.coef.df, aes(x=PermutedEstimate)) +
  geom_histogram() +
  geom_vline(xintercept = obs.strength.treatment.coef, color = "red")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Betweenness Model

#write a function that runs the model
perm.betweenness = function(PermIndividSN){
  perm.betweenness.mod<-glmmTMB(betweenness~Treatment+(1|Condo/ID), data=PermIndividSN, family=nbinom2, ziformula=~1)
  return(perm.betweenness.mod)
}

#run the model on each permutation
perm.betweenness.results<-lapply(PermIndividSN, FUN=perm.betweenness)

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; extreme or very small
## eigen values detected. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

#error: Model convergence problem; non-positive-definite Hessian matrix

Betweenness Model Estimates

#write a function that extracts the type 3 sum of squares (written by VAF)
AnovaT3=function(x){
  setTimeLimit(cpu = Inf,  transient = FALSE)
  return(tryCatch(Anova(x, type=3), error=function(e) NULL))
}

#write a function that extracts the model summary (written by VAF)
Model.Summary=function(x){
  setTimeLimit(cpu = Inf,  transient = FALSE)
  return(tryCatch(summary(x), error=function(e) NULL))
}

#extract type 3 sum of squares for each permutation
perm.betweenness.Anova<-lapply(perm.betweenness.results, FUN=AnovaT3)

#extract model summary for each permutation
perm.betweenness.summary<-lapply(perm.betweenness.results, FUN=Model.Summary)

#extract model estimate for treatment from model summary
perm.betweenness.treatment.coef<-foreach(i = 1:length(perm.betweenness.summary)) %dopar% {
  perm.betweenness.summary[[i]][["coefficients"]][["cond"]][[2]]
}

#flatten the list of permutations into a dataframe with one column that contains all the estimates from every permutation
perm.betweenness.treatment.coef.df<-do.call(rbind, perm.betweenness.treatment.coef)

Betweenness P-Values

#use CWW pval function to calculate pvalues
betweenness.pvals<-pvals(obs.betweenness.treatment.coef, perm.betweenness.treatment.coef.df)

Visualize Permuted Betweenness Estimates

#clean up dataframe to plot
perm.betweenness.treatment.coef.df<-as.data.frame(perm.betweenness.treatment.coef.df)
perm.betweenness.treatment.coef.df %>%
  dplyr::rename(PermutedEstimate = V1) -> perm.betweenness.treatment.coef.df

#set ggplot theme
theme_set(theme_classic(base_size=24)) 

#ggplot
#something is not quite working here
ggplot(perm.betweenness.treatment.coef.df, aes(x=PermutedEstimate)) +
  geom_histogram() +
  geom_vline(xintercept = obs.betweenness.treatment.coef, color = "red")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

CC Model

#write a function that runs the model
perm.cc = function(PermIndividSN){
  perm.cc.mod<-glmmTMB(am~Treatment+(1|Condo/ID), data=PermIndividSN, na.action=na.omit, ziformula=~1)
  return(perm.cc.mod)
}

#run the model on each permutation
perm.cc.results<-lapply(PermIndividSN, FUN=perm.cc)

## Warning in fitTMB(TMBStruc): Model convergence problem; extreme or very small
## eigen values detected. See vignette('troubleshooting')

## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')

#error: Model convergence problem; extreme of very small eigen values detected 
#error: Model convergence problem; non-positive-definite Hessian matrix

CC Model Estimates

#write a function that extracts the type 3 sum of squares (written by VAF)
AnovaT3=function(x){
  setTimeLimit(cpu = Inf,  transient = FALSE)
  return(tryCatch(Anova(x, type=3), error=function(e) NULL))
}

#write a function that extracts the model summary (written by VAF)
Model.Summary=function(x){
  setTimeLimit(cpu = Inf,  transient = FALSE)
  return(tryCatch(summary(x), error=function(e) NULL))
}

#extract type 3 sum of squares for each permutation
perm.cc.Anova<-lapply(perm.cc.results, FUN=AnovaT3)

#extract model summary for each permutation
perm.cc.summary<-lapply(perm.cc.results, FUN=Model.Summary)

#extract model estimate for treatment from model summary
perm.cc.treatment.coef<-foreach(i = 1:length(perm.cc.summary)) %dopar% {
  perm.cc.summary[[i]][["coefficients"]][["cond"]][[2]]
}

#flatten the list of permutations into a dataframe with one column that contains all the estimates from every permutation
perm.cc.treatment.coef.df<-do.call(rbind, perm.cc.treatment.coef)

CC P-Values

#use CWW pval function to calculate pvalues
cc.pvals<-pvals(obs.cc.treatment.coef, perm.cc.treatment.coef.df)

Visualize Permuted CC Estimates

#clean up dataframe to plot
perm.cc.treatment.coef.df<-as.data.frame(perm.cc.treatment.coef.df)
perm.cc.treatment.coef.df %>%
  dplyr::rename(PermutedEstimate = V1) -> perm.cc.treatment.coef.df

#set ggplot theme
theme_set(theme_classic(base_size=24)) 

#ggplot
#something is not quite working here
ggplot(perm.cc.treatment.coef.df, aes(x=PermutedEstimate)) +
  geom_histogram() +
  geom_vline(xintercept = obs.cc.treatment.coef, color = "red")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

HabitatEffectOnIndividSN2018PermutedAnalaysis

Robin Costello

20200402

Objectives

Outstanding Questions

Set-Up and Data

CWW Function to Calculate P-Values

Observed SN

Clean Up Observed SN Dataset

Separate out columns to create Condo, Period, ID columns

Treatment

Remove beetles that died or moved (3MZ/3XY) from the analyses

Strength

Betweenness

Clustering Coefficient

Permuted SN

Clean Up Pemuted SN Datasets

Separate out columns to create Condo, Period, ID columns

Treatment

Remove beetles that died or moved (3MZ/3XY) from the analyses

Run Parallel Processing

Strength Model

Strength Model Estimates

Strength P-Values

Visualize Permuted Strength Estimates

Betweenness Model

Betweenness Model Estimates

Betweenness P-Values

Visualize Permuted Betweenness Estimates

CC Model

CC Model Estimates

CC P-Values

Visualize Permuted CC Estimates