The statistical model:

\(y_t = \beta_0 + \beta_1 * (Elevation_s)_t + \beta_2 * Slope_t + (b_s)_t + \epsilon_t\)

Where:

Let’s define the parameters:

nstand = 5
nplot = 4
b0 = -1
b1 = .005
b2 = .1
sds = 2
sd = 1

Simulate other variables:

set.seed(16)
stand = rep(LETTERS[1:nstand], each = nplot)
standeff = rep( rnorm(nstand, 0, sds), each = nplot)
ploteff = rnorm(nstand*nplot, 0, sd)

Simulate elevation and slope:

elevation = rep( runif(nstand, 1000, 1500), each = nplot)
slope = runif(nstand*nplot, 2, 75)

Simulate response variable:

resp2 = b0 + b1*elevation + b2*slope + standeff + ploteff 

Your tasks (complete each task in its’ own code chunk, make sure to use echo=TRUE so I can see your code):

library(lme4)
## Loading required package: Matrix
library(lmerTest)
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
library(Matrix)
library(purrr)
library(broom)
library(broom.mixed)
## Registered S3 methods overwritten by 'broom.mixed':
##   method         from 
##   augment.lme    broom
##   augment.merMod broom
##   glance.lme     broom
##   glance.merMod  broom
##   glance.stanreg broom
##   tidy.brmsfit   broom
##   tidy.gamlss    broom
##   tidy.lme       broom
##   tidy.merMod    broom
##   tidy.rjags     broom
##   tidy.stanfit   broom
##   tidy.stanreg   broom
## 
## Attaching package: 'broom.mixed'
## The following object is masked from 'package:broom':
## 
##     tidyMCMC
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v tibble  2.1.3     v readr   1.3.1
## v tidyr   0.8.3     v stringr 1.4.0
## v tibble  2.1.3     v forcats 0.4.0
## -- Conflicts -------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x tidyr::expand()         masks Matrix::expand()
## x dplyr::filter()         masks stats::filter()
## x dplyr::lag()            masks stats::lag()
## x broom.mixed::tidyMCMC() masks broom::tidyMCMC()
  1. Fit a linear mixed model with the response variable as a function of elevation and slope with stand as a random effect. Are the estimated parameters similar to the intial parameters as we defined them?

The estimated parameter for the new linear fix model differ to the original parameters that were previously established

model <- lmer(resp2 ~ elevation + slope + (1|stand))
anova(model)
## Type III Analysis of Variance Table with Satterthwaite's method
##           Sum Sq Mean Sq NumDF   DenDF F value    Pr(>F)    
## elevation 23.840  23.840     1  3.1135  17.557   0.02303 *  
## slope     45.436  45.436     1 15.8680  33.461 2.884e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: resp2 ~ elevation + slope + (1 | stand)
## 
## REML criterion at convergence: 82
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.65583 -0.62467 -0.01693  0.53669  1.41736 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  stand    (Intercept) 1.208    1.099   
##  Residual             1.358    1.165   
## Number of obs: 20, groups:  stand, 5
## 
## Fixed effects:
##               Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept) -21.314628   6.602053   3.001313  -3.228   0.0482 *  
## elevation     0.020600   0.004916   3.113482   4.190   0.0230 *  
## slope         0.095105   0.016441  15.868032   5.785 2.88e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr) elevtn
## elevation -0.991       
## slope      0.049 -0.148
  1. Create a function for your model and run 1000 simulations of that model.
model_func = function(nstand = 5, nplot = 4, b0 = -1, b1 = 0.005, b2 = 0.1, sds = 2, sd = 1) {
  stand = rep(LETTERS[1:nstand], each = nplot)
  standeff = rep( rnorm(nstand, 0, sds), each = nplot)
  ploteff = rnorm(nstand*nplot, 0, sd)
  elevation = rep( runif(nstand, 1000, 1500), each = nplot)
  slope = runif(nstand*nplot, 2, 75)
  resp2 = b0 + b1*elevation + b2*slope + standeff + ploteff
  dat <- data.frame(elevation, slope, stand, resp2)
  model <- lmer(resp2 ~ elevation + slope + (1|stand), data = dat)
}

set.seed(16)
model_func()
summary(model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: resp2 ~ elevation + slope + (1 | stand)
## 
## REML criterion at convergence: 82
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.65583 -0.62467 -0.01693  0.53669  1.41736 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  stand    (Intercept) 1.208    1.099   
##  Residual             1.358    1.165   
## Number of obs: 20, groups:  stand, 5
## 
## Fixed effects:
##               Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept) -21.314628   6.602053   3.001313  -3.228   0.0482 *  
## elevation     0.020600   0.004916   3.113482   4.190   0.0230 *  
## slope         0.095105   0.016441  15.868032   5.785 2.88e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr) elevtn
## elevation -0.991       
## slope      0.049 -0.148
  1. Extract the stand and residual variances from this simulation run. Print the first 6 rows of the data.
model_sim <- rerun(1000, model_func())
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00350336
## (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0045514
## (tol = 0.002, component 1)
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00363544
## (tol = 0.002, component 1)
## boundary (singular) fit: see ?isSingular
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00439561
## (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00239513
## (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00219495
## (tol = 0.002, component 1)
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00322495
## (tol = 0.002, component 1)
## boundary (singular) fit: see ?isSingular
length(model_sim)
## [1] 1000
  1. Choose three different sample sizes (your choice) and run 1000 model simulations with each sample size. Create 3 visualizations that compare distributions of the variances for each of the 3 sample sizes. Make sure that the axes are labelled correctly. What do these graphs say about the relationship between sample size and variance?
stand_sims = c(10, 20, 30) %>%
  set_names(c("10 stands", "20 stands", "30 stands")) %>%
  map(~replicate(1000, model_func(nstand = .x)))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.350468
## (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00542153
## (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.010252
## (tol = 0.002, component 1)
## boundary (singular) fit: see ?isSingular
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00256339
## (tol = 0.002, component 1)
stand_vars = stand_sims %>%
  modify_depth(2, ~tidy(.x, effects = "ran_pars", scales = "vcov")) %>%
  map_dfr(bind_rows, .id = "id") %>%
  filter(group == "stand")
add_prefix <- function(string) {
  paste("Number of stands:", string, sep = " ")
}

groupmed = stand_vars %>%
  group_by(id) %>%
  summarise(mvar = median(estimate))
ggplot(stand_vars, aes(x = estimate)) +
  geom_density(fill = "blue", alpha = "0.25") +
  facet_wrap(~id, labeller = as_labeller(add_prefix)) +
  geom_vline(aes(xintercept = 4, linetype = "True Variance"), size=0.5) +
  geom_vline(data = groupmed, aes(xintercept=mvar,linetype="Median Variance"), size = 0.5) +
  theme_bw() +
  labs(x="Estimated Variance")

  1. Plot the coefficients of the estimates of elevation and slope. Hint: the x-axis should have 1000 values. Discuss the graphs.
est_sim <- model_sim %>% 
  map_dfr(tidy, effects = "fixed") %>% 
  bind_rows()


est_sim %>% 
  dplyr::filter(term %in% c("slope", "elevation")) %>% 
  group_by(term) %>% 
  mutate(x = 1 : 1000) %>% 
  ungroup() %>%
  mutate(real_value = ifelse(term == "elevation", 0.005, 0.1)) %>% 
  ggplot(aes(x = x, y = estimate)) +
  facet_wrap(~term) +
  geom_line() +
  theme_bw() +
  geom_hline(aes(yintercept = real_value), linetype = 2, size = 1) +
  labs(x="Estimates", y = "Mean Response")

  1. Submit a link to this document in R Pubs to your Moodle. This assignment is worth 25 points.