The statistical model:

\(y_t = \beta_0 + \beta_1 * (Elevation_s)_t + \beta_2 * Slope_t + (b_s)_t + \epsilon_t\)

Where:

Let’s define the parameters:

nstand = 5
nplot = 4
b0 = -1
b1 = .005
b2 = .1
sds = 2
sd = 1

Simulate other variables:

set.seed(16)
stand = rep(LETTERS[1:nstand], each = nplot)
standeff = rep( rnorm(nstand, 0, sds), each = nplot)
ploteff = rnorm(nstand*nplot, 0, sd)

Simulate elevation and slope:

elevation = rep( runif(nstand, 1000, 1500), each = nplot)
slope = runif(nstand*nplot, 2, 75)

Simulate response variable:

resp2 = b0 + b1*elevation + b2*slope + standeff + ploteff 

Your tasks (complete each task in its’ own code chunk, make sure to use echo=TRUE so I can see your code):

  1. Fit a linear mixed model with the response variable as a function of elevation and slope with stand as a random effect. Are the estimated parameters similar to the intial parameters as we defined them?

b0 = -1b1 = 0.005b2 = 0.1

b1 = 0.005, which is much smaller than the initial parameter as we defined; b2 = 0.1, which is close to the initial parameter as we defined;

library(lme4)
## Loading required package: Matrix
fit1 = lmer(resp2 ~ 1 + elevation + slope+ (1|stand))
cat("b0 = ", b0, sep = "")
## b0 = -1
cat("b1 = ", b1, sep = "")
## b1 = 0.005
cat("b2 = ", b2, sep = "")
## b2 = 0.1
  1. Create a function for your model and run 1000 simulations of that model.
twolevel_fun<-function(nstand = 5, nplot = 4, b0 = -1, b1 = 0.005, b2 = 0.1, sds = 2, sd = 1) {
     standeff = rep( rnorm(nstand, 0, sds), each = nplot)
     stand = rep(LETTERS[1:nstand], each = nplot)
     ploteff = rnorm(nstand*nplot, 0, sd)
     elevation = rep( runif(nstand, 1000, 1500), each = nplot)
     slope = runif(nstand*nplot, 2, 75)
     resp2 = b0 + b1*elevation + b2*slope + standeff + ploteff
     dat = data.frame(resp2, elevation, slope, stand)
     lmer(resp2 ~ 1 + elevation + slope+ (1|stand))
}

sims <-replicate(1000, twolevel_fun() )
sims[[1000]]
## Linear mixed model fit by REML ['lmerMod']
## Formula: resp2 ~ 1 + elevation + slope + (1 | stand)
## REML criterion at convergence: 83.9657
## Random effects:
##  Groups   Name        Std.Dev.
##  stand    (Intercept) 2.628   
##  Residual             1.053   
## Number of obs: 20, groups:  stand, 5
## Fixed Effects:
## (Intercept)    elevation        slope  
##   3.0727412    0.0001542    0.1027728
  1. Extract the stand and residual variances from this simulation run. Print the first 6 rows of the data.
library(broom)
tidy(fit1)
## Warning in bind_rows_(x, .id): binding factor and character vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## # A tibble: 5 x 5
##   term                    estimate std.error statistic group   
##   <chr>                      <dbl>     <dbl>     <dbl> <chr>   
## 1 (Intercept)             -21.3      6.60        -3.23 fixed   
## 2 elevation                 0.0206   0.00492      4.19 fixed   
## 3 slope                     0.0951   0.0164       5.78 fixed   
## 4 sd_(Intercept).stand      1.10    NA           NA    stand   
## 5 sd_Observation.Residual   1.17    NA           NA    Residual
tidy(fit1, effects = "fixed")
## # A tibble: 3 x 4
##   term        estimate std.error statistic
##   <chr>          <dbl>     <dbl>     <dbl>
## 1 (Intercept) -21.3      6.60        -3.23
## 2 elevation     0.0206   0.00492      4.19
## 3 slope         0.0951   0.0164       5.78
tidy(fit1, effects = "ran_pars", scales = "vcov")
## # A tibble: 2 x 3
##   term                     group    estimate
##   <chr>                    <chr>       <dbl>
## 1 var_(Intercept).stand    stand        1.21
## 2 var_Observation.Residual Residual     1.36
library(purrr) # v. 0.2.4
suppressPackageStartupMessages( library(dplyr) ) # v. 0.7.4
library(ggplot2) # v. 2.2.1
stand_sims = c(5, 20, 100) %>%
     set_names() %>%
     map(~replicate(1000, twolevel_fun(nstand = .x) ) )
stand_vars = stand_sims %>%
     modify_depth(2, ~tidy(.x, effects = "ran_pars", scales = "vcov") ) %>%
     map_dfr(bind_rows, .id = "stand_num") %>%
     filter(group == "stand")
head(stand_vars)
## # A tibble: 6 x 4
##   stand_num term                  group estimate
##   <chr>     <chr>                 <chr>    <dbl>
## 1 5         var_(Intercept).stand stand    1.92 
## 2 5         var_(Intercept).stand stand   12.0  
## 3 5         var_(Intercept).stand stand    6.84 
## 4 5         var_(Intercept).stand stand    2.89 
## 5 5         var_(Intercept).stand stand    2.52 
## 6 5         var_(Intercept).stand stand    0.966
  1. Choose three different sample sizes (your choice) and run 1000 model simulations with each sample size. Create 3 visualizations that compare distributions of the variances for each of the 3 sample sizes. Make sure that the axes are labelled correctly. What do these graphs say about the relationship between sample size and variance?

As we can see the real variance of stand is 4. The estimates are more accurate when the sample is increasing.

library(purrr) # v. 0.2.4
suppressPackageStartupMessages( library(dplyr) ) # v. 0.7.4
library(ggplot2) # v. 2.2.1
stand_sims = c(5, 20, 100) %>%
     set_names(c("size_5", "size_20", "size_100")) %>%
     map(~replicate(1000, twolevel_fun(nstand = .x) ) )

stand_vars = stand_sims %>%
     modify_depth(2, ~tidy(.x, effects = "ran_pars", scales = "vcov") ) %>%
     map_dfr(bind_rows, .id = "stand_num") %>%
     filter(group == "stand")
head(stand_vars)
## # A tibble: 6 x 4
##   stand_num term                  group estimate
##   <chr>     <chr>                 <chr>    <dbl>
## 1 size_5    var_(Intercept).stand stand     7.09
## 2 size_5    var_(Intercept).stand stand     2.53
## 3 size_5    var_(Intercept).stand stand    10.3 
## 4 size_5    var_(Intercept).stand stand     1.62
## 5 size_5    var_(Intercept).stand stand     9.98
## 6 size_5    var_(Intercept).stand stand     3.19
ggplot(stand_vars, aes(x = estimate) ) +
     geom_density(fill = "blue", alpha = .25) +
     facet_wrap(~stand_num) +
     geom_vline(xintercept = 4)

stand_vars = mutate(stand_vars, stand_num = forcats::fct_inorder(stand_num) )
groupmed = stand_vars %>%
     group_by(stand_num) %>%
     summarise(mvar = median(estimate) )
ggplot(stand_vars, aes(x = estimate) ) + 
     geom_density(fill = "blue", alpha = .25) +
     facet_wrap(~stand_num) +
     geom_vline(aes(xintercept = 4, linetype = "True variance"), size = .5 ) +
     geom_vline(data = groupmed, aes(xintercept = mvar, linetype = "Median variance"),
                size = .5) +
     theme_bw() +
     scale_linetype_manual(name = "", values = c(2, 1) ) +
     theme(legend.position = "bottom",
           legend.key.width = unit(.1, "cm") ) +
     labs(x = "Estimated Variance", y = NULL)

stand_vars %>%
     group_by(stand_num) %>%
     summarise(mean(estimate < 4) )
## # A tibble: 3 x 2
##   stand_num `mean(estimate < 4)`
##   <fct>                    <dbl>
## 1 size_5                   0.604
## 2 size_20                  0.559
## 3 size_100                 0.54
  1. Plot the coefficients of the estimates of elevation and slope. Hint: the x-axis should have 1000 values. Discuss the graphs.

The estimates fluctuate aroung the real value.

library(furrr)
## Loading required package: future
## Warning: package 'future' was built under R version 3.5.2
simul_estimates <- sims %>% 
  future_map(tidy, effects = "fixed") %>% 
  bind_rows()
simul_estimates
## # A tibble: 3,000 x 4
##    term         estimate std.error statistic
##    <chr>           <dbl>     <dbl>     <dbl>
##  1 (Intercept)  10.6      10.3         1.03 
##  2 elevation    -0.00546   0.00927    -0.589
##  3 slope         0.0868    0.0140      6.21 
##  4 (Intercept) -15.9       5.63       -2.83 
##  5 elevation     0.0152    0.00426     3.57 
##  6 slope         0.121     0.0157      7.73 
##  7 (Intercept) -13.9      13.3        -1.05 
##  8 elevation     0.0156    0.0117      1.33 
##  9 slope         0.124     0.0166      7.48 
## 10 (Intercept) -12.3       4.45       -2.75 
## # ... with 2,990 more rows
simul_estimates %>% 
  dplyr::filter(term %in% c("elevation", "slope")) %>% 
  group_by(term) %>% 
  mutate(x = 1:1000) %>%
  ungroup() %>% 
  mutate(real_value = ifelse(term == "elevation", 0.005, 0.1)) %>% 
  ggplot(aes(x = x, y = estimate)) +
  geom_line() +
  facet_wrap(~ term) +
  geom_hline(aes(yintercept = real_value, color = term), linetype = 2, size = 0.8) 

  1. Submit a link to this document in R Pubs to your Moodle.