The statistical model:
\(y_t = \beta_0 + \beta_1\,(\text{Elevation}_s)_t + \beta_2\,\text{Slope}_t + (b_s)_t + \epsilon_t\)

Where:

- \(\beta_0\) is the mean response when both Elevation and Slope are 0.
- \(\beta_1\) is the change in mean response for a 1-unit change in elevation. Elevation is measured at the stand level, so all plots in a stand share a single value of elevation.
- \(\beta_2\) is the change in mean response for a 1-unit change in slope. Slope is measured at the plot level, so every plot potentially has a unique value of slope.
- \(b_s\) is the stand-level random effect, drawn from \(N(0, \sigma_s^2)\), and \(\epsilon_t\) is the observation-level error, drawn from \(N(0, \sigma^2)\).

Let’s define the parameters:

- the intercept \((\beta_0)\) will be -1
- the coefficient for elevation \((\beta_1)\) will be set to 0.005
- the coefficient for slope \((\beta_2)\) will be set to 0.1
- the stand-level standard deviation \((\sigma_s)\) will be set to 2 and the observation-level standard deviation \((\sigma)\) will be set to 1

nstand = 5  # number of stands
nplot = 4   # number of plots per stand
b0 = -1     # intercept
b1 = .005   # coefficient for elevation
b2 = .1     # coefficient for slope
sds = 2     # stand-level standard deviation
sd = 1      # observation-level (residual) standard deviation

Simulate other variables:

set.seed(16)
stand = rep(LETTERS[1:nstand], each = nplot)
standeff = rep( rnorm(nstand, 0, sds), each = nplot)
ploteff = rnorm(nstand*nplot, 0, sd)

Simulate elevation and slope:

elevation = rep( runif(nstand, 1000, 1500), each = nplot)
slope = runif(nstand*nplot, 2, 75)

Simulate response variable:

resp2 = b0 + b1*elevation + b2*slope + standeff + ploteff 
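Before fitting, it can be convenient to gather the simulated vectors into a single data frame and look at the first few rows. This is just an illustrative sketch (the name `dat` is mine, not part of the assignment); the model fits below use these same variables.

dat <- data.frame(stand, elevation, slope, resp2)  # illustrative: collect the simulated variables
head(dat)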

Your tasks (complete each task in its own code chunk; make sure to use echo=TRUE so I can see your code):

  1. Fit a linear mixed model with the response variable as a function of elevation and slope, with stand as a random effect. Are the estimated parameters similar to the initial parameters we defined?
library(lme4)
## Loading required package: Matrix
library(lmerTest)
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
# variables are taken from the global environment (simulated above)
linear_model <- lmer(resp2 ~ 1 + elevation + slope + (1 | stand))
summary_lm <- summary(linear_model)
summary_lm$coefficients
##                 Estimate  Std. Error        df   t value     Pr(>|t|)
## (Intercept) -21.31462843 6.602052944  3.001313 -3.228485 4.824164e-02
## elevation     0.02060019 0.004916391  3.113482  4.190104 2.303393e-02
## slope         0.09510535 0.016441240 15.868032  5.784560 2.884449e-05
df_coeff <- data.frame(summary_lm$coefficients)
df_coeff_compare <- data.frame(initial_para = c(b0, b1, b2),
                               estimate_para = df_coeff$Estimate)
df_coeff_compare$difference <- df_coeff_compare$initial_para - df_coeff_compare$estimate_para
df_coeff_compare
##   initial_para estimate_para   difference
## 1       -1.000  -21.31462843 20.314628426
## 2        0.005    0.02060019 -0.015600188
## 3        0.100    0.09510535  0.004894651
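From the comparison, the slope coefficient is recovered quite closely (0.095 vs. 0.1), while the intercept and the elevation coefficient are further from their true values, which is not surprising given that elevation only takes five distinct values (one per stand). One way to put these differences in context is to check whether the true values fall inside confidence intervals for the fixed effects; the sketch below uses Wald intervals (choosing method = "Wald" is my assumption, not part of the assignment).

# Wald confidence intervals for the fixed effects (illustrative check)
confint(linear_model, parm = "beta_", method = "Wald")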
  2. Create a function for your model and run 1000 simulations of that model.
linear_model_function <- function(nstand = 5, nplot = 4, b0 = -1, b1 = 0.005, b2 = 0.1, sds = 2, sd = 1) {
  stand <- rep(LETTERS[1:nstand], each = nplot)               # stand labels
  standeff <- rep(rnorm(nstand, 0, sds), each = nplot)        # stand-level random effects
  ploteff <- rnorm(nstand * nplot, 0, sd)                     # observation-level errors
  elevation <- rep(runif(nstand, 1000, 1500), each = nplot)   # stand-level covariate
  slope <- runif(nstand * nplot, 2, 75)                       # plot-level covariate
  resp2 <- b0 + b1 * elevation + b2 * slope + standeff + ploteff  # response
  df_lin_mix <- data.frame(resp2, elevation, slope, stand)
  lmer(resp2 ~ 1 + elevation + slope + (1 | stand), data = df_lin_mix)
}
sim_1000 <- replicate(n = 1000, expr = linear_model_function())
## boundary (singular) fit: see ?isSingular
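With only five stands per simulated data set, some fits estimate the stand variance at exactly zero, which is what the repeated "boundary (singular) fit" messages indicate. If those messages are unwanted in the knitted output, one option (shown here only as a commented-out alternative, so the results above are unchanged) is to wrap the function call in suppressMessages():

# optional: silence lme4's singular-fit messages while simulating
# sim_1000 <- replicate(n = 1000, expr = suppressMessages(linear_model_function()))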
  3. Extract the stand and residual variances from this simulation run. Print the first 6 rows of the data.
library(purrr)
library(furrr)
## Loading required package: future
library(broom)
library(broom.mixed)
## Registered S3 methods overwritten by 'broom.mixed':
##   method         from 
##   augment.lme    broom
##   augment.merMod broom
##   glance.lme     broom
##   glance.merMod  broom
##   glance.stanreg broom
##   tidy.brmsfit   broom
##   tidy.gamlss    broom
##   tidy.lme       broom
##   tidy.merMod    broom
##   tidy.rjags     broom
##   tidy.stanfit   broom
##   tidy.stanreg   broom
## 
## Attaching package: 'broom.mixed'
## The following object is masked from 'package:broom':
## 
##     tidyMCMC
library(tidyverse)
## -- Attaching packages ------------------------------------------ tidyverse 1.2.1 --
## √ ggplot2 3.2.0     √ readr   1.3.1
## √ tibble  2.1.3     √ dplyr   0.8.3
## √ tidyr   0.8.3     √ stringr 1.4.0
## √ ggplot2 3.2.0     √ forcats 0.4.0
## -- Conflicts --------------------------------------------- tidyverse_conflicts() --
## x tidyr::expand()         masks Matrix::expand()
## x dplyr::filter()         masks stats::filter()
## x dplyr::lag()            masks stats::lag()
## x broom.mixed::tidyMCMC() masks broom::tidyMCMC()
extract_var <- sim_1000 %>% map_dfr(tidy, effects = "ran_pars", scales = "vcov")
extract_var %>% print(n = 6)
## # A tibble: 2,000 x 4
##   effect   group    term             estimate
##   <chr>    <chr>    <chr>               <dbl>
## 1 ran_pars stand    var__(Intercept)    5.56 
## 2 ran_pars Residual var__Observation    0.951
## 3 ran_pars stand    var__(Intercept)    2.61 
## 4 ran_pars Residual var__Observation    1.11 
## 5 ran_pars stand    var__(Intercept)    9.73 
## 6 ran_pars Residual var__Observation    1.36 
## # ... with 1,994 more rows
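The true values behind these estimates are \(\sigma_s^2 = 2^2 = 4\) for the stand variance and \(\sigma^2 = 1^2 = 1\) for the residual variance. A quick sanity check is to average the estimates by group across the 1000 simulations; this is an illustrative summary (the column names are mine):

# average the extracted variances by group (illustrative)
extract_var %>%
  group_by(group) %>%
  summarise(mean_var = mean(estimate), median_var = median(estimate))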
  4. Choose three different sample sizes (your choice) and run 1000 model simulations with each sample size. Create 3 visualizations that compare the distributions of the variances for each of the 3 sample sizes. Make sure that the axes are labelled correctly. What do these graphs say about the relationship between sample size and variance?
library(dplyr)
library(ggplot2)

simulation_3_sample = c(10, 50, 100) %>%
  set_names(c("sample size=10", "sample size=50", "sample size=100")) %>%
  map(~replicate(1000, linear_model_function(nstand = .x)))
## boundary (singular) fit: see ?isSingular
extract_var_3_sample <- simulation_3_sample %>%
  modify_depth(2, ~tidy(.x, effects = "ran_pars", scales = "vcov")) %>%
  map_dfr(bind_rows, .id = "id")

subset_var <- subset(extract_var_3_sample, group == "stand")

mean(subset_var$estimate)
## [1] 3.954695
median(subset_var$estimate)
## [1] 3.790024
ggplot(subset_var, aes(x = estimate)) +
  geom_density(fill = "navy", alpha = 0.5) +
  facet_wrap(group ~ id) +
  geom_vline(xintercept = c(mean(subset_var$estimate), median(subset_var$estimate))) +
  labs(title = "Distributions of the variances for each of the 3 sample sizes",
       x = "Estimated stand variance",
       y = "Density")
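To complement the density plots, the spread of the stand-variance estimates can be summarised within each sample size; the expectation is that the estimates cluster more tightly around the true value of 4 as the number of stands increases. This is an illustrative sketch (column names are mine):

# spread of the stand variance estimates by sample size (illustrative)
subset_var %>%
  group_by(id) %>%
  summarise(mean_var = mean(estimate), sd_var = sd(estimate))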

  5. Plot the coefficients of the estimates of elevation and slope. Hint: the x-axis should have 1000 values. Discuss the graphs.
sim_1000_coeff <- sim_1000 %>% 
  future_map(tidy, effects = "fixed") %>% 
  bind_rows()



df_graph<-sim_1000_coeff %>% 
  dplyr::filter(term %in% c("elevation", "slope")) %>% 
  group_by(term) %>% 
  mutate(range = 1 : 1000) %>%
  ungroup()

# compute the mean slope and elevation estimates across the 1000 simulations
mean_slope <- mean(df_graph$estimate[df_graph$term == "slope"])
mean_elevation <- mean(df_graph$estimate[df_graph$term == "elevation"])

# add the mean estimate for each term (slope and elevation)
df_graph_revised <- df_graph %>%
  mutate(term_mean_est = ifelse(term == "slope", mean_slope, mean_elevation))

ggplot(df_graph_revised, aes(x = range, y = estimate)) +
  geom_line() +
  facet_wrap(effect ~ term) +
  geom_hline(aes(yintercept = term_mean_est, color = term)) +
  labs(title = "Plot of the coefficients of the estimates of elevation and slope",
       x = "Simulation number",
       y = "Coefficient estimate")
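To support the discussion, the 1000 estimates per term can also be summarised and compared against the true values (b1 = 0.005 for elevation, b2 = 0.1 for slope); this is an illustrative sketch (column names are mine):

# summarise the simulated coefficient estimates by term (illustrative)
df_graph %>%
  group_by(term) %>%
  summarise(mean_estimate = mean(estimate), sd_estimate = sd(estimate))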