The statistical model:
\(y_t = \beta_0 + \beta_1 (\mathrm{Elevation}_s)_t + \beta_2 \mathrm{Slope}_t + (b_s)_t + \epsilon_t\)
Where \((b_s)_t\) is the random effect of the stand that observation \(t\) belongs to, with \(b_s \sim N(0, \sigma_s^2)\), and \(\epsilon_t\) is the plot-level error, with \(\epsilon_t \sim N(0, \sigma^2)\).
Let’s define the parameters:
nstand = 5   # number of stands
nplot = 4    # number of plots per stand
b0 = -1      # intercept
b1 = 0.005   # coefficient for elevation
b2 = 0.1     # coefficient for slope
sds = 2      # standard deviation of the stand-level random effect
sd = 1       # standard deviation of the plot-level error
Simulate the stand labels and the stand- and plot-level random effects:
set.seed(16)
stand = rep(LETTERS[1:nstand], each = nplot)
standeff = rep( rnorm(nstand, 0, sds), each = nplot)
ploteff = rnorm(nstand*nplot, 0, sd)
Simulate elevation and slope:
elevation = rep( runif(nstand, 1000, 1500), each = nplot)
slope = runif(nstand*nplot, 2, 75)
Simulate response variable:
resp2 = b0 + b1*elevation + b2*slope + standeff + ploteff
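As an optional sanity check (the data frame name dat below is only illustrative, not part of the assignment), the simulated variables can be gathered into a data frame and previewed:
# gather the simulated variables into one data frame and look at the first rows
dat = data.frame(stand, elevation, slope, resp2)
head(dat)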
Your tasks (complete each task in its own code chunk, and make sure to use echo=TRUE so I can see your code):
# use this chunk to answer question 1
library(lme4)
## Loading required package: Matrix
fit1 <- lmer(resp2 ~ elevation + slope + (1|stand))
summary(fit1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: resp2 ~ elevation + slope + (1 | stand)
##
## REML criterion at convergence: 82
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.65583 -0.62467 -0.01693 0.53669 1.41736
##
## Random effects:
## Groups Name Variance Std.Dev.
## stand (Intercept) 1.208 1.099
## Residual 1.358 1.165
## Number of obs: 20, groups: stand, 5
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -21.314628 6.602053 -3.228
## elevation 0.020600 0.004916 4.190
## slope 0.095105 0.016441 5.785
##
## Correlation of Fixed Effects:
## (Intr) elevtn
## elevation -0.991
## slope 0.049 -0.148
# From the output, some estimated parameters are quite different from the parameters defined above: the estimated b0 is -21.31 (true value -1) and the estimated b1 is 0.021 (true value 0.005), while the estimated b2, 0.095, is close to the true value of 0.1.
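# Optional sketch (not part of the assignment): compare the estimated variance
# components with the true values used in the simulation, sds^2 = 4 and sd^2 = 1.
VarCorr(fit1)                 # estimated standard deviations for stand and residual
as.data.frame(VarCorr(fit1))  # same estimates, with the variances in the vcov column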
# use this chunk to answer question 2
mix_fun = function(nstand = 5, nplot = 4, b0 = -1, b1 = 0.005, b2 = 0.1, sds = 2, sd = 1) {
  # stand IDs; paste0() instead of LETTERS so the function still works when nstand > 26
  stand = rep(paste0("stand", 1:nstand), each = nplot)
  standeff = rep(rnorm(nstand, 0, sds), each = nplot)       # stand-level random effects
  ploteff = rnorm(nstand * nplot, 0, sd)                    # plot-level errors
  elevation = rep(runif(nstand, 1000, 1500), each = nplot)  # stand-level elevation
  slope = runif(nstand * nplot, 2, 75)                      # plot-level slope
  resp2 = b0 + b1 * elevation + b2 * slope + standeff + ploteff
  dat = data.frame(resp2, elevation, slope, stand)
  lmer(resp2 ~ elevation + slope + (1|stand), data = dat)
}
mix_fun()
## Linear mixed model fit by REML ['lmerMod']
## Formula: resp2 ~ elevation + slope + (1 | stand)
## Data: dat
## REML criterion at convergence: 80.9781
## Random effects:
## Groups Name Std.Dev.
## stand (Intercept) 2.3573
## Residual 0.9754
## Number of obs: 20, groups: stand, 5
## Fixed Effects:
## (Intercept) elevation slope
## 10.584601 -0.005464 0.086839
# run 1000 simulations
sims1000 <- replicate(n = 1000, expr = mix_fun())
## boundary (singular) fit: see ?isSingular
## (identical singular-fit messages from other simulations omitted)
# check the number of simulated models stored
length(sims1000)
## [1] 1000
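# Optional sketch (not required by the task): count how many of the 1000 fits
# triggered the boundary (singular) message, using lme4::isSingular() on each model.
sum(sapply(sims1000, isSingular))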
# use this chunk to answer question 3
library(broom)
library(purrr)
library(furrr)
## Loading required package: future
library(tidyverse)
## -- Attaching packages --- tidyverse 1.3.0 --
## ✔ ggplot2 3.2.1 ✔ dplyr 0.8.4
## ✔ tibble 2.1.3 ✔ stringr 1.4.0
## ✔ tidyr 1.0.2 ✔ forcats 0.4.0
## ✔ readr 1.3.1
## -- Conflicts ------ tidyverse_conflicts() --
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ tidyr::unpack() masks Matrix::unpack()
variances <- sims1000 %>%
map_dfr(tidy, effects = "ran_pars", scales = "vcov")
variances %>% print(n = 6)
## # A tibble: 2,000 x 3
## term group estimate
## <chr> <chr> <dbl>
## 1 var_(Intercept).stand stand 2.61
## 2 var_Observation.Residual Residual 1.11
## 3 var_(Intercept).stand stand 9.73
## 4 var_Observation.Residual Residual 1.36
## 5 var_(Intercept).stand stand 0.827
## 6 var_Observation.Residual Residual 0.914
## # ... with 1,994 more rows
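# Optional follow-up sketch: the singular fits correspond to stand variances estimated
# at (essentially) zero, so their share can be summarised from the tidied variances.
variances %>%
  filter(group == "stand") %>%
  summarise(prop_zero = mean(estimate < 1e-8))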
# use this chunk to answer question 4
library(ggplot2)
library(tidyverse)
sims3groups <- c(5, 25, 200) %>%
  set_names(c("sample_size = 5", "sample_size = 25", "sample_size = 200")) %>%
  map(~ replicate(1000, mix_fun(nstand = .x)))
## boundary (singular) fit: see ?isSingular
## (identical singular-fit messages from other simulations omitted)
#extract variances for each of the 3 sample sizes
variances3groups <- sims3groups %>%
  modify_depth(2, ~ tidy(.x, effects = "ran_pars", scales = "vcov")) %>%
  map_dfr(bind_rows, .id = "stand_num") %>%
  filter(group == "stand")
head(variances3groups)
## # A tibble: 6 x 4
## stand_num term group estimate
## <chr> <chr> <chr> <dbl>
## 1 sample_size = 5 var_(Intercept).stand stand 12.0
## 2 sample_size = 5 var_(Intercept).stand stand 6.84
## 3 sample_size = 5 var_(Intercept).stand stand 2.89
## 4 sample_size = 5 var_(Intercept).stand stand 2.52
## 5 sample_size = 5 var_(Intercept).stand stand 0.966
## 6 sample_size = 5 var_(Intercept).stand stand 2.91
#plot variances for each of the 3 sample sizes
ggplot(variances3groups, aes(x = estimate) ) +
geom_density(fill = "red", alpha = .25) +
facet_wrap(~stand_num) +
geom_vline(xintercept = 4)
# From the output, as the sample size gets bigger, the peak of the density gets closer to the true variance of 4. In addition, there is not much difference between the sample sizes of 25 and 200. In conclusion, as the sample size increases, the precision of the estimated variance increases accordingly.
med_var3groups = variances3groups %>%
group_by(stand_num) %>%
summarise(mvar = median(estimate))
library(ggplot2)
ggplot(variances3groups, aes(x = estimate)) +
  geom_density(fill = "orange", alpha = 0.25) +
  facet_wrap(~stand_num) +
  geom_vline(aes(xintercept = 4, linetype = "True Variance"), size = 0.5) +
  geom_vline(data = med_var3groups, aes(xintercept = mvar, linetype = "Median Variance"), size = 0.5)
# use this chunk to answer question 5
library(furrr)
library(ggplot2)
coef3groups <- sims1000 %>%
future_map(tidy, effects = "fixed") %>%
bind_rows()
coef3groups
## # A tibble: 3,000 x 4
## term estimate std.error statistic
## <chr> <dbl> <dbl> <dbl>
## 1 (Intercept) -15.9 5.63 -2.83
## 2 elevation 0.0152 0.00426 3.57
## 3 slope 0.121 0.0157 7.73
## 4 (Intercept) -13.9 13.3 -1.05
## 5 elevation 0.0156 0.0117 1.33
## 6 slope 0.124 0.0166 7.48
## 7 (Intercept) -12.3 4.45 -2.75
## 8 elevation 0.0130 0.00336 3.87
## 9 slope 0.0937 0.0100 9.32
## 10 (Intercept) -11.9 21.4 -0.556
## # ... with 2,990 more rows
coef3groups %>%
dplyr::filter(term %in% c("elevation", "slope")) %>%
group_by(term) %>%
mutate(x = 1 : 1000) %>%
ungroup() %>%
mutate(real_value = ifelse(term == "elevation", 0.005, 0.1)) %>%
ggplot(aes(x = x, y = estimate)) +
geom_line() +
facet_wrap(~term) +
geom_hline(aes(yintercept = real_value, color = term), linetype = 4, size = 0.5) +
theme_bw()
# From the output, the individual coefficient estimates fluctuate quite a bit from one simulation to the next. However, averaged over the repeated simulations, the estimated elevation and slope coefficients center on their true values (0.005 and 0.1).
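# A quick numerical check of that claim (a sketch, not required by the task): average
# the estimates for each term across the 1000 simulations and compare them with the
# true values b1 = 0.005 and b2 = 0.1.
coef3groups %>%
  dplyr::filter(term %in% c("elevation", "slope")) %>%
  group_by(term) %>%
  summarise(mean_estimate = mean(estimate))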