The statistical model:
\(y_t = \beta_0 + \beta_1 * (Elevation_s)_t + \beta_2 * Slope_t + (b_s)_t + \epsilon_t\)
Where:
Let’s define the parameters:
nstand = 5
nplot = 4
b0 = -1
b1 = .005
b2 = .1
sds = 2
sd = 1
Simulate other variables:
set.seed(16)
stand = rep(LETTERS[1:nstand], each = nplot)
standeff = rep( rnorm(nstand, 0, sds), each = nplot)
ploteff = rnorm(nstand*nplot, 0, sd)
Simulate elevation and slope:
elevation = rep( runif(nstand, 1000, 1500), each = nplot)
slope = runif(nstand*nplot, 2, 75)
Simulate response variable:
resp2 = b0 + b1*elevation + b2*slope + standeff + ploteff
resp2
## [1] 11.671585 12.325584 13.316671 12.796432 11.868602 8.983889 12.370697
## [8] 8.596145 16.352939 12.693014 7.723378 10.365894 5.925563 2.718576
## [15] 3.999244 4.950275 11.571009 13.595712 13.588022 11.781083
Your tasks (complete each task in its’ own code chunk, make sure to use echo=TRUE so I can see your code):
library(lme4)
## Loading required package: Matrix
fit1 <- lmer(resp2 ~ 1 + elevation + slope + (1|stand))
summary(fit1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: resp2 ~ 1 + elevation + slope + (1 | stand)
##
## REML criterion at convergence: 82
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.65583 -0.62467 -0.01693 0.53669 1.41736
##
## Random effects:
## Groups Name Variance Std.Dev.
## stand (Intercept) 1.208 1.099
## Residual 1.358 1.165
## Number of obs: 20, groups: stand, 5
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -21.314628 6.602050 -3.228
## elevation 0.020600 0.004916 4.190
## slope 0.095105 0.016441 5.785
##
## Correlation of Fixed Effects:
## (Intr) elevtn
## elevation -0.991
## slope 0.049 -0.148
The estimated parameters are different from the defined intial parameters. The estimated intercept b0 (-21.31) is smaller then defined intercept b0 (-1), estimated elevation b1 (0.02) is higher then defined elevation b1 (0.005) and the estimated slope b2 (0.095) is slighty less then defined slope b2 (0.1).
mix_fun = function(nstand = 5, nplot = 4, b0 = -1, b1 = 0.005, b2 = 0.1, sds = 2, sd = 1) {
stand = rep(LETTERS[1:nstand], each = nplot)
standeff = rep(rnorm(nstand, 0, sds), each = nplot)
ploteff = rnorm(nstand * nplot, 0, sd)
elevation = rep(runif(nstand, 1000, 1500), each = nplot)
slope = runif(nstand * nplot, 2, 75)
resp2 = b0 + b1 * elevation + b2 * slope + standeff + ploteff
dat = data.frame(resp2, elevation, slope, stand)
lmer(resp2 ~ elevation + slope + (1|stand), data = dat)
}
mix_fun()
## Linear mixed model fit by REML ['lmerMod']
## Formula: resp2 ~ elevation + slope + (1 | stand)
## Data: dat
## REML criterion at convergence: 80.9781
## Random effects:
## Groups Name Std.Dev.
## stand (Intercept) 2.3573
## Residual 0.9754
## Number of obs: 20, groups: stand, 5
## Fixed Effects:
## (Intercept) elevation slope
## 10.584601 -0.005464 0.086839
#run 1000 simulations of the above model
sims1000 <- replicate(n = 1000, expr = mix_fun())
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
#check the number of iterations
length(sims1000)
## [1] 1000
library(broom)
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.0 v purrr 0.3.3
## v tibble 3.0.0 v dplyr 0.8.5
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts -------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x tidyr::pack() masks Matrix::pack()
## x tidyr::unpack() masks Matrix::unpack()
variances <- sims1000 %>% map_dfr(tidy, effects = "ran_pars", scales = "vcov")
head(variances,6)
## # A tibble: 6 x 3
## term group estimate
## <chr> <chr> <dbl>
## 1 var_(Intercept).stand stand 2.61
## 2 var_Observation.Residual Residual 1.11
## 3 var_(Intercept).stand stand 9.73
## 4 var_Observation.Residual Residual 1.36
## 5 var_(Intercept).stand stand 0.827
## 6 var_Observation.Residual Residual 0.914
library(ggplot2)
library(furrr)
## Loading required package: future
library(dplyr)
library(tidyverse)
plan(multiprocess)
smis3groups <- c(5, 25, 200) %>%
set_names(c("sample_size = 5","sample_size = 25", "sample_size = 200" )) %>%
map(~replicate(1000, mix_fun(nstand = .x) ) )
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
## singular fit
#extract variances for each of the 3 sample sizes
variances3groups <- smis3groups %>%
modify_depth(2, ~tidy(.x, effects = "ran_pars", scales = "vcov") ) %>%
map_dfr(bind_rows, .id = "stand_num") %>%
filter(group == "stand")
#plot variances for each of the 3 sample sizes
ggplot(variances3groups, aes(x = estimate) ) +
geom_density(fill = "red", alpha = .25) +
facet_wrap(~stand_num) +
geom_vline(xintercept = 4)
The sample size and Variance are directly proportional and as the sample size increases, the variance increases.
library(tidyverse)
library(broom)
library(dplyr)
library(ggplot2)
simul_estimates <- sims1000 %>%
map(tidy, effects = "fixed") %>%
bind_rows()
simul_estimates
## # A tibble: 3,000 x 4
## term estimate std.error statistic
## <chr> <dbl> <dbl> <dbl>
## 1 (Intercept) -15.9 5.63 -2.83
## 2 elevation 0.0152 0.00426 3.57
## 3 slope 0.121 0.0157 7.73
## 4 (Intercept) -13.9 13.3 -1.05
## 5 elevation 0.0156 0.0117 1.33
## 6 slope 0.124 0.0166 7.48
## 7 (Intercept) -12.3 4.45 -2.75
## 8 elevation 0.0130 0.00336 3.87
## 9 slope 0.0937 0.0100 9.32
## 10 (Intercept) -11.9 21.4 -0.556
## # ... with 2,990 more rows
simul_estimates %>%
dplyr::filter(term %in% c("elevation", "slope")) %>%
group_by(term) %>%
mutate(x = 1 : 1000) %>%
ungroup() %>%
mutate(real_value = ifelse(term == "elevation", 0.005, 0.1)) %>%
ggplot(aes(x = x, y = estimate)) +
geom_line() +
facet_wrap(~term) +
geom_hline(aes(yintercept = real_value, color = term), linetype = 4, size = 0.5) +
theme_bw()
The output shows the estminated coefficients from simulation model fluctuate.