The statistical model:
\(y_t = \beta_0 + \beta_1 * (Elevation_s)_t + \beta_2 * Slope_t + (b_s)_t + \epsilon_t\)
Where:
Let’s define the parameters:
nstand = 5
nplot = 4
b0 = -1
b1 = .005
b2 = .1
sds = 2
sd = 1
Simulate other variables:
set.seed(16)
stand = rep(LETTERS[1:nstand], each = nplot)
standeff = rep( rnorm(nstand, 0, sds), each = nplot)
ploteff = rnorm(nstand*nplot, 0, sd)
Simulate elevation and slope:
elevation = rep( runif(nstand, 1000, 1500), each = nplot)
slope = runif(nstand*nplot, 2, 75)
Simulate response variable:
resp2 = b0 + b1*elevation + b2*slope + standeff + ploteff
Your tasks (complete each task in its’ own code chunk, make sure to use echo=TRUE so I can see your code):
resp2 <- b0 + b1*elevation + b2*slope + standeff + ploteff
library(Matrix)
library(lme4)
fit2 <- lmer(resp2 ~ elevation + slope + (1|stand))
fit2
## Linear mixed model fit by REML ['lmerMod']
## Formula: resp2 ~ elevation + slope + (1 | stand)
## REML criterion at convergence: 81.9874
## Random effects:
## Groups Name Std.Dev.
## stand (Intercept) 1.099
## Residual 1.165
## Number of obs: 20, groups: stand, 5
## Fixed Effects:
## (Intercept) elevation slope
## -21.31463 0.02060 0.09511
summary(fit2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: resp2 ~ elevation + slope + (1 | stand)
##
## REML criterion at convergence: 82
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.65583 -0.62467 -0.01693 0.53669 1.41736
##
## Random effects:
## Groups Name Variance Std.Dev.
## stand (Intercept) 1.208 1.099
## Residual 1.358 1.165
## Number of obs: 20, groups: stand, 5
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -21.314628 6.602053 -3.228
## elevation 0.020600 0.004916 4.190
## slope 0.095105 0.016441 5.785
##
## Correlation of Fixed Effects:
## (Intr) elevtn
## elevation -0.991
## slope 0.049 -0.148
set.seed(16)
mixed_fun = function (nstand = 5, nplot = 4, b0 = -1, b1 = 0.005, b2 = 0.1, sds = 2, sd = 1) {
stand = rep(LETTERS[1:nstand], each = nplot)
elevation = rep( runif(nstand, 1000, 1500), each = nplot)
slope = runif(nstand*nplot, 2, 75)
standeff = rep( rnorm(nstand, 0, sds), each = nplot)
ploteff = rnorm(nstand*nplot, 0, sd)
resp = b0 + b1*elevation + b2*slope + standeff + ploteff
dat <- data.frame(elevation, slope, stand, resp)
lmer(resp ~ elevation + slope + (1|stand), data = dat)
}
set.seed(16)
simsmix = replicate (1000, mixed_fun(), simplify = FALSE)
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
library(purrr)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:Matrix':
##
## expand
library(broom)
tidy(fit2, effects = "ran_pars", scales = "vcov")
## # A tibble: 2 x 3
## term group estimate
## <chr> <chr> <dbl>
## 1 var_(Intercept).stand stand 1.21
## 2 var_Observation.Residual Residual 1.36
library("broom")
head(map_dfr(simsmix, broom::tidy, .id = "model", effects = "ran_pars"))
## # A tibble: 6 x 4
## model term group estimate
## <chr> <chr> <chr> <dbl>
## 1 1 sd_(Intercept).stand stand 1.37
## 2 1 sd_Observation.Residual Residual 0.864
## 3 2 sd_(Intercept).stand stand 2.03
## 4 2 sd_Observation.Residual Residual 1.00
## 5 3 sd_(Intercept).stand stand 1.51
## 6 3 sd_Observation.Residual Residual 0.945
The graphs below showcase a close relationship between the median of the estimated variance and the true variance. As the sample size increases, the median gets closer to the true value. Also, the distribution of the estimated variance becomes to look like a bell-shaped normal distribution when the sample size gets bigger.
This relationship is significant when the sample size increases from 5 to 20, which, however, is not so obvious when the sample size increases from 20 to 100.
Therefore, the threshold of the relationship between the median of the estimated variance and the true variance is 20. When the sample size exceeds this value, the increase in sample size will not improve the accuracy of the variance estimate.
stand_sims <- c(5, 20, 100) %>% set_names() %>% map(~replicate (1000, mixed_fun(nstand = .x)))
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
stand_vars = stand_sims %>% modify_depth(2, ~tidy(.x, effects = "ran_pars", scales = "vcov")) %>% map_dfr(bind_rows, .id = "stand_number") %>% filter (group == "stand")
head(stand_vars)
## # A tibble: 6 x 4
## stand_number term group estimate
## <chr> <chr> <chr> <dbl>
## 1 5 var_(Intercept).stand stand 5.20
## 2 5 var_(Intercept).stand stand 2.46
## 3 5 var_(Intercept).stand stand 11.9
## 4 5 var_(Intercept).stand stand 1.78
## 5 5 var_(Intercept).stand stand 3.62
## 6 5 var_(Intercept).stand stand 4.07
library(forcats)
stand_vars = mutate(stand_vars, stand_number = fct_inorder(stand_number))
add_prefix <- function(string) { paste("Number of Stands:", string, sep = " ")}
groupmed = stand_vars %>% group_by(stand_number) %>% summarise(mvar = median(estimate))
library(ggplot2)
ggplot(stand_vars, aes(x = estimate)) + geom_density(fill = "orange", alpha = 0.25) + facet_wrap(~stand_number, labeller = as_labeller(add_prefix)) + geom_vline(aes(xintercept = 4, linetype = "True Variance"), size = 0.5) + geom_vline(data = groupmed, aes(xintercept = mvar, linetype = "Median Variance"), size = 0.5) + theme_bw() + scale_linetype_manual(name = "", values = c(2,1)) + theme(legend.position = "bottom", legend.key.width = unit(.1, "cm")) + labs(x = "Estimated Variance", y = "Probability Density")
From the graph below, we know that the estimate centers around 0.005, which is the true value of the elevation coefficient. Except for a few outliers, most of the estimated values reside in the range of -0.025 and 0.04.
library("tidyr")
library("broom")
library("ggplot2")
mixeffect_df = map_dfr(simsmix, broom::tidy, .id = "model", terms = "elevation")
elevation_points = mixeffect_df[which(mixeffect_df$term == "elevation"),c(1,3)]
elevation_points$model <- as.numeric(elevation_points$model)
ggplot(data = elevation_points, aes(y = estimate, x = model)) + geom_line() + geom_hline(yintercept = 0.005, size = 1, color = "coral") + labs(y = "Estimated Values", x = "Simulation Run") + ggtitle("Elevation Coefficients Plot")
From the graph below, we know that the estimate centers around 0.1, which is the true value of the slope coefficient. Except for a few outliers, most of the estimated values reside in the range of 0.07 and 0.13.
slope_points = mixeffect_df[which(mixeffect_df$term == "slope"),c(1,3)]
slope_points$model <- as.numeric(slope_points$model)
ggplot(data = slope_points, aes(y = estimate, x = model)) + geom_line() + geom_hline(yintercept = 0.1, size = 1, color = "coral") + labs(y = "Estimated Values", x = "Simulation Run") + ggtitle("Slope Coefficients Plot")