The statistical model:
\(y_t = \beta_0 + \beta_1 * (Elevation_s)_t + \beta_2 * Slope_t + (b_s)_t + \epsilon_t\)
Where:
Let’s define the parameters:
nstand = 5
nplot = 4
b0 = -1
b1 = .005
b2 = .1
sds = 2
sd = 1
Simulate other variables:
set.seed(16)
stand = rep(LETTERS[1:nstand], each = nplot)
standeff = rep( rnorm(nstand, 0, sds), each = nplot)
ploteff = rnorm(nstand*nplot, 0, sd)
Simulate elevation and slope:
elevation = rep( runif(nstand, 1000, 1500), each = nplot)
slope = runif(nstand*nplot, 2, 75)
Simulate response variable:
resp2 = b0 + b1*elevation + b2*slope + standeff + ploteff
Your tasks (complete each task in its’ own code chunk, make sure to use echo=TRUE so I can see your code):
# use this chunk to answer question 1
library(Matrix)
library(lme4)
fit2 <- lmer(resp2 ~ elevation + slope + (1|stand))
options(scipen=100,digits=4)
summary(fit2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: resp2 ~ elevation + slope + (1 | stand)
##
## REML criterion at convergence: 82
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6558 -0.6247 -0.0169 0.5367 1.4174
##
## Random effects:
## Groups Name Variance Std.Dev.
## stand (Intercept) 1.21 1.10
## Residual 1.36 1.17
## Number of obs: 20, groups: stand, 5
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -21.31463 6.60205 -3.23
## elevation 0.02060 0.00492 4.19
## slope 0.09511 0.01644 5.78
##
## Correlation of Fixed Effects:
## (Intr) elevtn
## elevation -0.991
## slope 0.049 -0.148
Estimated slope (0.09511) is close to the initial parameter 0.1, but estimated intercept (-21.31463) and estimated elevation (0.02060) is both not close to the initial parameters -1 and 0.005 respectively.
# use this chunk to answer question 2
linear_fit <- function(nstand = 5, nplot = 4, b0 = -1, b1 = 0.005, b2 = 0.1, sds = 2, sd = 1) {
stand <- rep(LETTERS[1:nstand], each = nplot)
standeff <- rep(rnorm(nstand, 0, sds), each = nplot)
ploteff <- rnorm(nstand * nplot, 0, sd)
elevation <- rep(runif(nstand, 1000, 1500), each = nplot)
slope <- runif(nstand * nplot, 2, 75)
resp2 <- b0 + b1 * elevation + b2 * slope + standeff + ploteff
dat <- data.frame(resp2, elevation, slope, stand)
lmer(resp2 ~ 1 + elevation + slope + (1|stand), data = dat)
}
linear_fit()
## Linear mixed model fit by REML ['lmerMod']
## Formula: resp2 ~ 1 + elevation + slope + (1 | stand)
## Data: dat
## REML criterion at convergence: 80.98
## Random effects:
## Groups Name Std.Dev.
## stand (Intercept) 2.357
## Residual 0.975
## Number of obs: 20, groups: stand, 5
## Fixed Effects:
## (Intercept) elevation slope
## 10.58460 -0.00546 0.08684
simu_R <- replicate(n = 1000, expr = linear_fit())
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
# use this chunk to answer question 3
library(tidyverse)
## -- Attaching packages ------------------------- tidyverse 1.2.1 --
## √ ggplot2 3.2.0 √ purrr 0.3.2
## √ tibble 2.1.3 √ dplyr 0.8.3
## √ tidyr 0.8.3 √ stringr 1.4.0
## √ readr 1.3.1 √ forcats 0.4.0
## -- Conflicts ---------------------------- tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(broom)
Res <- simu_R %>% map_dfr(tidy, effects = "ran_pars", scales = "vcov")
Res %>% print(n = 6)
## # A tibble: 2,000 x 3
## term group estimate
## <chr> <chr> <dbl>
## 1 var_(Intercept).stand stand 2.61
## 2 var_Observation.Residual Residual 1.11
## 3 var_(Intercept).stand stand 9.73
## 4 var_Observation.Residual Residual 1.36
## 5 var_(Intercept).stand stand 0.827
## 6 var_Observation.Residual Residual 0.914
## # ... with 1,994 more rows
# use this chunk to answer question 4
stand_sims = c(5, 20, 100) %>%set_names(c("samplesize5","samplesize20","samplesize100")) %>%map(~replicate(1000, linear_fit(nstand = .x)))
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
library(broom)
stand_vars = stand_sims %>%modify_depth(2, ~tidy(.x, effects = "ran_pars", scales = "vcov")) %>%map_dfr(bind_rows, .id = "stand_num") %>%filter(group == "stand")
stand_vars = mutate(stand_vars, stand_num = fct_inorder(stand_num))
add_prefix = function(string) {
paste("# of stands:" , string, sep = " ")
}
groupmed = stand_vars %>%group_by(stand_num) %>%summarise(mvar = median(estimate))
library(ggplot2)
ggplot(stand_vars, aes(x = estimate))+
geom_density(fill = "orange", alpha = 0.25) +
facet_wrap(~stand_num, labeller = as_labeller(add_prefix)) +
geom_vline(aes(xintercept = 4 , linetype = "True Variance"), size = 1) +
geom_vline(data = groupmed, aes(xintercept = mvar, linetype = "Median Variance"), size = 1) +
theme_bw() +
scale_linetype_manual(name = "", values = c(2,1)) +
theme(legend.position = "bottom", legend.key.width = unit(.1, "cm")) +
labs(x = "Estimated Variance", y = "Density")
# use this chunk to answer question 5
coefplot = simu_R %>%map_dfr(tidy, effects = "fixed") %>%filter(term %in% c("elevation","slope")) %>%group_by(term)
coefplot
## # A tibble: 2,000 x 4
## # Groups: term [2]
## term estimate std.error statistic
## <chr> <dbl> <dbl> <dbl>
## 1 elevation 0.0152 0.00426 3.57
## 2 slope 0.121 0.0157 7.73
## 3 elevation 0.0156 0.0117 1.33
## 4 slope 0.124 0.0166 7.48
## 5 elevation 0.0130 0.00336 3.87
## 6 slope 0.0937 0.0100 9.32
## 7 elevation 0.0135 0.0157 0.856
## 8 slope 0.0767 0.0152 5.05
## 9 elevation -0.0235 0.0132 -1.78
## 10 slope 0.127 0.0106 12.0
## # ... with 1,990 more rows
library(dplyr)
mean = coefplot %>%summarise_at("estimate", funs( mean))
mean
## # A tibble: 2 x 2
## term estimate
## <chr> <dbl>
## 1 elevation 0.00489
## 2 slope 0.0997
plot = ggplot(coefplot, aes(x=rep(1:1000, each=2),y=estimate))+
geom_point(size=0.5)+
facet_wrap(~term) +
geom_hline(data=mean,aes(yintercept=mean$estimate,color = term), size = 0.9) +
labs(x="Replicated numbers", y="Estimate of coefficients")
plot