The statistical model:
\(y_t = \beta_0 + \beta_1 * (Elevation_s)_t + \beta_2 * Slope_t + (b_s)_t + \epsilon_t\)
Where:
Let’s define the parameters:
nstand = 5
nplot = 4
b0 = -1
b1 = .005
b2 = .1
sds = 2
sd = 1
Simulate other variables:
set.seed(16)
stand = rep(LETTERS[1:nstand], each = nplot)
standeff = rep( rnorm(nstand, 0, sds), each = nplot)
ploteff = rnorm(nstand*nplot, 0, sd)
Simulate elevation and slope:
elevation = rep( runif(nstand, 1000, 1500), each = nplot)
slope = runif(nstand*nplot, 2, 75)
Simulate response variable:
resp2 = b0 + b1*elevation + b2*slope + standeff + ploteff
Your tasks (complete each task in its’ own code chunk, make sure to use echo=TRUE so I can see your code):
library(lme4)
library(lmerTest)
liner_mix_mod <- lmer(resp2 ~ 1 + elevation + slope + (1|stand))
summary_lm<-summary(liner_mix_mod)
summary_lm$coefficients
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -21.31462843 6.602052944 3.001313 -3.228485 4.824164e-02
## elevation 0.02060019 0.004916391 3.113482 4.190104 2.303393e-02
## slope 0.09510535 0.016441240 15.868032 5.784560 2.884449e-05
df_coeff<-data.frame(summary_lm$coefficients)
df_coeff_compare<-data.frame(cbind(c(b0,b1,b2), df_coeff$Estimate))
colnames(df_coeff_compare)<-c("initial_para", "estimate_para")
df_coeff_compare$difference<-(df_coeff_compare$initial_para-df_coeff_compare$estimate_para)
df_coeff_compare
## initial_para estimate_para difference
## 1 -1.000 -21.31462843 20.314628426
## 2 0.005 0.02060019 -0.015600188
## 3 0.100 0.09510535 0.004894651
Comments: As we can cearly see from the output that the estimates are different. also the magnitute of sifferencehas been mentioned in the coumn“difference” by quick obervation we can see the valye if b2 is very similar for initial and estimated scenario.
#create a function
function_lin_mix <- function(nstand = 5, nplot = 4, b0 = -1, b1 = 0.005, b2 = 0.1, sds = 2, sd = 1) {
stand <- rep(LETTERS[1:nstand], each = nplot)
standeff <- rep(rnorm(nstand, 0, sds), each = nplot)
ploteff <- rnorm(nstand * nplot, 0, sd)
elevation <- rep(runif(nstand, 1000, 1500), each = nplot)
slope <- runif(nstand * nplot, 2, 75)
resp2 <- b0 + b1 * elevation + b2 * slope + standeff + ploteff
#store the values of parameters in a df
df_lin_mix <- data.frame(resp2, elevation, slope, stand)
lmer(resp2 ~ 1 + elevation + slope + (1|stand), data = df_lin_mix)
}
function_lin_mix()
## Linear mixed model fit by REML ['lmerModLmerTest']
## Formula: resp2 ~ 1 + elevation + slope + (1 | stand)
## Data: df_lin_mix
## REML criterion at convergence: 80.9781
## Random effects:
## Groups Name Std.Dev.
## stand (Intercept) 2.3573
## Residual 0.9754
## Number of obs: 20, groups: stand, 5
## Fixed Effects:
## (Intercept) elevation slope
## 10.584601 -0.005464 0.086839
#running 1000 simulations
sim_1000<-replicate(n=1000, expr=function_lin_mix())
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00350336
## (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0045514
## (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00363544
## (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00439561
## (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00239513
## (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00219495
## (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00322495
## (tol = 0.002, component 1)
#veryfy the number of iterations
length(sim_1000)
## [1] 1000
library(purrr)
library(furrr)
library(broom)
library(broom.mixed)
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.2.17
## Current Matrix version is 1.2.15
## Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
library(tidyverse)
extract_var<-sim_1000 %>% map_dfr(tidy, effects = "ran_pars", scales = "vcov")
extract_var%>%print(n = 6)
## # A tibble: 2,000 x 4
## effect group term estimate
## <chr> <chr> <chr> <dbl>
## 1 ran_pars stand var__(Intercept) 2.61
## 2 ran_pars Residual var__Observation 1.11
## 3 ran_pars stand var__(Intercept) 9.73
## 4 ran_pars Residual var__Observation 1.36
## 5 ran_pars stand var__(Intercept) 0.827
## 6 ran_pars Residual var__Observation 0.914
## # ... with 1,994 more rows
library(dplyr)
library(ggplot2)
simulation_3_sample = c(10, 50, 100) %>%
set_names(c("sample size=10", "sample size=50", "sample size=100")) %>%
map(~replicate(1000, function_lin_mix(nstand = .x)))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00613041
## (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00465885
## (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00204668
## (tol = 0.002, component 1)
#extrating variance
extract_var_3_sample<-simulation_3_sample %>%
modify_depth(2, ~tidy(.x, effects = "ran_pars", scales = "vcov")) %>%
map_dfr(bind_rows, .id = "id")
subset_var<-subset(extract_var_3_sample, group == "stand", select = colnames(extract_var_3_sample))
#mean variance
mean(subset_var$estimate)
## [1] 3.986178
median(subset_var$estimate)
## [1] 3.782357
#plot
ggplot(subset_var, aes(x = estimate)) +
geom_density(fill = "navy", alpha = "0.5") +
facet_wrap(group~id)+geom_vline(xintercept = c(mean(subset_var$estimate), median(subset_var$estimate)))+ labs(title = "stributions of the variances for each of the 3 sample sizes")
Comments: The mean variance as estimated above is 3.957636 We can clearly see that as the sample size increases the variance shrinks
sim_1000_coeff <- sim_1000 %>%
future_map(tidy, effects = "fixed") %>%
bind_rows()
df_graph<-sim_1000_coeff %>%
dplyr::filter(term %in% c("elevation", "slope")) %>%
group_by(term) %>%
mutate(range = 1 : 1000) %>%
ungroup()
#compute mean slope and elevation
mean_slope<-mean(df_graph$estimate[df_graph$term=="slope"])
mean_elevation<-mean(df_graph$estimate[df_graph$term=="elevation"])
#add term mean estimate for slope and elevation
df_graph_revised<-df_graph%>%
mutate(term_mean_est = ifelse(term == "slope", mean_slope, mean_elevation))
ggplot(df_graph_revised, aes(x = range, y = estimate)) +geom_line() +facet_wrap(effect~term) +
geom_hline(aes(yintercept = term_mean_est, color = term))+labs(title = "Plot of the coefficients of the estimates of elevation and slope")
Comments: As we can see the man slope is0.1002819 and elevation is 0.005192469 , The estimate moves around the mean for both slope and elevation.