## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: Matrix
##
##
## Attaching package: 'Matrix'
##
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
##
## Attaching package: 'psych'
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'MASS'
##
##
## The following object is masked from 'package:dplyr':
##
## select
##
##
##
## arm (Version 1.13-1, built: 2022-8-25)
##
##
## Working directory is /Users/user/Desktop/R - Spring
##
##
##
## Attaching package: 'arm'
##
##
## The following objects are masked from 'package:psych':
##
## logit, rescale, sim
# Lets start by reading in the data
load("radon.Rdata")
What should be our first step for this question?
Answer: look at data and create county sample size variable
head(radon)
## # A tibble: 6 × 3
## y x county
## <dbl> <dbl> <int>
## 1 0.788 1 1
## 2 0.788 0 1
## 3 1.06 0 1
## 4 0 0 1
## 5 1.13 0 2
## 6 0.916 0 2
describe(radon)
## vars n mean sd median trimmed mad min max range skew
## y 1 919 1.22 0.85 1.28 1.25 0.85 -2.3 3.88 6.18 -0.39
## x 2 919 0.17 0.37 0.00 0.08 0.00 0.0 1.00 1.00 1.79
## county 3 919 43.52 25.67 44.00 43.86 37.06 1.0 85.00 84.00 -0.04
## kurtosis se
## y 0.93 0.03
## x 1.20 0.01
## county -1.41 0.85
representing group level data at the individual level
county_sample_size <- tibble(table(radon$county))
county_sample_size <- county_sample_size %>% mutate(county = 1:85)
county_sample_size <- county_sample_size %>% mutate(N = table(radon$county))
radon_df_list <- list(radon, radon_county, county_sample_size)
radon_data <- radon_df_list %>% reduce(full_join, by = 'county')
Now we can write the model
radon_lm <- lmer(y ~ 1 + x + (1|N), data = radon_data)
display(radon_lm)
## lmer(formula = y ~ 1 + x + (1 | N), data = radon_data)
## coef.est coef.se
## (Intercept) 1.37 0.06
## x -0.69 0.07
##
## Error terms:
## Groups Name Std.Dev.
## N (Intercept) 0.23
## Residual 0.78
## ---
## number of obs: 919, groups: N, 22
## AIC = 2199.1, DIC = 2176.4
## deviance = 2183.8
ranef(radon_lm)
## $N
## (Intercept)
## 1 0.13678974
## 2 0.03408249
## 3 0.18218025
## 4 0.16541642
## 5 0.13665385
## 6 -0.15992831
## 7 -0.05064027
## 8 0.12485501
## 9 0.01747863
## 10 -0.05000466
## 11 0.01104821
## 12 -0.13042257
## 13 0.20259091
## 14 0.43705293
## 23 -0.04402600
## 25 0.08101379
## 32 -0.15668402
## 46 -0.03402666
## 52 -0.40604432
## 63 -0.02729662
## 105 -0.01020491
## 116 -0.45988389
##
## with conditional variances for "N"
coef(radon_lm)
## $N
## (Intercept) x
## 1 1.5053185 -0.6883572
## 2 1.4026112 -0.6883572
## 3 1.5507090 -0.6883572
## 4 1.5339451 -0.6883572
## 5 1.5051826 -0.6883572
## 6 1.2086004 -0.6883572
## 7 1.3178884 -0.6883572
## 8 1.4933837 -0.6883572
## 9 1.3860073 -0.6883572
## 10 1.3185241 -0.6883572
## 11 1.3795769 -0.6883572
## 12 1.2381062 -0.6883572
## 13 1.5711196 -0.6883572
## 14 1.8055816 -0.6883572
## 23 1.3245027 -0.6883572
## 25 1.4495425 -0.6883572
## 32 1.2118447 -0.6883572
## 46 1.3345021 -0.6883572
## 52 0.9624844 -0.6883572
## 63 1.3412321 -0.6883572
## 105 1.3583238 -0.6883572
## 116 0.9086448 -0.6883572
##
## attr(,"class")
## [1] "coef.mer"
in your own words interpret the coefficients
Answer: for a county that has one house the intercept is 1.5 and .6 decrease in radon on 1st floor. For a county that has 116 houses radon at basement is .9 and a .6 decrease in radon on the 1st floor. The x or slope doesn’t vary because it was not allowed to vary so it is a fixed effect.
onefifth_data <- radon_data %>% filter (county %in% sort(sample(1:85, .2*85, replace = FALSE)))
view(onefifth_data)
radon_lm_all_data <- lmer(y ~ x + u + (1|county), data = radon_data)
display(radon_lm_all_data)
## lmer(formula = y ~ x + u + (1 | county), data = radon_data)
## coef.est coef.se
## (Intercept) 1.47 0.04
## x -0.67 0.07
## u 0.72 0.09
##
## Error terms:
## Groups Name Std.Dev.
## county (Intercept) 0.16
## Residual 0.76
## ---
## number of obs: 919, groups: county, 85
## AIC = 2144.2, DIC = 2111.4
## deviance = 2122.8
radon_lm_onefifth_data <- lmer(y ~ x + u + (1|county), data = onefifth_data)
display(radon_lm_onefifth_data)
## lmer(formula = y ~ x + u + (1 | county), data = onefifth_data)
## coef.est coef.se
## (Intercept) 1.39 0.10
## x -0.68 0.17
## u 0.55 0.24
##
## Error terms:
## Groups Name Std.Dev.
## county (Intercept) 0.21
## Residual 0.79
## ---
## number of obs: 140, groups: county, 17
## AIC = 352, DIC = 330.3
## deviance = 336.1
standard error of our coefficients is higher because we have less observations
coef(radon_lm_onefifth_data)
## $county
## (Intercept) x u
## 12 1.432436 -0.6816121 0.54931
## 17 1.314566 -0.6816121 0.54931
## 21 1.490446 -0.6816121 0.54931
## 22 1.108413 -0.6816121 0.54931
## 23 1.359098 -0.6816121 0.54931
## 25 1.615862 -0.6816121 0.54931
## 28 1.402414 -0.6816121 0.54931
## 30 1.345094 -0.6816121 0.54931
## 34 1.421588 -0.6816121 0.54931
## 35 1.352750 -0.6816121 0.54931
## 39 1.454673 -0.6816121 0.54931
## 44 1.238637 -0.6816121 0.54931
## 52 1.446092 -0.6816121 0.54931
## 54 1.250585 -0.6816121 0.54931
## 55 1.509829 -0.6816121 0.54931
## 71 1.410229 -0.6816121 0.54931
## 76 1.465356 -0.6816121 0.54931
##
## attr(,"class")
## [1] "coef.mer"
Interpretation: When radon levels are at 0 county 5 has an average radon level of 1.43 at basement level and a .59 decrease on the 1st floor. The average change between counties is .24
fixef(radon_lm_onefifth_data)
## (Intercept) x u
## 1.3892981 -0.6816121 0.5493100
Interpretation: There is a .59 decrease in radon when going from the basement level to the first floor. There is a .24 increase in radon when comparing counties?
ranef(radon_lm_onefifth_data)
## $county
## (Intercept)
## 12 0.04313752
## 17 -0.07473260
## 21 0.10114772
## 22 -0.28088474
## 23 -0.03019989
## 25 0.22656389
## 28 0.01311587
## 30 -0.04420379
## 34 0.03229003
## 35 -0.03654824
## 39 0.06537447
## 44 -0.15066075
## 52 0.05679394
## 54 -0.13871297
## 55 0.12053087
## 71 0.02093076
## 76 0.07605790
##
## with conditional variances for "county"
Interpretation: fixed effects and random effects should add up to the coefficients in coef()
#county 5
1.4304065 + 0.0001449968
## [1] 1.430551
very very very close … 1.430551 vs 1.430552
#county 17
1.4304065 - 0.0126193130
## [1] 1.417787
this one was exactly the same
lm_1 <- lmer(y ~ x + u + (1|county), data = slice_sample(radon_data, prop = 0.2))
display(lm_1)
## lmer(formula = y ~ x + u + (1 | county), data = slice_sample(radon_data,
## prop = 0.2))
## coef.est coef.se
## (Intercept) 1.47 0.07
## x -0.83 0.14
## u 0.74 0.17
##
## Error terms:
## Groups Name Std.Dev.
## county (Intercept) 0.17
## Residual 0.69
## ---
## number of obs: 183, groups: county, 54
## AIC = 404.9, DIC = 379.6
## deviance = 387.3
lm_2 <- lmer(y ~ x + u + (1|county), data = slice_sample(radon_data, prop = 0.2))
display(lm_2)
## lmer(formula = y ~ x + u + (1 | county), data = slice_sample(radon_data,
## prop = 0.2))
## coef.est coef.se
## (Intercept) 1.44 0.09
## x -0.51 0.17
## u 0.77 0.23
##
## Error terms:
## Groups Name Std.Dev.
## county (Intercept) 0.41
## Residual 0.75
## ---
## number of obs: 183, groups: county, 55
## AIC = 458.2, DIC = 436.2
## deviance = 442.2
lm_3 <- lmer(y ~ x + u + (1|county), data = slice_sample(radon_data, prop = 0.2))
display(lm_3)
## lmer(formula = y ~ x + u + (1 | county), data = slice_sample(radon_data,
## prop = 0.2))
## coef.est coef.se
## (Intercept) 1.45 0.07
## x -0.70 0.13
## u 0.75 0.18
##
## Error terms:
## Groups Name Std.Dev.
## county (Intercept) 0.25
## Residual 0.67
## ---
## number of obs: 183, groups: county, 58
## AIC = 404.5, DIC = 379.4
## deviance = 386.9
Answer: x and u have higher standard errors so more variation in comparison to the intercept. In the first two the intercept is the same (1.40) and in the third it is only slight different (1.34). While the variables x (floor) and u (county) vary more.
lm_4 <- lmer(y ~ x + u + (1|county), data = radon_data, subset = (county %in% sort(sample(1:85, .2 * 85, replace = FALSE))))
display(lm_4)
## lmer(formula = y ~ x + u + (1 | county), data = radon_data, subset = (county %in%
## sort(sample(1:85, 0.2 * 85, replace = FALSE))))
## coef.est coef.se
## (Intercept) 1.59 0.09
## x -0.44 0.18
## u 0.50 0.21
##
## Error terms:
## Groups Name Std.Dev.
## county (Intercept) 0.12
## Residual 0.75
## ---
## number of obs: 109, groups: county, 17
## AIC = 261, DIC = 238.4
## deviance = 244.7
Answer: The average radon level is 1.56 on the basement level. There is a .66 decrease when moving to the first floor and there is a .46 difference between counties. The number of observations is different in this model but the number of observations at the county level is the same compared to the models prior. This is because the ones before are looking at different counties and counties have a different number of houses. This last model is taking houses from the same county. I think that’s right but not 100% sure.
ICC_radon <- .23^2 / (.23^2 + .79^2)
ICC_radon
## [1] 0.07813885
Answer: In order to find the ICC you need to divide the random effect variance by the total variance. I did this for the first model. The ICC tells us how much information we are getting from the group and individual level. 0 = no information obtained from group level, while 1 = no information obtained from the individual level. .78 suggests a moderate amount of information is obtained by looking at the group level, but not much.
should be county std. dev and residual std. dev