## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: Matrix
##
##
## Attaching package: 'Matrix'
##
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
##
## Attaching package: 'psych'
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'MASS'
##
##
## The following object is masked from 'package:dplyr':
##
## select
##
##
##
## arm (Version 1.13-1, built: 2022-8-25)
##
##
## Working directory is C:/Users/brand/Desktop/UCR/PhD CCN/PSYC213_Spring2023/Lab/Week 7
##
##
##
## Attaching package: 'arm'
##
##
## The following objects are masked from 'package:psych':
##
## logit, rescale, sim
# Lets start by reading in the data
load("radon.Rdata")
What should be our first step for this question?
Answer:
#our first step is to figure out what the equation is supposed to look like
head(radon)
## # A tibble: 6 × 3
## y x county
## <dbl> <dbl> <int>
## 1 0.788 1 1
## 2 0.788 0 1
## 3 1.06 0 1
## 4 0 0 1
## 5 1.13 0 2
## 6 0.916 0 2
head(radon_county)
## # A tibble: 6 × 2
## u county
## <dbl> <int>
## 1 -0.689 1
## 2 -0.847 2
## 3 -0.113 3
## 4 -0.593 4
## 5 -0.143 5
## 6 0.387 6
county_n<- tibble(table(radon$county))
county_n<-county_n%>%mutate(county=1:85)
county_n<-county_n%>%mutate(N=table(radon$county))
df_list<-list(radon,radon_county,county_n)
radon_full<-df_list %>%reduce(full_join,by="county")
Now we can write the model
m1<-lmer(y ~ 1 + x +(1 | N), data = radon_full)
display(m1)
## lmer(formula = y ~ 1 + x + (1 | N), data = radon_full)
## coef.est coef.se
## (Intercept) 1.37 0.06
## x -0.69 0.07
##
## Error terms:
## Groups Name Std.Dev.
## N (Intercept) 0.23
## Residual 0.78
## ---
## number of obs: 919, groups: N, 22
## AIC = 2199.1, DIC = 2176.4
## deviance = 2183.8
ranef(m1)
## $N
## (Intercept)
## 1 0.13678974
## 2 0.03408249
## 3 0.18218025
## 4 0.16541642
## 5 0.13665385
## 6 -0.15992831
## 7 -0.05064027
## 8 0.12485501
## 9 0.01747863
## 10 -0.05000466
## 11 0.01104821
## 12 -0.13042257
## 13 0.20259091
## 14 0.43705293
## 23 -0.04402600
## 25 0.08101379
## 32 -0.15668402
## 46 -0.03402666
## 52 -0.40604432
## 63 -0.02729662
## 105 -0.01020491
## 116 -0.45988389
##
## with conditional variances for "N"
coef(m1)
## $N
## (Intercept) x
## 1 1.5053185 -0.6883572
## 2 1.4026112 -0.6883572
## 3 1.5507090 -0.6883572
## 4 1.5339451 -0.6883572
## 5 1.5051826 -0.6883572
## 6 1.2086004 -0.6883572
## 7 1.3178884 -0.6883572
## 8 1.4933837 -0.6883572
## 9 1.3860073 -0.6883572
## 10 1.3185241 -0.6883572
## 11 1.3795769 -0.6883572
## 12 1.2381062 -0.6883572
## 13 1.5711196 -0.6883572
## 14 1.8055816 -0.6883572
## 23 1.3245027 -0.6883572
## 25 1.4495425 -0.6883572
## 32 1.2118447 -0.6883572
## 46 1.3345021 -0.6883572
## 52 0.9624844 -0.6883572
## 63 1.3412321 -0.6883572
## 105 1.3583238 -0.6883572
## 116 0.9086448 -0.6883572
##
## attr(,"class")
## [1] "coef.mer"
in your own words interpret the coefficients
Answer: The coefficient for the intercept is 1.37, meaning that in the basement, there was an average radon score of 1.37, across all counties. The x coefficient refers to the change -0.69, meaning that if the floor for a given sample was taken on the first floor as opposed to in the basement, on the first floor there was a lower radon score, again across all counties. The ranef command shows us the change from the lowest n counties to the highest n counties and the change in score depending on the size of the sample in a given county. In the coef command, we see the radon score for all the counties with different sized samples, while holding the change in radon score based on the basement measurement across all counties with different sample sizes.
# create the selected sub sample of our data
sub_sample <- radon_full %>% filter(county %in% sort(sample(1:85, .2* 85, replace = FALSE)))
# fit the full model
M1_all <- lmer(y~ x + u + (1|county), data = radon_full)
# fit the sub_sample model
M2_a <- lmer(y~ x + u + (1|county), data = sub_sample)
## boundary (singular) fit: see help('isSingular')
display(M1_all)
## lmer(formula = y ~ x + u + (1 | county), data = radon_full)
## coef.est coef.se
## (Intercept) 1.47 0.04
## x -0.67 0.07
## u 0.72 0.09
##
## Error terms:
## Groups Name Std.Dev.
## county (Intercept) 0.16
## Residual 0.76
## ---
## number of obs: 919, groups: county, 85
## AIC = 2144.2, DIC = 2111.4
## deviance = 2122.8
display(M2_a)
## lmer(formula = y ~ x + u + (1 | county), data = sub_sample)
## coef.est coef.se
## (Intercept) 1.40 0.05
## x -0.74 0.12
## u 0.40 0.17
##
## Error terms:
## Groups Name Std.Dev.
## county (Intercept) 0.00
## Residual 0.71
## ---
## number of obs: 269, groups: county, 17
## AIC = 592.9, DIC = 565.6
## deviance = 574.2
coef(M2_a)
## $county
## (Intercept) x u
## 5 1.404962 -0.7410941 0.4043203
## 26 1.404962 -0.7410941 0.4043203
## 28 1.404962 -0.7410941 0.4043203
## 35 1.404962 -0.7410941 0.4043203
## 38 1.404962 -0.7410941 0.4043203
## 39 1.404962 -0.7410941 0.4043203
## 43 1.404962 -0.7410941 0.4043203
## 53 1.404962 -0.7410941 0.4043203
## 54 1.404962 -0.7410941 0.4043203
## 55 1.404962 -0.7410941 0.4043203
## 56 1.404962 -0.7410941 0.4043203
## 58 1.404962 -0.7410941 0.4043203
## 61 1.404962 -0.7410941 0.4043203
## 68 1.404962 -0.7410941 0.4043203
## 73 1.404962 -0.7410941 0.4043203
## 80 1.404962 -0.7410941 0.4043203
## 82 1.404962 -0.7410941 0.4043203
##
## attr(,"class")
## [1] "coef.mer"
fixef(M2_a)
## (Intercept) x u
## 1.4049620 -0.7410941 0.4043203
Interpretation: The fixef command gives us an intercept, and vectors for basement measurement and radon measurement averaged across all counties by sample size
ranef(M2_a)
## $county
## (Intercept)
## 5 0
## 26 0
## 28 0
## 35 0
## 38 0
## 39 0
## 43 0
## 53 0
## 54 0
## 55 0
## 56 0
## 58 0
## 61 0
## 68 0
## 73 0
## 80 0
## 82 0
##
## with conditional variances for "county"
Interpretation: The ranef command gives us the differences between counties with different sample sizes of houses in a county where radon was measured.
1.405177 - 0.5275425 + 0.7388988
## [1] 1.616533
1.4306756 - 0.5275425 + 0.7388988 - 0.025498274
## [1] 1.616534
1.4306756 - 0.025498274
## [1] 1.405177
Interpretation: When we look at the fixed and random effects for a given county (e.g., county with sample size of 1) we see that the fixed effect intercept minus the county intercept is the same as the coefficient output for that given county.
M3_a <- lmer(y ~ x + u + (1 | county), data= slice_sample(radon_full, prop = .2))
display(M3_a)
## lmer(formula = y ~ x + u + (1 | county), data = slice_sample(radon_full,
## prop = 0.2))
## coef.est coef.se
## (Intercept) 1.41 0.07
## x -0.93 0.18
## u 0.70 0.18
##
## Error terms:
## Groups Name Std.Dev.
## county (Intercept) 0.18
## Residual 0.77
## ---
## number of obs: 183, groups: county, 60
## AIC = 445.2, DIC = 421.5
## deviance = 428.4
M3_b <- lmer(y ~ x + u + (1 | county), data= slice_sample(radon_full, prop = .2))
## boundary (singular) fit: see help('isSingular')
display(M3_b)
## lmer(formula = y ~ x + u + (1 | county), data = slice_sample(radon_full,
## prop = 0.2))
## coef.est coef.se
## (Intercept) 1.40 0.07
## x -0.66 0.15
## u 0.89 0.16
##
## Error terms:
## Groups Name Std.Dev.
## county (Intercept) 0.00
## Residual 0.76
## ---
## number of obs: 183, groups: county, 52
## AIC = 434.2, DIC = 408.8
## deviance = 416.5
M3_c <- lmer(y ~ x + u + (1 | county), data= slice_sample(radon_full, prop = .2))
display(M3_c)
## lmer(formula = y ~ x + u + (1 | county), data = slice_sample(radon_full,
## prop = 0.2))
## coef.est coef.se
## (Intercept) 1.38 0.07
## x -0.70 0.16
## u 0.87 0.20
##
## Error terms:
## Groups Name Std.Dev.
## county (Intercept) 0.25
## Residual 0.72
## ---
## number of obs: 183, groups: county, 58
## AIC = 429.4, DIC = 405.8
## deviance = 412.6
Answer: The number of observations for county vary each slice that is taken. The coefficients standard errors are similar across slices. The intercept coefficient also seems relatively stable across slices.
M4_a <- lmer(y ~ x + u + (1 | county), data=radon_full, subset=(county %in% sort(sample(1:85, .2*85, replace=FALSE))))
## boundary (singular) fit: see help('isSingular')
display(M4_a)
## lmer(formula = y ~ x + u + (1 | county), data = radon_full, subset = (county %in%
## sort(sample(1:85, 0.2 * 85, replace = FALSE))))
## coef.est coef.se
## (Intercept) 1.37 0.06
## x -0.53 0.17
## u 0.74 0.17
##
## Error terms:
## Groups Name Std.Dev.
## county (Intercept) 0.00
## Residual 0.68
## ---
## number of obs: 153, groups: county, 17
## AIC = 332.6, DIC = 307.9
## deviance = 315.3
Answer: This random sampling of 1/5 of the counties seems to provide more varied descriptive statistics. Which makes sense because the sample size that we are taking is also less than our full set of data.
M5<- lmer(y ~ 1 + (1 | county), data=radon_full)
# Obtain the residual variance
measurement_error_var <- attr(VarCorr(M5), "sc")
measurement_error_var
## [1] 0.797885
icc_by_hand <-.1^2 / (.1^2 + .71^2)
icc_by_hand
## [1] 0.01945147
icc_by_hand2 <-.1^2 / (.1^2 + .71^2 + .797885)
icc_by_hand2
## [1] 0.007622038
icc_by_hand3 <-.1^2 / (.1^2 + .71^2 + .797885^2)
icc_by_hand3
## [1] 0.008690208
# not 100% sure if I am doing this right for a two-way random effects ICC vs the one-way random effects ICC
Answer: We’re calculating group information. Here we get info for the group level that we don’t see at the individual level. An ICC closer to a 0 means that grouping is not providing us any additional information. An ICC closer to a 1 means our individual level data is not providing us with any additional information.