regression with clusters
options(show.signif.stars = FALSE, digits = 5)
install.packages(“pacman”)
pacman::p_load(alr4, tidyverse, magrittr)
data management
data(Stevens, package="alr4")
dta <- Stevens %>%
rename(Length=y, Loudness=loudness, Subject=subject)
ot <- theme_set(theme_bw())
regression plot
ggplot(dta, aes(Loudness, Length, group = Subject, alpha = .5))+
geom_point() +
geom_line() +
stat_smooth(aes(group = 1), method = "lm", size = rel(.8)) +
coord_cartesian(ylim = c(-1, 5)) +
labs(x = "Loudness (db)", y = "Average log(Length)") +
theme(legend.position = "NONE")
## `geom_smooth()` using formula 'y ~ x'

model fits
m0 <- lm(Length ~ Loudness, data = dta)
# summary model output
summary(m0)
##
## Call:
## lm(formula = Length ~ Loudness, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2669 -0.3179 -0.0093 0.3707 0.9219
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.20377 0.34429 -9.31 2.5e-12
## Loudness 0.07901 0.00482 16.39 < 2e-16
##
## Residual standard error: 0.482 on 48 degrees of freedom
## Multiple R-squared: 0.848, Adjusted R-squared: 0.845
## F-statistic: 269 on 1 and 48 DF, p-value: <2e-16
regression model per individual
ggplot(dta, aes(Loudness, Length, group = Subject, color = Subject))+
geom_point() +
stat_smooth(method = "lm", se = F, size = rel(.5)) +
scale_color_grey()+
coord_cartesian(ylim = c(-1, 5)) +
labs(x = "Loudness (db)", y = "Average log(Length)") +
theme(legend.position = "NONE")
## `geom_smooth()` using formula 'y ~ x'

fancy way of conducting regression model per subject
# grouped by subject before doing regression
dta %>%
group_by(Subject) %>%
do(broom::tidy(lm(Length ~ Loudness, data = .)))
## # A tibble: 20 x 6
## # Groups: Subject [10]
## Subject term estimate std.error statistic p.value
## <fct> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 A (Intercept) -4.49 0.748 -6.01 0.00923
## 2 A Loudness 0.103 0.0105 9.80 0.00225
## 3 B (Intercept) -4.59 0.462 -9.92 0.00218
## 4 B Loudness 0.0935 0.00647 14.5 0.000718
## 5 C (Intercept) -2.29 0.253 -9.05 0.00285
## 6 C Loudness 0.0695 0.00355 19.6 0.000291
## 7 D (Intercept) -2.92 0.338 -8.63 0.00327
## 8 D Loudness 0.0751 0.00474 15.8 0.000547
## 9 E (Intercept) -5.39 0.743 -7.24 0.00542
## 10 E Loudness 0.104 0.0104 9.98 0.00214
## 11 F (Intercept) -2.42 0.523 -4.63 0.0190
## 12 F Loudness 0.0774 0.00732 10.6 0.00181
## 13 G (Intercept) -2.81 0.312 -9.01 0.00288
## 14 G Loudness 0.0650 0.00437 14.9 0.000659
## 15 H (Intercept) -3.42 0.401 -8.53 0.00338
## 16 H Loudness 0.0822 0.00561 14.6 0.000691
## 17 I (Intercept) -1.95 0.512 -3.80 0.0319
## 18 I Loudness 0.0656 0.00718 9.14 0.00277
## 19 J (Intercept) -1.76 0.265 -6.63 0.00699
## 20 J Loudness 0.0552 0.00372 14.9 0.000660
# fitting a model treating subject as fixed effects
summary(m1 <- lm(Length ~ Subject/Loudness - 1, data = dta))
##
## Call:
## lm(formula = Length ~ Subject/Loudness - 1, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.3318 -0.1073 0.0023 0.1485 0.3242
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## SubjectA -4.49370 0.48664 -9.23 2.8e-10
## SubjectB -4.58530 0.48664 -9.42 1.8e-10
## SubjectC -2.29360 0.48664 -4.71 5.2e-05
## SubjectD -2.91940 0.48664 -6.00 1.4e-06
## SubjectE -5.38510 0.48664 -11.07 4.1e-12
## SubjectF -2.42120 0.48664 -4.98 2.5e-05
## SubjectG -2.81090 0.48664 -5.78 2.6e-06
## SubjectH -3.42070 0.48664 -7.03 8.2e-08
## SubjectI -1.94870 0.48664 -4.00 0.00038
## SubjectJ -1.75910 0.48664 -3.61 0.00109
## SubjectA:Loudness 0.10265 0.00681 15.06 1.6e-15
## SubjectB:Loudness 0.09355 0.00681 13.73 1.8e-14
## SubjectC:Loudness 0.06950 0.00681 10.20 2.9e-11
## SubjectD:Loudness 0.07506 0.00681 11.02 4.6e-12
## SubjectE:Loudness 0.10391 0.00681 15.25 1.1e-15
## SubjectF:Loudness 0.07740 0.00681 11.36 2.2e-12
## SubjectG:Loudness 0.06497 0.00681 9.53 1.4e-10
## SubjectH:Loudness 0.08223 0.00681 12.07 4.9e-13
## SubjectI:Loudness 0.06561 0.00681 9.63 1.1e-10
## SubjectJ:Loudness 0.05525 0.00681 8.11 4.7e-09
##
## Residual standard error: 0.215 on 30 degrees of freedom
## Multiple R-squared: 0.996, Adjusted R-squared: 0.993
## F-statistic: 369 on 20 and 30 DF, p-value: <2e-16
# augument data with model fit
dta %>% mutate( fit1 <- fitted(m1) )
## Subject Loudness Length sd n fit1 <- fitted(m1)
## 1 A 50 0.379 0.507 3 0.6388
## 2 A 60 1.727 0.904 3 1.6653
## 3 A 70 3.016 0.553 3 2.6918
## 4 A 80 3.924 0.363 3 3.7183
## 5 A 90 4.413 0.092 3 4.7448
## 6 B 50 0.260 0.077 3 0.0922
## 7 B 60 0.833 0.287 3 1.0277
## 8 B 70 2.009 0.396 3 1.9632
## 9 B 80 2.720 0.124 3 2.8987
## 10 B 90 3.994 0.068 3 3.8342
## 11 C 50 1.072 0.106 3 1.1814
## 12 C 60 1.942 0.324 3 1.8764
## 13 C 70 2.700 0.319 3 2.5714
## 14 C 80 3.250 0.137 3 3.2664
## 15 C 90 3.893 0.356 3 3.9614
## 16 D 50 0.697 0.288 3 0.8336
## 17 D 60 1.787 0.121 3 1.5842
## 18 D 70 2.283 0.139 3 2.3348
## 19 D 80 3.127 0.331 3 3.0854
## 20 D 90 3.780 0.314 3 3.8360
## 21 E 50 -0.520 0.168 3 -0.1896
## 22 E 60 1.124 0.550 3 0.8495
## 23 E 70 2.045 0.315 3 1.8886
## 24 E 80 3.113 0.247 3 2.9277
## 25 E 90 3.681 0.217 3 3.9668
## 26 F 50 1.348 0.842 3 1.4488
## 27 F 60 2.134 0.130 3 2.2228
## 28 F 70 3.249 0.626 3 2.9968
## 29 F 80 3.936 0.181 3 3.7708
## 30 F 90 4.317 0.107 3 4.5448
## 31 G 50 0.485 0.272 3 0.4376
## 32 G 60 1.158 0.413 3 1.0873
## 33 G 70 1.585 0.240 3 1.7370
## 34 G 80 2.289 0.102 3 2.3867
## 35 G 90 3.168 0.118 3 3.0364
## 36 H 50 0.682 0.189 3 0.6908
## 37 H 60 1.684 0.231 3 1.5131
## 38 H 70 2.090 0.540 3 2.3354
## 39 H 80 3.171 0.449 3 3.1577
## 40 H 90 4.050 0.309 3 3.9800
## 41 I 50 1.201 0.089 3 1.3318
## 42 I 60 2.142 0.514 3 1.9879
## 43 I 70 2.543 0.821 3 2.6440
## 44 I 80 3.563 0.283 3 3.3001
## 45 I 90 3.771 0.378 3 3.9562
## 46 J 50 1.111 0.174 3 1.0034
## 47 J 60 1.500 0.112 3 1.5559
## 48 J 70 2.010 0.555 3 2.1084
## 49 J 80 2.595 0.013 3 2.6609
## 50 J 90 3.326 0.289 3 3.2134
regression model for all with individual parameters
ggplot(dta, aes(Loudness, Length, group = Subject, alpha = Subject))+
geom_path() +
geom_point(aes(Loudness, Length, group = Subject, alpha = Subject)) +
coord_cartesian(ylim = c(-1, 5)) +
labs(x = "Loudness (dB)", y = "Average log(Length)") +
theme(legend.position = "NONE")
## Warning: Using alpha for a discrete variable is not advised.
