0.1 regression with clusters

options(show.signif.stars = FALSE, digits = 5)

0.2 install.packages(“pacman”)

pacman::p_load(alr4, tidyverse, magrittr)

0.3 data management

data(Stevens, package="alr4")

dta <- Stevens %>% 
 rename(Length=y, Loudness=loudness, Subject=subject)

ot <- theme_set(theme_bw())

0.4 regression plot

ggplot(dta, aes(Loudness, Length, group = Subject, alpha = .5))+
 geom_point() +
 geom_line() +
 stat_smooth(aes(group = 1), method = "lm", size = rel(.8)) +
 coord_cartesian(ylim = c(-1, 5)) +
 labs(x = "Loudness (db)", y = "Average log(Length)") +
 theme(legend.position = "NONE")
## `geom_smooth()` using formula 'y ~ x'

0.5 model fits

m0 <- lm(Length ~ Loudness, data = dta)
# summary model output
summary(m0)
## 
## Call:
## lm(formula = Length ~ Loudness, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2669 -0.3179 -0.0093  0.3707  0.9219 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.20377    0.34429   -9.31  2.5e-12
## Loudness     0.07901    0.00482   16.39  < 2e-16
## 
## Residual standard error: 0.482 on 48 degrees of freedom
## Multiple R-squared:  0.848,  Adjusted R-squared:  0.845 
## F-statistic:  269 on 1 and 48 DF,  p-value: <2e-16

0.6 regression model per individual

ggplot(dta, aes(Loudness, Length, group = Subject, color = Subject))+
 geom_point() +
 stat_smooth(method = "lm", se = F, size = rel(.5)) +
 scale_color_grey()+
 coord_cartesian(ylim = c(-1, 5)) +
 labs(x = "Loudness (db)", y = "Average log(Length)") +
 theme(legend.position = "NONE")
## `geom_smooth()` using formula 'y ~ x'

0.7 fancy way of conducting regression model per subject

# grouped by subject before doing regression
dta %>% 
 group_by(Subject) %>% 
 do(broom::tidy(lm(Length ~ Loudness, data = .)))
## # A tibble: 20 x 6
## # Groups:   Subject [10]
##    Subject term        estimate std.error statistic  p.value
##    <fct>   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
##  1 A       (Intercept)  -4.49     0.748       -6.01 0.00923 
##  2 A       Loudness      0.103    0.0105       9.80 0.00225 
##  3 B       (Intercept)  -4.59     0.462       -9.92 0.00218 
##  4 B       Loudness      0.0935   0.00647     14.5  0.000718
##  5 C       (Intercept)  -2.29     0.253       -9.05 0.00285 
##  6 C       Loudness      0.0695   0.00355     19.6  0.000291
##  7 D       (Intercept)  -2.92     0.338       -8.63 0.00327 
##  8 D       Loudness      0.0751   0.00474     15.8  0.000547
##  9 E       (Intercept)  -5.39     0.743       -7.24 0.00542 
## 10 E       Loudness      0.104    0.0104       9.98 0.00214 
## 11 F       (Intercept)  -2.42     0.523       -4.63 0.0190  
## 12 F       Loudness      0.0774   0.00732     10.6  0.00181 
## 13 G       (Intercept)  -2.81     0.312       -9.01 0.00288 
## 14 G       Loudness      0.0650   0.00437     14.9  0.000659
## 15 H       (Intercept)  -3.42     0.401       -8.53 0.00338 
## 16 H       Loudness      0.0822   0.00561     14.6  0.000691
## 17 I       (Intercept)  -1.95     0.512       -3.80 0.0319  
## 18 I       Loudness      0.0656   0.00718      9.14 0.00277 
## 19 J       (Intercept)  -1.76     0.265       -6.63 0.00699 
## 20 J       Loudness      0.0552   0.00372     14.9  0.000660
# fitting a model treating subject as fixed effects
summary(m1 <- lm(Length ~ Subject/Loudness - 1, data = dta))
## 
## Call:
## lm(formula = Length ~ Subject/Loudness - 1, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3318 -0.1073  0.0023  0.1485  0.3242 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)
## SubjectA          -4.49370    0.48664   -9.23  2.8e-10
## SubjectB          -4.58530    0.48664   -9.42  1.8e-10
## SubjectC          -2.29360    0.48664   -4.71  5.2e-05
## SubjectD          -2.91940    0.48664   -6.00  1.4e-06
## SubjectE          -5.38510    0.48664  -11.07  4.1e-12
## SubjectF          -2.42120    0.48664   -4.98  2.5e-05
## SubjectG          -2.81090    0.48664   -5.78  2.6e-06
## SubjectH          -3.42070    0.48664   -7.03  8.2e-08
## SubjectI          -1.94870    0.48664   -4.00  0.00038
## SubjectJ          -1.75910    0.48664   -3.61  0.00109
## SubjectA:Loudness  0.10265    0.00681   15.06  1.6e-15
## SubjectB:Loudness  0.09355    0.00681   13.73  1.8e-14
## SubjectC:Loudness  0.06950    0.00681   10.20  2.9e-11
## SubjectD:Loudness  0.07506    0.00681   11.02  4.6e-12
## SubjectE:Loudness  0.10391    0.00681   15.25  1.1e-15
## SubjectF:Loudness  0.07740    0.00681   11.36  2.2e-12
## SubjectG:Loudness  0.06497    0.00681    9.53  1.4e-10
## SubjectH:Loudness  0.08223    0.00681   12.07  4.9e-13
## SubjectI:Loudness  0.06561    0.00681    9.63  1.1e-10
## SubjectJ:Loudness  0.05525    0.00681    8.11  4.7e-09
## 
## Residual standard error: 0.215 on 30 degrees of freedom
## Multiple R-squared:  0.996,  Adjusted R-squared:  0.993 
## F-statistic:  369 on 20 and 30 DF,  p-value: <2e-16
# augument data with model fit
dta %>% mutate( fit1 <- fitted(m1) )
##    Subject Loudness Length    sd n fit1 <- fitted(m1)
## 1        A       50  0.379 0.507 3             0.6388
## 2        A       60  1.727 0.904 3             1.6653
## 3        A       70  3.016 0.553 3             2.6918
## 4        A       80  3.924 0.363 3             3.7183
## 5        A       90  4.413 0.092 3             4.7448
## 6        B       50  0.260 0.077 3             0.0922
## 7        B       60  0.833 0.287 3             1.0277
## 8        B       70  2.009 0.396 3             1.9632
## 9        B       80  2.720 0.124 3             2.8987
## 10       B       90  3.994 0.068 3             3.8342
## 11       C       50  1.072 0.106 3             1.1814
## 12       C       60  1.942 0.324 3             1.8764
## 13       C       70  2.700 0.319 3             2.5714
## 14       C       80  3.250 0.137 3             3.2664
## 15       C       90  3.893 0.356 3             3.9614
## 16       D       50  0.697 0.288 3             0.8336
## 17       D       60  1.787 0.121 3             1.5842
## 18       D       70  2.283 0.139 3             2.3348
## 19       D       80  3.127 0.331 3             3.0854
## 20       D       90  3.780 0.314 3             3.8360
## 21       E       50 -0.520 0.168 3            -0.1896
## 22       E       60  1.124 0.550 3             0.8495
## 23       E       70  2.045 0.315 3             1.8886
## 24       E       80  3.113 0.247 3             2.9277
## 25       E       90  3.681 0.217 3             3.9668
## 26       F       50  1.348 0.842 3             1.4488
## 27       F       60  2.134 0.130 3             2.2228
## 28       F       70  3.249 0.626 3             2.9968
## 29       F       80  3.936 0.181 3             3.7708
## 30       F       90  4.317 0.107 3             4.5448
## 31       G       50  0.485 0.272 3             0.4376
## 32       G       60  1.158 0.413 3             1.0873
## 33       G       70  1.585 0.240 3             1.7370
## 34       G       80  2.289 0.102 3             2.3867
## 35       G       90  3.168 0.118 3             3.0364
## 36       H       50  0.682 0.189 3             0.6908
## 37       H       60  1.684 0.231 3             1.5131
## 38       H       70  2.090 0.540 3             2.3354
## 39       H       80  3.171 0.449 3             3.1577
## 40       H       90  4.050 0.309 3             3.9800
## 41       I       50  1.201 0.089 3             1.3318
## 42       I       60  2.142 0.514 3             1.9879
## 43       I       70  2.543 0.821 3             2.6440
## 44       I       80  3.563 0.283 3             3.3001
## 45       I       90  3.771 0.378 3             3.9562
## 46       J       50  1.111 0.174 3             1.0034
## 47       J       60  1.500 0.112 3             1.5559
## 48       J       70  2.010 0.555 3             2.1084
## 49       J       80  2.595 0.013 3             2.6609
## 50       J       90  3.326 0.289 3             3.2134

0.8 regression model for all with individual parameters

ggplot(dta, aes(Loudness, Length, group = Subject, alpha = Subject))+
 geom_path() +
 geom_point(aes(Loudness, Length, group = Subject, alpha = Subject)) +
 coord_cartesian(ylim = c(-1, 5)) +
 labs(x = "Loudness (dB)", y = "Average log(Length)") +
 theme(legend.position = "NONE")
## Warning: Using alpha for a discrete variable is not advised.