setwd("/Users/isaiahmireles/Downloads")
df <- read.csv("lighting copy-1.csv")
str(df)
## 'data.frame': 1200 obs. of 5 variables:
## $ observation : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Country : chr "United States of America" "United States of America" "United States of America" "United States of America" ...
## $ light : chr "light1" "light2" "light3" "light4" ...
## $ Rater : int 1 1 1 1 1 1 2 2 2 2 ...
## $ average_rating: num 7 7.7 7.8 7.8 7.7 7.5 4.7 4.4 5 4.9 ...
unique(df$Country) # fixed factor
## [1] "United States of America" "India"
unique(df$light) # fixed factor
## [1] "light1" "light2" "light3" "light4" "light5" "light6"
unique(df$Rater) # random factor
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## [91] 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200
md1 <- lm(average_rating ~ Country + light, dat = df)
summary(md1)
##
## Call:
## lm(formula = average_rating ~ Country + light, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5161 -0.6747 0.0086 0.6939 3.0333
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.86668 0.09482 61.874 < 2e-16 ***
## CountryUnited States of America -0.96508 0.07226 -13.356 < 2e-16 ***
## lightlight2 0.21450 0.11177 1.919 0.05520 .
## lightlight3 0.31050 0.11177 2.778 0.00555 **
## lightlight4 0.20800 0.11177 1.861 0.06299 .
## lightlight5 0.20450 0.11177 1.830 0.06754 .
## lightlight6 0.15150 0.11177 1.356 0.17551
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.118 on 1193 degrees of freedom
## Multiple R-squared: 0.1354, Adjusted R-squared: 0.1311
## F-statistic: 31.14 on 6 and 1193 DF, p-value: < 2.2e-16
library(car)
## Loading required package: carData
library(lattice)
library(effects)
## Warning: package 'effects' was built under R version 4.4.3
## Use the command
## lattice::trellis.par.set(effectsTheme())
## to customize lattice options for effects plots.
## See ?effectsTheme for details.
plot(allEffects(md1), ask=FALSE)
plot(md1)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
df |> group_by(Country) |> summarize(sum = mean(average_rating))
## # A tibble: 2 × 2
## Country sum
## <chr> <dbl>
## 1 India 6.05
## 2 United States of America 5.08
t-val
\[ t = \frac{\hat{\beta}}{\text{Se}(\hat{\beta})} \]
library(lme4)
## Loading required package: Matrix
library(Matrix)
m2 <- lmer(average_rating ~ Country + light + (1|Rater), data=df)
m2
## Linear mixed model fit by REML ['lmerMod']
## Formula: average_rating ~ Country + light + (1 | Rater)
## Data: df
## REML criterion at convergence: 1305.917
## Random effects:
## Groups Name Std.Dev.
## Rater (Intercept) 1.0833
## Residual 0.2839
## Number of obs: 1200, groups: Rater, 200
## Fixed Effects:
## (Intercept) CountryUnited States of America
## 5.8667 -0.9651
## lightlight2 lightlight3
## 0.2145 0.3105
## lightlight4 lightlight5
## 0.2080 0.2045
## lightlight6
## 0.1515
summary(m2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: average_rating ~ Country + light + (1 | Rater)
## Data: df
##
## REML criterion at convergence: 1305.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.2105 -0.4611 0.0155 0.4517 3.8312
##
## Random effects:
## Groups Name Variance Std.Dev.
## Rater (Intercept) 1.1735 1.0833
## Residual 0.0806 0.2839
## Number of obs: 1200, groups: Rater, 200
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.86668 0.14804 39.629
## CountryUnited States of America -0.96508 0.17253 -5.594
## lightlight2 0.21450 0.02839 7.555
## lightlight3 0.31050 0.02839 10.937
## lightlight4 0.20800 0.02839 7.326
## lightlight5 0.20450 0.02839 7.203
## lightlight6 0.15150 0.02839 5.336
##
## Correlation of Fixed Effects:
## (Intr) CnUSoA lghtl2 lghtl3 lghtl4 lghtl5
## CntryUntSoA -0.845
## lightlight2 -0.096 0.000
## lightlight3 -0.096 0.000 0.500
## lightlight4 -0.096 0.000 0.500 0.500
## lightlight5 -0.096 0.000 0.500 0.500 0.500
## lightlight6 -0.096 0.000 0.500 0.500 0.500 0.500
plot(m2)
\[ Y_{ij} = \beta_0 + \beta_1 X_{ij} + u_j + \epsilon_{ij} \]
\[ \text{ICC} = \frac{\sigma^2_{\text{group}}}{\sigma^2_{\text{group}} + \sigma^2_{\text{residual}}} \]
vc <- as.data.frame(VarCorr(m2))
rater_var <- vc$vcov[vc$grp == "Rater"]
resid_var <- vc$vcov[vc$grp == "Residual"]
icc <- rater_var / (rater_var + resid_var)
icc
## [1] 0.9357286
Interpretation of ICC = 0.9357
average_rating is due to differences between
ratersMeaning, Ratings from the same rater are highly
consistent - Different raters have very different
baseline tendencies (some rate higher/lower) - The random
effect (1 | Rater) is crucial for
correctly modeling the data
Takeaway:
Most of the variation is driven by who is rating, not random
noise