setwd("/Users/isaiahmireles/Downloads")
df <- read.csv("lighting copy-1.csv")
str(df)
## 'data.frame':    1200 obs. of  5 variables:
##  $ observation   : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Country       : chr  "United States of America" "United States of America" "United States of America" "United States of America" ...
##  $ light         : chr  "light1" "light2" "light3" "light4" ...
##  $ Rater         : int  1 1 1 1 1 1 2 2 2 2 ...
##  $ average_rating: num  7 7.7 7.8 7.8 7.7 7.5 4.7 4.4 5 4.9 ...
unique(df$Country) # fixed factor  
## [1] "United States of America" "India"
unique(df$light) # fixed factor 
## [1] "light1" "light2" "light3" "light4" "light5" "light6"
unique(df$Rater) # random factor   
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200
md1 <- lm(average_rating ~ Country + light, dat = df)
summary(md1)
## 
## Call:
## lm(formula = average_rating ~ Country + light, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5161 -0.6747  0.0086  0.6939  3.0333 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      5.86668    0.09482  61.874  < 2e-16 ***
## CountryUnited States of America -0.96508    0.07226 -13.356  < 2e-16 ***
## lightlight2                      0.21450    0.11177   1.919  0.05520 .  
## lightlight3                      0.31050    0.11177   2.778  0.00555 ** 
## lightlight4                      0.20800    0.11177   1.861  0.06299 .  
## lightlight5                      0.20450    0.11177   1.830  0.06754 .  
## lightlight6                      0.15150    0.11177   1.356  0.17551    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.118 on 1193 degrees of freedom
## Multiple R-squared:  0.1354, Adjusted R-squared:  0.1311 
## F-statistic: 31.14 on 6 and 1193 DF,  p-value: < 2.2e-16
library(car)
## Loading required package: carData
library(lattice)
library(effects)
## Warning: package 'effects' was built under R version 4.4.3
## Use the command
##     lattice::trellis.par.set(effectsTheme())
##   to customize lattice options for effects plots.
## See ?effectsTheme for details.
plot(allEffects(md1), ask=FALSE)

plot(md1)

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
df |> group_by(Country) |> summarize(sum = mean(average_rating))
## # A tibble: 2 × 2
##   Country                    sum
##   <chr>                    <dbl>
## 1 India                     6.05
## 2 United States of America  5.08

t-val

\[ t = \frac{\hat{\beta}}{\text{Se}(\hat{\beta})} \]

library(lme4)
## Loading required package: Matrix
library(Matrix)
m2 <- lmer(average_rating ~ Country + light + (1|Rater), data=df)
m2
## Linear mixed model fit by REML ['lmerMod']
## Formula: average_rating ~ Country + light + (1 | Rater)
##    Data: df
## REML criterion at convergence: 1305.917
## Random effects:
##  Groups   Name        Std.Dev.
##  Rater    (Intercept) 1.0833  
##  Residual             0.2839  
## Number of obs: 1200, groups:  Rater, 200
## Fixed Effects:
##                     (Intercept)  CountryUnited States of America  
##                          5.8667                          -0.9651  
##                     lightlight2                      lightlight3  
##                          0.2145                           0.3105  
##                     lightlight4                      lightlight5  
##                          0.2080                           0.2045  
##                     lightlight6  
##                          0.1515
summary(m2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: average_rating ~ Country + light + (1 | Rater)
##    Data: df
## 
## REML criterion at convergence: 1305.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.2105 -0.4611  0.0155  0.4517  3.8312 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Rater    (Intercept) 1.1735   1.0833  
##  Residual             0.0806   0.2839  
## Number of obs: 1200, groups:  Rater, 200
## 
## Fixed effects:
##                                 Estimate Std. Error t value
## (Intercept)                      5.86668    0.14804  39.629
## CountryUnited States of America -0.96508    0.17253  -5.594
## lightlight2                      0.21450    0.02839   7.555
## lightlight3                      0.31050    0.02839  10.937
## lightlight4                      0.20800    0.02839   7.326
## lightlight5                      0.20450    0.02839   7.203
## lightlight6                      0.15150    0.02839   5.336
## 
## Correlation of Fixed Effects:
##             (Intr) CnUSoA lghtl2 lghtl3 lghtl4 lghtl5
## CntryUntSoA -0.845                                   
## lightlight2 -0.096  0.000                            
## lightlight3 -0.096  0.000  0.500                     
## lightlight4 -0.096  0.000  0.500  0.500              
## lightlight5 -0.096  0.000  0.500  0.500  0.500       
## lightlight6 -0.096  0.000  0.500  0.500  0.500  0.500
plot(m2)

\[ Y_{ij} = \beta_0 + \beta_1 X_{ij} + u_j + \epsilon_{ij} \]

\[ \text{ICC} = \frac{\sigma^2_{\text{group}}}{\sigma^2_{\text{group}} + \sigma^2_{\text{residual}}} \]

intra cross correlation

vc <- as.data.frame(VarCorr(m2))

rater_var <- vc$vcov[vc$grp == "Rater"]
resid_var <- vc$vcov[vc$grp == "Residual"]

icc <- rater_var / (rater_var + resid_var)
icc
## [1] 0.9357286

Interpretation of ICC = 0.9357

Meaning, Ratings from the same rater are highly consistent - Different raters have very different baseline tendencies (some rate higher/lower) - The random effect (1 | Rater) is crucial for correctly modeling the data

Takeaway:
Most of the variation is driven by who is rating, not random noise