setwd("~/Dropbox/Data Analysis")

Install ez

# install.packages("ez")

Loading packages

library(readr)
library(psych)
library(tibble)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.0      ✔ stringr 1.4.1 
## ✔ purrr   0.3.4      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()   masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
library(ggplot2)
library(dplyr)
library(knitr)
library(pwr)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
## 
## The following object is masked from 'package:psych':
## 
##     logit
library(devtools)
## Loading required package: usethis
library(effects)
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## The following object is masked from 'package:psych':
## 
##     describe
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(emmeans)
## 
## Attaching package: 'emmeans'
## 
## The following object is masked from 'package:devtools':
## 
##     test
library(pacman)
library(ez)
source('https://raw.githubusercontent.com/joshuacorrell/teachR/main/mcSummaryLm.R')

Importing data

SAT_data <-read.csv("SAT.csv", header = T) 

Creating Ws and ID variable

SAT_data_wide <- dplyr::mutate(SAT_data,
                               W0 = (SATV + SATM + RSATV + RSATM)/sqrt(4),
                               W_SUBJECT = ((-1)*SATV + SATM + (-1)*RSATV + RSATM)/sqrt(4),
                               W_REPORT = ((-1)*SATV + (-1)*SATM + RSATV + RSATM)/sqrt(4),
                               W_SUBXREPO = (SATV) + (-1)*SATM + (-1)*RSATV + RSATM/sqrt(4),
                               ID = row_number())

Interpreting W0

# Is there a difference in SAT scores across individuals?

SAT_W0_model  <- lm(W0 ~ 1, data = SAT_data_wide)

mcSummary(SAT_W0_model)
## lm(formula = W0 ~ 1, data = SAT_data_wide)
## 
## Omnibus ANOVA
##                  SS  df      MS EtaSq F p
## Model          0.00   0     Inf     0    
## Error      24264.72 169 143.578          
## Corr Total 24264.72 169 143.578          
## 
##    RMSE AdjEtaSq
##  11.982        0
## 
## Coefficients
##                 Est StErr       t  SSR(3) EtaSq tol  CI_2.5 CI_97.5 p
## (Intercept) 109.356 0.919 118.993 2032981 0.988  NA 107.542  111.17 0

Interpreting W_SUBJECT

# Do SAT scores differ by the test subject?

SAT_W_SUBJECT_model  <- lm(W_SUBJECT ~ 1, data = SAT_data_wide)

mcSummary(SAT_W_SUBJECT_model)
## lm(formula = W_SUBJECT ~ 1, data = SAT_data_wide)
## 
## Omnibus ANOVA
##                  SS  df     MS EtaSq F p
## Model          0.00   0    Inf     0    
## Error      13065.29 169 77.309          
## Corr Total 13065.29 169 77.309          
## 
##   RMSE AdjEtaSq
##  8.793        0
## 
## Coefficients
##               Est StErr    t   SSR(3) EtaSq tol CI_2.5 CI_97.5 p
## (Intercept) 6.035 0.674 8.95 6192.212 0.322  NA  4.704   7.367 0

Interpreting W_REPORT

# Do self-reported and actual SAT scores differ frome each other?

SAT_W_REPORT_model  <- lm(W_REPORT ~ 1, data = SAT_data_wide)

mcSummary(SAT_W_REPORT_model)
## lm(formula = W_REPORT ~ 1, data = SAT_data_wide)
## 
## Omnibus ANOVA
##                 SS  df    MS EtaSq F p
## Model        0.000   0   Inf     0    
## Error      480.778 169 2.845          
## Corr Total 480.778 169 2.845          
## 
##   RMSE AdjEtaSq
##  1.687        0
## 
## Coefficients
##                Est StErr      t SSR(3) EtaSq tol CI_2.5 CI_97.5     p
## (Intercept) -0.379 0.129 -2.933 24.472 0.048  NA -0.635  -0.124 0.004

Interpreting W_SUBXREPO

# Do self-reported and actual SAT scores differ frome each other depending on the subject of the test?

SAT_W_SUBXREPO_model  <- lm(W_SUBXREPO ~ 1, data = SAT_data_wide)

mcSummary(SAT_W_SUBXREPO_model)
## lm(formula = W_SUBXREPO ~ 1, data = SAT_data_wide)
## 
## Omnibus ANOVA
##                  SS  df     MS EtaSq F p
## Model         0.000   0    Inf     0    
## Error      7066.278 169 41.812          
## Corr Total 7066.278 169 41.812          
## 
##   RMSE AdjEtaSq
##  6.466        0
## 
## Coefficients
##                 Est StErr       t   SSR(3) EtaSq tol CI_2.5 CI_97.5 p
## (Intercept) -27.621 0.496 -55.694 129692.5 0.948  NA  -28.6 -26.642 0

Renaming SAT Scores

SAT_data_wide <- rename(SAT_data_wide, Self.M = SATM, Self.V = SATV, Real.M = RSATM, Real.V = RSATV) 

Converting from Wide to Long

SAT_data_long <- gather(SAT_data_wide, key = type.subject, value = Score, Self.M, Self.V, Real.M, Real.V) %>% 
  dplyr::select(-W0, -W_SUBJECT, -W_REPORT, -W_SUBXREPO) %>% 
  separate(type.subject, into = c("Type", "Subject")) %>% 
  mutate(Type = dplyr::recode(Type, "Self" = 1, "Real" = 2),
        Subject = dplyr::recode(Subject, "M" = -1, "V" = 1))

Calculating SS not Accounting for dependence (long format)

SAT_long_model <- lm(Score ~ 1 + Type + Subject + Type:Subject, data = SAT_data_long)

mcSummary(SAT_long_model)
## there are higher-order terms (interactions) in this model
## consider setting type = 'predictor'; see ?vif
## lm(formula = Score ~ 1 + Type + Subject + Type:Subject, data = SAT_data_long)
## 
## Omnibus ANOVA
##                  SS  df       MS EtaSq      F p
## Model       6287.86   3 2095.953  0.14 36.549 0
## Error      38766.11 676   57.346               
## Corr Total 45053.97 679   66.353               
## 
##   RMSE AdjEtaSq
##  7.573    0.136
## 
## Coefficients
##                 Est StErr      t     SSR(3) EtaSq tol CI_2.5 CI_97.5     p
## (Intercept)  55.247 0.918 60.160 207552.151 0.843  NA 53.444  57.050 0.000
## Type         -0.379 0.581 -0.653     24.472 0.001 1.0 -1.520   0.761 0.514
## Subject      -2.047 0.918 -2.229    284.951 0.007 0.1 -3.850  -0.244 0.026
## Type:Subject -0.647 0.581 -1.114     71.176 0.002 0.1 -1.787   0.493 0.266

Graphing

ggplot(SAT_data_long, aes(x = factor(Type), y = Score, fill = factor(Subject))) + 
  geom_bar(stat = "summary", fun.y = "mean", position = "dodge") + 
  scale_fill_brewer(name = "Subject", labels = c("Math", "Verbal"), palette = "Paired") + 
  scale_x_discrete(name = "Type of Score Reported", labels = c("Self Report", "Actual Score")) +  
  ggtitle("Self-Reported and Actual SAT Scores by Test Subject") +
  theme_minimal()
## Warning: Ignoring unknown parameters: fun.y
## No summary function supplied, defaulting to `mean_se()`

## Testing Same Analyses with Repeated Measures ANOVA

SAT_data_long$TypeF <- factor(SAT_data_long$Type, labels = c("Self", "Real"))

SAT_data_long$SubjectF <- factor(SAT_data_long$Subject, labels = c("Math", "Verbal"))

SAT_data_long_ANOVA = ezANOVA(data = SAT_data_long, dv = Score, 
                         wid = ID, 
                         within = .(TypeF, SubjectF), 
                         within_full = NULL, 
                         within_covariates = NULL, 
                         between = NULL, 
                         between_covariates = NULL, 
                         observed = TypeF, 
                         diff = NULL, 
                         reverse_diff = FALSE, 
                         type = 3, 
                         white.adjust = FALSE, 
                         detailed = TRUE, 
                         return_aov = FALSE)
## Warning: Converting "ID" to factor for ANOVA.
print(SAT_data_long_ANOVA)
## $ANOVA
##           Effect DFn DFd          SSn        SSd            F             p
## 1    (Intercept)   1 169 2.032981e+06 24264.7191 14159.393647 7.041056e-165
## 2          TypeF   1 169 2.447206e+01   480.7779     8.602262  3.823477e-03
## 3       SubjectF   1 169 6.192212e+03 13065.2882    80.096495  6.192837e-16
## 4 TypeF:SubjectF   1 169 7.117647e+01   955.3235    12.591361  5.018688e-04
##   p<.05          ges
## 1     * 0.9812428979
## 2     * 0.0006297208
## 3     * 0.1374398724
## 4     * 0.0018315299