setwd("~/Dropbox/Data Analysis")
# install.packages("ez")
library(readr)
library(psych)
library(tibble)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ purrr 0.3.4 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%() masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggplot2)
library(dplyr)
library(knitr)
library(pwr)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
##
## The following object is masked from 'package:psych':
##
## logit
library(devtools)
## Loading required package: usethis
library(effects)
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
## The following object is masked from 'package:psych':
##
## describe
##
## The following objects are masked from 'package:base':
##
## format.pval, units
library(emmeans)
##
## Attaching package: 'emmeans'
##
## The following object is masked from 'package:devtools':
##
## test
library(pacman)
library(ez)
source('https://raw.githubusercontent.com/joshuacorrell/teachR/main/mcSummaryLm.R')
SAT_data <-read.csv("SAT.csv", header = T)
SAT_data_wide <- dplyr::mutate(SAT_data,
W0 = (SATV + SATM + RSATV + RSATM)/sqrt(4),
W_SUBJECT = ((-1)*SATV + SATM + (-1)*RSATV + RSATM)/sqrt(4),
W_REPORT = ((-1)*SATV + (-1)*SATM + RSATV + RSATM)/sqrt(4),
W_SUBXREPO = (SATV) + (-1)*SATM + (-1)*RSATV + RSATM/sqrt(4),
ID = row_number())
# Is there a difference in SAT scores across individuals?
SAT_W0_model <- lm(W0 ~ 1, data = SAT_data_wide)
mcSummary(SAT_W0_model)
## lm(formula = W0 ~ 1, data = SAT_data_wide)
##
## Omnibus ANOVA
## SS df MS EtaSq F p
## Model 0.00 0 Inf 0
## Error 24264.72 169 143.578
## Corr Total 24264.72 169 143.578
##
## RMSE AdjEtaSq
## 11.982 0
##
## Coefficients
## Est StErr t SSR(3) EtaSq tol CI_2.5 CI_97.5 p
## (Intercept) 109.356 0.919 118.993 2032981 0.988 NA 107.542 111.17 0
# Do SAT scores differ by the test subject?
SAT_W_SUBJECT_model <- lm(W_SUBJECT ~ 1, data = SAT_data_wide)
mcSummary(SAT_W_SUBJECT_model)
## lm(formula = W_SUBJECT ~ 1, data = SAT_data_wide)
##
## Omnibus ANOVA
## SS df MS EtaSq F p
## Model 0.00 0 Inf 0
## Error 13065.29 169 77.309
## Corr Total 13065.29 169 77.309
##
## RMSE AdjEtaSq
## 8.793 0
##
## Coefficients
## Est StErr t SSR(3) EtaSq tol CI_2.5 CI_97.5 p
## (Intercept) 6.035 0.674 8.95 6192.212 0.322 NA 4.704 7.367 0
# Do self-reported and actual SAT scores differ frome each other?
SAT_W_REPORT_model <- lm(W_REPORT ~ 1, data = SAT_data_wide)
mcSummary(SAT_W_REPORT_model)
## lm(formula = W_REPORT ~ 1, data = SAT_data_wide)
##
## Omnibus ANOVA
## SS df MS EtaSq F p
## Model 0.000 0 Inf 0
## Error 480.778 169 2.845
## Corr Total 480.778 169 2.845
##
## RMSE AdjEtaSq
## 1.687 0
##
## Coefficients
## Est StErr t SSR(3) EtaSq tol CI_2.5 CI_97.5 p
## (Intercept) -0.379 0.129 -2.933 24.472 0.048 NA -0.635 -0.124 0.004
# Do self-reported and actual SAT scores differ frome each other depending on the subject of the test?
SAT_W_SUBXREPO_model <- lm(W_SUBXREPO ~ 1, data = SAT_data_wide)
mcSummary(SAT_W_SUBXREPO_model)
## lm(formula = W_SUBXREPO ~ 1, data = SAT_data_wide)
##
## Omnibus ANOVA
## SS df MS EtaSq F p
## Model 0.000 0 Inf 0
## Error 7066.278 169 41.812
## Corr Total 7066.278 169 41.812
##
## RMSE AdjEtaSq
## 6.466 0
##
## Coefficients
## Est StErr t SSR(3) EtaSq tol CI_2.5 CI_97.5 p
## (Intercept) -27.621 0.496 -55.694 129692.5 0.948 NA -28.6 -26.642 0
SAT_data_wide <- rename(SAT_data_wide, Self.M = SATM, Self.V = SATV, Real.M = RSATM, Real.V = RSATV)
SAT_data_long <- gather(SAT_data_wide, key = type.subject, value = Score, Self.M, Self.V, Real.M, Real.V) %>%
dplyr::select(-W0, -W_SUBJECT, -W_REPORT, -W_SUBXREPO) %>%
separate(type.subject, into = c("Type", "Subject")) %>%
mutate(Type = dplyr::recode(Type, "Self" = 1, "Real" = 2),
Subject = dplyr::recode(Subject, "M" = -1, "V" = 1))
SAT_long_model <- lm(Score ~ 1 + Type + Subject + Type:Subject, data = SAT_data_long)
mcSummary(SAT_long_model)
## there are higher-order terms (interactions) in this model
## consider setting type = 'predictor'; see ?vif
## lm(formula = Score ~ 1 + Type + Subject + Type:Subject, data = SAT_data_long)
##
## Omnibus ANOVA
## SS df MS EtaSq F p
## Model 6287.86 3 2095.953 0.14 36.549 0
## Error 38766.11 676 57.346
## Corr Total 45053.97 679 66.353
##
## RMSE AdjEtaSq
## 7.573 0.136
##
## Coefficients
## Est StErr t SSR(3) EtaSq tol CI_2.5 CI_97.5 p
## (Intercept) 55.247 0.918 60.160 207552.151 0.843 NA 53.444 57.050 0.000
## Type -0.379 0.581 -0.653 24.472 0.001 1.0 -1.520 0.761 0.514
## Subject -2.047 0.918 -2.229 284.951 0.007 0.1 -3.850 -0.244 0.026
## Type:Subject -0.647 0.581 -1.114 71.176 0.002 0.1 -1.787 0.493 0.266
ggplot(SAT_data_long, aes(x = factor(Type), y = Score, fill = factor(Subject))) +
geom_bar(stat = "summary", fun.y = "mean", position = "dodge") +
scale_fill_brewer(name = "Subject", labels = c("Math", "Verbal"), palette = "Paired") +
scale_x_discrete(name = "Type of Score Reported", labels = c("Self Report", "Actual Score")) +
ggtitle("Self-Reported and Actual SAT Scores by Test Subject") +
theme_minimal()
## Warning: Ignoring unknown parameters: fun.y
## No summary function supplied, defaulting to `mean_se()`
## Testing Same Analyses with Repeated Measures ANOVA
SAT_data_long$TypeF <- factor(SAT_data_long$Type, labels = c("Self", "Real"))
SAT_data_long$SubjectF <- factor(SAT_data_long$Subject, labels = c("Math", "Verbal"))
SAT_data_long_ANOVA = ezANOVA(data = SAT_data_long, dv = Score,
wid = ID,
within = .(TypeF, SubjectF),
within_full = NULL,
within_covariates = NULL,
between = NULL,
between_covariates = NULL,
observed = TypeF,
diff = NULL,
reverse_diff = FALSE,
type = 3,
white.adjust = FALSE,
detailed = TRUE,
return_aov = FALSE)
## Warning: Converting "ID" to factor for ANOVA.
print(SAT_data_long_ANOVA)
## $ANOVA
## Effect DFn DFd SSn SSd F p
## 1 (Intercept) 1 169 2.032981e+06 24264.7191 14159.393647 7.041056e-165
## 2 TypeF 1 169 2.447206e+01 480.7779 8.602262 3.823477e-03
## 3 SubjectF 1 169 6.192212e+03 13065.2882 80.096495 6.192837e-16
## 4 TypeF:SubjectF 1 169 7.117647e+01 955.3235 12.591361 5.018688e-04
## p<.05 ges
## 1 * 0.9812428979
## 2 * 0.0006297208
## 3 * 0.1374398724
## 4 * 0.0018315299