This is an analytic replication of Jinkinson Smith’s submission to OQSPS: https://openpsych.net/forum/showthread.php?tid=305&pid=4379
library(pacman)
p_load(kirkegaard, readxl, rms)
Load data, create derived variables.
#read the spreadsheet and pass it through df_legalize_names()
d = df_legalize_names(readxl::read_xlsx("data/data.xlsx"))

#derived variables; mutate() evaluates them in order, so VAI, VAI0 and shift
#can use the columns defined just above them
d = d %>% mutate(
  #reverse coded: flip the sign of Expected
  Expected = Expected * -1,
  #margin of victory, positive when Trump is ahead of Clinton
  MOV = Trump_2016 - Clinton_2016,
  Trump_win = Clinton_2016 < Trump_2016,
  total_visits = Clinton_visits + Trump_visits,
  #visit balance: +1 = only Trump visits, -1 = only Clinton visits
  VAI = (Trump_visits - Clinton_visits) / total_visits,
  #same index with missing values replaced by 0
  VAI0 = if_else(is.na(VAI), 0, VAI),
  #uses the sign-flipped Expected from above
  shift = MOV - Expected
)
#correlations among the outcome, turnout and visit variables, with p values
#NOTE(review): p_val = .95 looks like a confidence level; confirm whether
#cor_matrix() expects a logical here (the output below does show p values)
cor_matrix(dplyr::select(d, Trump_2016, MOV, Turnout, shift, VAI), p_val = .95)
## Trump_2016 MOV Turnout
## Trump_2016 "1" "0.99 [p=2.68e-40]" "-0.27 [p=0.0537]"
## MOV "0.99 [p=2.68e-40]" "1" "-0.27 [p=0.0549]"
## Turnout "-0.27 [p=0.0537]" "-0.27 [p=0.0549]" "1"
## shift "0.91 [p=7.98e-20]" "0.92 [p=5.09e-22]" "-0.14 [p=0.339]"
## VAI "-0.01 [p=0.955]" "0.07 [p=0.725]" "-0.16 [p=0.426]"
## shift VAI
## Trump_2016 "0.91 [p=7.98e-20]" "-0.01 [p=0.955]"
## MOV "0.92 [p=5.09e-22]" "0.07 [p=0.725]"
## Turnout "-0.14 [p=0.339]" "-0.16 [p=0.426]"
## shift "1" "0.04 [p=0.853]"
## VAI "0.04 [p=0.853]" "1"
#scatterplots of the main variable pairs
#presumably the two quoted names are the x and y variables, in that order, and
#case_names = "State" labels each point with its State value; confirm against
#GG_scatter's documentation
#Turnout and MOV
GG_scatter(d, "Turnout", "MOV", case_names = "State")
#VAI and MOV
GG_scatter(d, "VAI", "MOV", case_names = "State")
#Turnout and shift
GG_scatter(d, "Turnout", "shift", case_names = "State")
#binary tests: compare Turnout between the Trump_win = FALSE and TRUE groups
#nonparametric: Wilcoxon rank sum test with the continuity correction
#(TRUE is spelled out because T is an ordinary variable that can be reassigned)
wilcox.test(Turnout ~ Trump_win, data = d, correct = TRUE)
## Warning in wilcox.test.default(x = c(0.567, 0.701, 0.642, 0.644, 0.609, :
## cannot compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: Turnout by Trump_win
## W = 446.5, p-value = 0.01216
## alternative hypothesis: true location shift is not equal to 0
#same Wilcoxon rank sum test, without the continuity correction
#(FALSE is spelled out because F is an ordinary variable that can be reassigned)
wilcox.test(Turnout ~ Trump_win, data = d, correct = FALSE)
## Warning in wilcox.test.default(x = c(0.567, 0.701, 0.642, 0.644, 0.609, :
## cannot compute exact p-value with ties
##
## Wilcoxon rank sum test
##
## data: Turnout by Trump_win
## W = 446.5, p-value = 0.01183
## alternative hypothesis: true location shift is not equal to 0
#regular two-sample t-test of Turnout by Trump_win, assuming equal variances
#(TRUE is spelled out because T is an ordinary variable that can be reassigned)
t.test(Turnout ~ Trump_win, data = d, var.equal = TRUE)
##
## Two Sample t-test
##
## data: Turnout by Trump_win
## t = 2.2518, df = 49, p-value = 0.02885
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.004045685 0.071173363
## sample estimates:
## mean in group FALSE mean in group TRUE
## 0.6301429 0.5925333
#Welch two-sample t-test: same comparison without assuming equal variances
#(FALSE is spelled out because F is an ordinary variable that can be reassigned)
t.test(Turnout ~ Trump_win, data = d, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: Turnout by Trump_win
## t = 2.1137, df = 33.139, p-value = 0.04215
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.001414461 0.073804586
## sample estimates:
## mean in group FALSE mean in group TRUE
## 0.6301429 0.5925333
#plot Turnout for the two Trump_win groups
#presumably shows the group means together with the individual points
#(type = "points"); confirm against GG_group_means' documentation
GG_group_means(d, "Turnout", "Trump_win", type = "points")
#obvious model: linear regression of MOV on Turnout and VAI
#VAI is missing for 25 rows, so this fit uses only the 26 rows where it is defined
ols(MOV ~ Turnout + VAI, data = d)
## Frequencies of Missing Values Due to Each Variable
## MOV Turnout VAI
## 0 0 25
##
## Linear Regression Model
##
## ols(formula = MOV ~ Turnout + VAI, data = d)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 26 LR chi2 0.82 R2 0.031
## sigma12.9067 d.f. 2 R2 adj -0.053
## d.f. 23 Pr(> chi2) 0.6641 g 2.567
##
## Residuals
##
## Min 1Q Median 3Q Max
## -34.0233 -4.8478 0.7771 6.1377 24.1105
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 22.4171 28.3413 0.79 0.4370
## Turnout -34.4441 44.0676 -0.78 0.4424
## VAI 1.1360 5.1411 0.22 0.8271
##
#impute 0's for VAI: same model using VAI0 (missing VAI replaced by 0),
#so that all 51 rows enter the fit
ols(MOV ~ Turnout + VAI0, data = d)
## Linear Regression Model
##
## ols(formula = MOV ~ Turnout + VAI0, data = d)
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 51 LR chi2 3.88 R2 0.073
## sigma23.3760 d.f. 2 R2 adj 0.035
## d.f. 48 Pr(> chi2) 0.1439 g 7.225
##
## Residuals
##
## Min 1Q Median 3Q Max
## -86.785 -13.885 4.089 14.243 41.377
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 67.6484 33.1215 2.04 0.0466
## Turnout -105.0671 54.4630 -1.93 0.0596
## VAI0 -0.4256 7.2899 -0.06 0.9537
##
#take the number of visits into account too: adds total_visits and its
#interaction with VAI0 to the model
#NOTE(review): this is an interaction model, not a weighted fit (no weights
#argument is passed)
#NOTE(review): total_visits is missing for 25 rows, so the sample drops back to
#26 rows despite the zero-imputed VAI0; confirm whether total_visits should
#also be imputed as 0
ols(MOV ~ Turnout + VAI0 * total_visits, data = d)
## Frequencies of Missing Values Due to Each Variable
## MOV Turnout VAI0 total_visits
## 0 0 0 25
##
## Linear Regression Model
##
## ols(formula = MOV ~ Turnout + VAI0 * total_visits, data = d)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 26 LR chi2 1.17 R2 0.044
## sigma13.4170 d.f. 4 R2 adj -0.138
## d.f. 21 Pr(> chi2) 0.8834 g 3.054
##
## Residuals
##
## Min 1Q Median 3Q Max
## -33.7212 -4.7550 0.2351 3.9728 26.1732
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 24.8990 31.3762 0.79 0.4363
## Turnout -41.9539 50.5651 -0.83 0.4160
## VAI0 2.4395 6.1733 0.40 0.6967
## total_visits 0.0745 0.1653 0.45 0.6568
## VAI0 * total_visits 0.0961 0.7610 0.13 0.9007
##