installing necessary libraries
library(dplyr)
library(texreg)
library(lme4)
library(rmarkdown)
library(ggplot2)
library(GGally)
library(effects)
reading in and transforming data
all <- read.csv("~/Documents/Dropbox/Research/Eric/Study 2a/2a v2/all conds with beta delta.csv") %>% #reading in
mutate(cond_label = factor(ifelse(cond == 1, "no_default", #making a condition description column
ifelse(cond == 2, "SS_default",
ifelse(cond == 3, "LL_default", NA)))),
DV_label = factor(ifelse(DV == 0, "SS",
ifelse(DV == 1, "LL", NA))),
cond = factor(cond)) %>% #making condition a factor
select(-coding) #dropping coding column
the logistic regression below gives the effects of conditions 2 and 3 relative to condition 1, which I don’t think we want… so I use ANOVA instead to test for an overall effect of condition Note that DV is choice of SS/LL payment option, cond is the default condition (SS / LL / No default)
summary(glm(DV ~ cond, data=all, family="binomial"))
##
## Call:
## glm(formula = DV ~ cond, family = "binomial", data = all)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.3707 -1.1010 0.9957 1.2557 1.2793
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.18232 0.21409 -0.852 0.3944
## cond2 -0.05407 0.25635 -0.211 0.8330
## cond3 0.62601 0.25914 2.416 0.0157 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 677.65 on 488 degrees of freedom
## Residual deviance: 664.88 on 486 degrees of freedom
## AIC: 670.88
##
## Number of Fisher Scoring iterations: 4
summary(aov(DV ~ cond, data=all))
## Df Sum Sq Mean Sq F value Pr(>F)
## cond 2 3.17 1.5861 6.477 0.00167 **
## Residuals 486 119.02 0.2449
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
given the significant effect of default condition revealed by ANOVA, test for differences in the proportions of Ps choosing SS between each condition
first just look at the numbers:
table(all$cond_label, all$DV_label) # columns: 0=SS, 1=LL. rows: 1=no default, 2=SS default, 3=LL default
##
## LL SS
## LL_default 120 77
## no_default 40 48
## SS_default 90 114
prop.test(table(all$cond, all$DV)) #proportions shown = % choosing SS
##
## 3-sample test for equality of proportions without continuity
## correction
##
## data: table(all$cond, all$DV)
## X-squared = 12.6956, df = 2, p-value = 0.001751
## alternative hypothesis: two.sided
## sample estimates:
## prop 1 prop 2 prop 3
## 0.5454545 0.5588235 0.3908629
centering continuous measures
all$betac <- all$beta - mean(all$beta)
all$deltac<- all$delta - mean(all$delta)
all$adr<- exp(all$deltac * 365)
summary(glm(DV ~ betac, data=all, family="binomial"))
##
## Call:
## glm(formula = DV ~ betac, family = "binomial", data = all)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1908 -1.1331 0.4849 1.0409 2.4620
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.01236 0.09823 -0.126 0.9
## betac 6.37032 0.96104 6.629 3.39e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 677.65 on 488 degrees of freedom
## Residual deviance: 611.16 on 487 degrees of freedom
## AIC: 615.16
##
## Number of Fisher Scoring iterations: 4
summary(aov(DV ~ betac * cond, data=all)) #no interaction between beta and condition
## Df Sum Sq Mean Sq F value Pr(>F)
## betac 1 13.74 13.735 63.768 1.03e-14 ***
## cond 2 3.96 1.978 9.184 0.000122 ***
## betac:cond 2 0.46 0.230 1.068 0.344518
## Residuals 483 104.04 0.215
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
but there is an interaction between delta and default condition
summary(aov(DV ~ deltac * cond, data=all))
## Df Sum Sq Mean Sq F value Pr(>F)
## deltac 1 17.03 17.026 82.273 < 2e-16 ***
## cond 2 3.31 1.657 8.005 0.00038 ***
## deltac:cond 2 1.90 0.948 4.581 0.01069 *
## Residuals 483 99.95 0.207
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
and also between annualized discount rate (ADR) and default condition - we’ll use ADR from here on
summary(aov(DV ~ adr * cond, data=all))
## Df Sum Sq Mean Sq F value Pr(>F)
## adr 1 8.38 8.379 37.824 1.62e-09 ***
## cond 2 3.24 1.619 7.309 0.000746 ***
## adr:cond 2 3.58 1.788 8.073 0.000356 ***
## Residuals 483 106.99 0.222
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
beta and delta (centered) are correlated:
##splom
ggpairs(all %>% select(betac, deltac),
lower=list(continuous="smooth", params=c(colour="blue")),
diag=list(continuous="density", params=c(colour="blue")),
upper=list(params=list(corSize=5)), axisLabels='show',
title = "beta / delta correlation")
evaluate the nested logit as per eric’s suggestion - this tell us the effect of ADR in each condition:
nest <- glm(formula = DV ~ adr %in% cond, family = "binomial", data = all)
summary(nest)
##
## Call:
## glm(formula = DV ~ adr %in% cond, family = "binomial", data = all)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4755 -1.2450 0.9121 1.0025 2.4335
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.72213 0.13383 5.396 6.81e-08 ***
## adr:cond1 -0.22190 0.07918 -2.802 0.00507 **
## adr:cond2 -0.78810 0.14546 -5.418 6.02e-08 ***
## adr:cond3 -0.31935 0.07957 -4.014 5.98e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 677.65 on 488 degrees of freedom
## Residual deviance: 614.73 on 485 degrees of freedom
## AIC: 622.73
##
## Number of Fisher Scoring iterations: 5
we can then plot these three lines
effect_test <- effect("adr*cond", nest)
summary(effect_test)
##
## adr*cond effect
## cond
## adr 1 2 3
## 5 0.404355569 3.848026e-02 0.294291856
## 10 0.182898940 7.773287e-04 0.077888344
## 15 0.068733501 1.512162e-05 0.016821213
## 20 0.023758062 2.939459e-07 0.003453490
## 25 0.007960523 5.713867e-09 0.000701442
##
## Lower 95 Percent Confidence Limits
## cond
## adr 1 2 3
## 5 0.2459651645 1.083860e-02 1.736945e-01
## 10 0.0481828199 5.169282e-05 1.953946e-02
## 15 0.0076779114 2.422959e-07 1.859056e-03
## 20 0.0011769221 1.133904e-09 1.734743e-04
## 25 0.0001791562 5.303329e-12 1.614263e-05
##
## Upper 95 Percent Confidence Limits
## cond
## adr 1 2 3
## 5 0.5855381 1.275279e-01 0.45274317
## 10 0.4974281 1.157118e-02 0.26362855
## 15 0.4131600 9.428751e-04 0.13581713
## 20 0.3344999 7.619489e-05 0.06473589
## 25 0.2643534 6.156147e-06 0.02961790
plot(effect_test, multiline=T, ylab="SS", sug=F)
try spotlight analysis - at 1st and 3rd quartiles of continuous delta measure
summary(all$deltac)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.004604 -0.001713 -0.001225 0.000000 0.001571 0.009228
high_D <- subset(all, deltac >=.001571) #impatient
low_D <- subset(all, deltac <= -.001713) #patient
summary(glm(DV ~ cond, data=high_D, family="binomial"))
##
## Call:
## glm(formula = DV ~ cond, family = "binomial", data = high_D)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9005 -0.7585 -0.6681 -0.6681 1.7941
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6931 0.4330 -1.601 0.109
## cond2 -0.6931 0.5590 -1.240 0.215
## cond3 -0.4055 0.5465 -0.742 0.458
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 136.10 on 121 degrees of freedom
## Residual deviance: 134.58 on 119 degrees of freedom
## AIC: 140.58
##
## Number of Fisher Scoring iterations: 4
summary(glm(DV ~ cond, data=low_D, family="binomial"))
##
## Call:
## glm(formula = DV ~ cond, family = "binomial", data = low_D)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0184 0.5287 0.5287 0.7502 1.1073
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.1671 0.4097 0.408 0.68344
## cond2 0.9569 0.5194 1.842 0.06542 .
## cond3 1.7301 0.5996 2.885 0.00391 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 136.66 on 122 degrees of freedom
## Residual deviance: 127.78 on 120 degrees of freedom
## AIC: 133.78
##
## Number of Fisher Scoring iterations: 4
the proportions these tests give are the % of people choosing SS
prop.test(table(high_D$cond, high_D$DV)) #impatient people choose SS often, but default condition didn't make a difference
##
## 3-sample test for equality of proportions without continuity
## correction
##
## data: table(high_D$cond, high_D$DV)
## X-squared = 1.5618, df = 2, p-value = 0.458
## alternative hypothesis: two.sided
## sample estimates:
## prop 1 prop 2 prop 3
## 0.6666667 0.8000000 0.7500000
prop.test(table(low_D$cond, low_D$DV)) #patient people choose SS seldom, and default cond makes a difference
##
## 3-sample test for equality of proportions without continuity
## correction
##
## data: table(low_D$cond, low_D$DV)
## X-squared = 9.1961, df = 2, p-value = 0.01007
## alternative hypothesis: two.sided
## sample estimates:
## prop 1 prop 2 prop 3
## 0.4583333 0.2452830 0.1304348
note: hid (patient people) make a ‘preference-consistent’ choice when they choose the LL option. Since DV is coded as 1=LL 0=SS, the DV variable is also a ‘chose pref-consistent option’variable. lod(impatient people) make a ’pref-consistent’ choice when DV=0, so for these people the proportion choosing the pref-consistent option is 100 - the proportion given by the prop tests above
significance tests for 2 proportions at a time:
hiD12 <- subset(high_D, cond!=3)
hiD12$cond <- droplevels(hiD12$cond)
prop.test(table(hiD12$cond, hiD12$DV))
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: table(hiD12$cond, hiD12$DV)
## X-squared = 0.9256, df = 1, p-value = 0.336
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.3829399 0.1162732
## sample estimates:
## prop 1 prop 2
## 0.6666667 0.8000000
hiD13 <- subset(high_D, cond!=2)
hiD13$cond <- droplevels(hiD13$cond)
prop.test(table(hiD13$cond, hiD13$DV))
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: table(hiD13$cond, hiD13$DV)
## X-squared = 0.2163, df = 1, p-value = 0.6418
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.3394717 0.1728050
## sample estimates:
## prop 1 prop 2
## 0.6666667 0.7500000
hiD23 <- subset(high_D, cond!=1)
hiD23$cond <- droplevels(hiD23$cond)
prop.test(table(hiD23$cond, hiD23$DV)) #none of these comparisons are significant - the default condition doesn't seem to shift choice for impatient people
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: table(hiD23$cond, hiD23$DV)
## X-squared = 0.1231, df = 1, p-value = 0.7257
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.1356388 0.2356388
## sample estimates:
## prop 1 prop 2
## 0.80 0.75
loD12 <- subset(low_D, cond!=3)
loD12$cond <- droplevels(loD12$cond)
prop.test(table(loD12$cond, loD12$DV))
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: table(loD12$cond, loD12$DV)
## X-squared = 2.5725, df = 1, p-value = 0.1087
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.04777028 0.47387091
## sample estimates:
## prop 1 prop 2
## 0.4583333 0.2452830
loD13 <- subset(low_D, cond!=2)
loD13$cond <- droplevels(loD13$cond)
prop.test(table(loD13$cond, loD13$DV))
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: table(loD13$cond, loD13$DV)
## X-squared = 7.5249, df = 1, p-value = 0.006085
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.07436431 0.58143279
## sample estimates:
## prop 1 prop 2
## 0.4583333 0.1304348
loD23 <- subset(low_D, cond!=1)
loD23$cond <- droplevels(loD23$cond)
prop.test(table(loD23$cond, loD23$DV)) #two of these are at least marginal - the default condition seems capable of shifting choice for patient people
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: table(loD23$cond, loD23$DV)
## X-squared = 1.4194, df = 1, p-value = 0.2335
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.05674753 0.28644400
## sample estimates:
## prop 1 prop 2
## 0.2452830 0.1304348
testing for overall effect of ‘patient’ variable
high_D$patient <- 0
low_D$patient <- 1
combined <- rbind(high_D, low_D)
combined <- combined %>% mutate(patient_label = factor(ifelse(patient == 0, "impatient",
ifelse(patient == 1, "patient", NA))))
prop.test(table(combined$patient, combined$DV_label))
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: table(combined$patient, combined$DV_label)
## X-squared = 61.749, df = 1, p-value = 3.902e-15
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.6260549 -0.3943370
## sample estimates:
## prop 1 prop 2
## 0.2459016 0.7560976