installing necessary libraries

library(dplyr) 
library(texreg)
library(lme4)
library(rmarkdown)
library(ggplot2)
library(GGally)
library(effects)

reading in and transforming data

all <- read.csv("~/Documents/Dropbox/Research/Eric/Study 2a/2a v2/all conds with beta delta.csv") %>% #reading in
        mutate(cond_label = factor(ifelse(cond == 1, "no_default", #making a condition description column
                           ifelse(cond == 2, "SS_default",
                           ifelse(cond == 3, "LL_default", NA)))),
               DV_label = factor(ifelse(DV == 0, "SS",
                          ifelse(DV == 1, "LL", NA))), 
               cond = factor(cond)) %>% #making condition a factor 
        select(-coding) #dropping coding column

the logistic regression below gives the effects of conditions 2 and 3 relative to condition 1, which I don’t think we want… so I use ANOVA instead to test for an overall effect of condition Note that DV is choice of SS/LL payment option, cond is the default condition (SS / LL / No default)

summary(glm(DV ~ cond, data=all, family="binomial"))  
## 
## Call:
## glm(formula = DV ~ cond, family = "binomial", data = all)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.3707  -1.1010   0.9957   1.2557   1.2793  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -0.18232    0.21409  -0.852   0.3944  
## cond2       -0.05407    0.25635  -0.211   0.8330  
## cond3        0.62601    0.25914   2.416   0.0157 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 677.65  on 488  degrees of freedom
## Residual deviance: 664.88  on 486  degrees of freedom
## AIC: 670.88
## 
## Number of Fisher Scoring iterations: 4
summary(aov(DV ~ cond, data=all))
##              Df Sum Sq Mean Sq F value  Pr(>F)   
## cond          2   3.17  1.5861   6.477 0.00167 **
## Residuals   486 119.02  0.2449                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

given the significant effect of default condition revealed by ANOVA, test for differences in the proportions of Ps choosing SS between each condition

first just look at the numbers:

table(all$cond_label, all$DV_label) # columns: 0=SS, 1=LL. rows: 1=no default, 2=SS default, 3=LL default
##             
##               LL  SS
##   LL_default 120  77
##   no_default  40  48
##   SS_default  90 114
prop.test(table(all$cond, all$DV)) #proportions shown = % choosing SS
## 
##  3-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(all$cond, all$DV)
## X-squared = 12.6956, df = 2, p-value = 0.001751
## alternative hypothesis: two.sided
## sample estimates:
##    prop 1    prop 2    prop 3 
## 0.5454545 0.5588235 0.3908629

centering continuous measures

all$betac <- all$beta - mean(all$beta)
all$deltac<- all$delta - mean(all$delta)
all$adr<- exp(all$deltac * 365)

summary(glm(DV ~ betac, data=all, family="binomial"))
## 
## Call:
## glm(formula = DV ~ betac, family = "binomial", data = all)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1908  -1.1331   0.4849   1.0409   2.4620  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.01236    0.09823  -0.126      0.9    
## betac        6.37032    0.96104   6.629 3.39e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 677.65  on 488  degrees of freedom
## Residual deviance: 611.16  on 487  degrees of freedom
## AIC: 615.16
## 
## Number of Fisher Scoring iterations: 4
summary(aov(DV ~ betac * cond, data=all)) #no interaction between beta and condition
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## betac         1  13.74  13.735  63.768 1.03e-14 ***
## cond          2   3.96   1.978   9.184 0.000122 ***
## betac:cond    2   0.46   0.230   1.068 0.344518    
## Residuals   483 104.04   0.215                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

but there is an interaction between delta and default condition

summary(aov(DV ~ deltac * cond, data=all)) 
##              Df Sum Sq Mean Sq F value  Pr(>F)    
## deltac        1  17.03  17.026  82.273 < 2e-16 ***
## cond          2   3.31   1.657   8.005 0.00038 ***
## deltac:cond   2   1.90   0.948   4.581 0.01069 *  
## Residuals   483  99.95   0.207                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

and also between annualized discount rate (ADR) and default condition - we’ll use ADR from here on

summary(aov(DV ~ adr * cond, data=all)) 
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## adr           1   8.38   8.379  37.824 1.62e-09 ***
## cond          2   3.24   1.619   7.309 0.000746 ***
## adr:cond      2   3.58   1.788   8.073 0.000356 ***
## Residuals   483 106.99   0.222                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

beta and delta (centered) are correlated:

##splom
ggpairs(all %>% select(betac, deltac), 
        lower=list(continuous="smooth", params=c(colour="blue")),
        diag=list(continuous="density", params=c(colour="blue")),
        upper=list(params=list(corSize=5)), axisLabels='show',
        title = "beta / delta correlation")

evaluate the nested logit as per eric’s suggestion - this tell us the effect of ADR in each condition:

nest <- glm(formula = DV ~ adr %in% cond, family = "binomial", data = all)
summary(nest)
## 
## Call:
## glm(formula = DV ~ adr %in% cond, family = "binomial", data = all)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4755  -1.2450   0.9121   1.0025   2.4335  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.72213    0.13383   5.396 6.81e-08 ***
## adr:cond1   -0.22190    0.07918  -2.802  0.00507 ** 
## adr:cond2   -0.78810    0.14546  -5.418 6.02e-08 ***
## adr:cond3   -0.31935    0.07957  -4.014 5.98e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 677.65  on 488  degrees of freedom
## Residual deviance: 614.73  on 485  degrees of freedom
## AIC: 622.73
## 
## Number of Fisher Scoring iterations: 5

we can then plot these three lines

effect_test <- effect("adr*cond", nest)       
summary(effect_test)
## 
##  adr*cond effect
##     cond
## adr            1            2           3
##   5  0.404355569 3.848026e-02 0.294291856
##   10 0.182898940 7.773287e-04 0.077888344
##   15 0.068733501 1.512162e-05 0.016821213
##   20 0.023758062 2.939459e-07 0.003453490
##   25 0.007960523 5.713867e-09 0.000701442
## 
##  Lower 95 Percent Confidence Limits
##     cond
## adr             1            2            3
##   5  0.2459651645 1.083860e-02 1.736945e-01
##   10 0.0481828199 5.169282e-05 1.953946e-02
##   15 0.0076779114 2.422959e-07 1.859056e-03
##   20 0.0011769221 1.133904e-09 1.734743e-04
##   25 0.0001791562 5.303329e-12 1.614263e-05
## 
##  Upper 95 Percent Confidence Limits
##     cond
## adr          1            2          3
##   5  0.5855381 1.275279e-01 0.45274317
##   10 0.4974281 1.157118e-02 0.26362855
##   15 0.4131600 9.428751e-04 0.13581713
##   20 0.3344999 7.619489e-05 0.06473589
##   25 0.2643534 6.156147e-06 0.02961790
plot(effect_test, multiline=T, ylab="SS", sug=F)

try spotlight analysis - at 1st and 3rd quartiles of continuous delta measure

summary(all$deltac)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.004604 -0.001713 -0.001225  0.000000  0.001571  0.009228
high_D <- subset(all, deltac >=.001571) #impatient
low_D <- subset(all, deltac <= -.001713) #patient
summary(glm(DV ~ cond, data=high_D, family="binomial")) 
## 
## Call:
## glm(formula = DV ~ cond, family = "binomial", data = high_D)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9005  -0.7585  -0.6681  -0.6681   1.7941  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -0.6931     0.4330  -1.601    0.109
## cond2        -0.6931     0.5590  -1.240    0.215
## cond3        -0.4055     0.5465  -0.742    0.458
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 136.10  on 121  degrees of freedom
## Residual deviance: 134.58  on 119  degrees of freedom
## AIC: 140.58
## 
## Number of Fisher Scoring iterations: 4
summary(glm(DV ~ cond, data=low_D, family="binomial"))
## 
## Call:
## glm(formula = DV ~ cond, family = "binomial", data = low_D)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0184   0.5287   0.5287   0.7502   1.1073  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   0.1671     0.4097   0.408  0.68344   
## cond2         0.9569     0.5194   1.842  0.06542 . 
## cond3         1.7301     0.5996   2.885  0.00391 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 136.66  on 122  degrees of freedom
## Residual deviance: 127.78  on 120  degrees of freedom
## AIC: 133.78
## 
## Number of Fisher Scoring iterations: 4

the proportions these tests give are the % of people choosing SS

prop.test(table(high_D$cond, high_D$DV)) #impatient people choose SS often, but default condition didn't make a difference
## 
##  3-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(high_D$cond, high_D$DV)
## X-squared = 1.5618, df = 2, p-value = 0.458
## alternative hypothesis: two.sided
## sample estimates:
##    prop 1    prop 2    prop 3 
## 0.6666667 0.8000000 0.7500000
prop.test(table(low_D$cond, low_D$DV)) #patient people choose SS seldom, and default cond makes a difference
## 
##  3-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(low_D$cond, low_D$DV)
## X-squared = 9.1961, df = 2, p-value = 0.01007
## alternative hypothesis: two.sided
## sample estimates:
##    prop 1    prop 2    prop 3 
## 0.4583333 0.2452830 0.1304348

note: hid (patient people) make a ‘preference-consistent’ choice when they choose the LL option. Since DV is coded as 1=LL 0=SS, the DV variable is also a ‘chose pref-consistent option’variable. lod(impatient people) make a ’pref-consistent’ choice when DV=0, so for these people the proportion choosing the pref-consistent option is 100 - the proportion given by the prop tests above

significance tests for 2 proportions at a time:

hiD12 <- subset(high_D, cond!=3)
hiD12$cond <- droplevels(hiD12$cond)
prop.test(table(hiD12$cond, hiD12$DV))
## 
##  2-sample test for equality of proportions with continuity
##  correction
## 
## data:  table(hiD12$cond, hiD12$DV)
## X-squared = 0.9256, df = 1, p-value = 0.336
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.3829399  0.1162732
## sample estimates:
##    prop 1    prop 2 
## 0.6666667 0.8000000
hiD13 <- subset(high_D, cond!=2)
hiD13$cond <- droplevels(hiD13$cond)
prop.test(table(hiD13$cond, hiD13$DV))
## 
##  2-sample test for equality of proportions with continuity
##  correction
## 
## data:  table(hiD13$cond, hiD13$DV)
## X-squared = 0.2163, df = 1, p-value = 0.6418
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.3394717  0.1728050
## sample estimates:
##    prop 1    prop 2 
## 0.6666667 0.7500000
hiD23 <- subset(high_D, cond!=1)
hiD23$cond <- droplevels(hiD23$cond)
prop.test(table(hiD23$cond, hiD23$DV)) #none of these comparisons are significant - the default condition doesn't seem to shift choice for impatient people
## 
##  2-sample test for equality of proportions with continuity
##  correction
## 
## data:  table(hiD23$cond, hiD23$DV)
## X-squared = 0.1231, df = 1, p-value = 0.7257
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.1356388  0.2356388
## sample estimates:
## prop 1 prop 2 
##   0.80   0.75
loD12 <- subset(low_D, cond!=3)
loD12$cond <- droplevels(loD12$cond)
prop.test(table(loD12$cond, loD12$DV))
## 
##  2-sample test for equality of proportions with continuity
##  correction
## 
## data:  table(loD12$cond, loD12$DV)
## X-squared = 2.5725, df = 1, p-value = 0.1087
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.04777028  0.47387091
## sample estimates:
##    prop 1    prop 2 
## 0.4583333 0.2452830
loD13 <- subset(low_D, cond!=2)
loD13$cond <- droplevels(loD13$cond)
prop.test(table(loD13$cond, loD13$DV))
## 
##  2-sample test for equality of proportions with continuity
##  correction
## 
## data:  table(loD13$cond, loD13$DV)
## X-squared = 7.5249, df = 1, p-value = 0.006085
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.07436431 0.58143279
## sample estimates:
##    prop 1    prop 2 
## 0.4583333 0.1304348
loD23 <- subset(low_D, cond!=1)
loD23$cond <- droplevels(loD23$cond)
prop.test(table(loD23$cond, loD23$DV))  #two of these are at least marginal - the default condition seems capable of shifting choice for patient people
## 
##  2-sample test for equality of proportions with continuity
##  correction
## 
## data:  table(loD23$cond, loD23$DV)
## X-squared = 1.4194, df = 1, p-value = 0.2335
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.05674753  0.28644400
## sample estimates:
##    prop 1    prop 2 
## 0.2452830 0.1304348

testing for overall effect of ‘patient’ variable

high_D$patient <- 0
low_D$patient <- 1
combined <- rbind(high_D, low_D) 
combined <- combined %>% mutate(patient_label = factor(ifelse(patient == 0, "impatient",
                                                              ifelse(patient == 1, "patient", NA))))
prop.test(table(combined$patient, combined$DV_label))
## 
##  2-sample test for equality of proportions with continuity
##  correction
## 
## data:  table(combined$patient, combined$DV_label)
## X-squared = 61.749, df = 1, p-value = 3.902e-15
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.6260549 -0.3943370
## sample estimates:
##    prop 1    prop 2 
## 0.2459016 0.7560976