This is a script with exploratory analyses for the Partisan-Motivated Sampling Manuscript under R&R at JPSP. In particular, this script is addressing comments by Fiedler with regards to how more variability of ingroup experiences give rise to biased evaluations.
t.test(Survdf_Dis$sd_Ingroup, Survdf_Dis$sd_Outgroup) #yes, people do have more variable ingroup experiences.
##
## Welch Two Sample t-test
##
## data: Survdf_Dis$sd_Ingroup and Survdf_Dis$sd_Outgroup
## t = 2.1212, df = 4169.7, p-value = 0.03396
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.01961453 0.49820925
## sample estimates:
## mean of x mean of y
## 9.689036 9.430124
#here is nonparametric in case we violate assumptions
wilcox.test(Survdf_Dis$sd_Ingroup, Survdf_Dis$sd_Outgroup, alternative = "two.sided")
##
## Wilcoxon rank sum test with continuity correction
##
## data: Survdf_Dis$sd_Ingroup and Survdf_Dis$sd_Outgroup
## W = 2310874, p-value = 0.01106
## alternative hypothesis: true location shift is not equal to 0
Yes, people have sig more variable ingroup relative to outgroup experiences.
m <- lm(diff_score~sd_PL, data = Survdf_Dis)
summary(m)
##
## Call:
## lm(formula = diff_score ~ sd_PL, data = Survdf_Dis)
##
## Residuals:
## Min 1Q Median 3Q Max
## -78.960 -8.475 -0.520 6.708 85.584
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.11143 0.96840 1.148 0.2512
## sd_PL 0.20012 0.09132 2.191 0.0285 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.99 on 2398 degrees of freedom
## (27 observations deleted due to missingness)
## Multiple R-squared: 0.001998, Adjusted R-squared: 0.001582
## F-statistic: 4.802 on 1 and 2398 DF, p-value: 0.02853
ggplot(Survdf_Dis, aes(x = sd_PL, y = diff_score))+geom_point(size = .3) + geom_smooth(method='lm', formula= y~x)+
coord_cartesian(ylim = c(-25, 25)) + ggtitle("main effect of variance on evaluative biases (b = .2, SE = .09, t = 2.19, p = .028)")+ labs(x = "Total variance of sampled data", y = "Difference actual data and reported")
## Warning: Removed 27 rows containing non-finite values (stat_smooth).
## Warning: Removed 27 rows containing missing values (geom_point).
We a simple linear regression finds a main effect of variability, such that people with more variable ingroup experiences end up with more biased evals
m <- lm(diff_score~as.factor(Condition)*sd_Ingroup+as.factor(Condition)*sd_Outgroup, data = Survdf_Dis)
tab_model(m)
| Â | diff_score | ||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 8.46 | 5.83 – 11.09 | <0.001 |
| Condition [Same] | -10.32 | -14.58 – -6.06 | <0.001 |
| Condition [Worse] | -13.21 | -17.21 – -9.21 | <0.001 |
| sd_Ingroup | -0.10 | -0.30 – 0.10 | 0.312 |
| sd_Outgroup | -0.14 | -0.31 – 0.02 | 0.094 |
|
Condition [Same] * sd_Ingroup |
0.39 | 0.06 – 0.72 | 0.020 |
|
Condition [Worse] * sd_Ingroup |
0.31 | 0.02 – 0.59 | 0.034 |
|
Condition [Same] * sd_Outgroup |
0.14 | -0.17 – 0.44 | 0.379 |
|
Condition [Worse] * sd_Outgroup |
0.08 | -0.19 – 0.36 | 0.557 |
| Observations | 1914 | ||
| R2 / R2 adjusted | 0.134 / 0.131 | ||
ggplot(Survdf_Dis)+
geom_jitter(aes(sd_Ingroup,diff_score), colour="blue", size = .3) + geom_smooth(aes(sd_Ingroup,diff_score), method=lm, se=FALSE, color = "blue")+facet_wrap(~Condition)+
geom_jitter(aes(sd_Outgroup,diff_score), colour="red", size = .3) + geom_smooth(aes(sd_Outgroup,diff_score), method=lm, se=FALSE, color = "red") +facet_wrap(~Condition)+
coord_cartesian(ylim = c(-15, 15))+
labs(x = "Variance of ingroup (Blue) and outgroup (Red)", y = "Difference between actual data and reported")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 254 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 398 rows containing non-finite values (stat_smooth).
## Warning: Removed 254 rows containing missing values (geom_point).
## Warning: Removed 398 rows containing missing values (geom_point).
The effect of valence is driven by ingroup variability – which is moderated by condition
Survdf$tstart <- Survdf$Trial #start time
Survdf$tstop <- abs(1+Survdf$Trial) #shift stop up
ggsurvplot(
fit = survfit(Surv(Trial, event) ~ 1 +cluster(Participant), data = Survdf),
xlab = "trials",
ylab = "Overall survival probability")
Cox regression model for interaction effects and then ggsurve for visuals
coxph(Surv(tstart, tstop, event) ~ Condition*Val, data = Survdf) %>%
gtsummary::tbl_regression(exp = TRUE)
## Warning: The `.dots` argument of `group_by()` is deprecated as of dplyr 1.0.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
| Characteristic | HR1 | 95% CI1 | p-value |
|---|---|---|---|
| Condition | |||
| Better | — | — | |
| Same | 0.82 | 0.72, 0.94 | 0.003 |
| Worse | 0.92 | 0.80, 1.05 | 0.2 |
| Val | |||
| neg | — | — | |
| pos | 0.88 | 0.78, 0.99 | 0.039 |
| Condition * Val | |||
| Same * pos | 1.28 | 1.03, 1.58 | 0.024 |
| Worse * pos | 1.23 | 1.02, 1.48 | 0.029 |
|
1
HR = Hazard Ratio, CI = Confidence Interval
|
|||
BetterSurvDF <- Survdf[which(Survdf$Condition=="Better"),]
ggsurvplot(
fit = survfit(Surv(Trial, event) ~ Val +cluster(Participant), data = BetterSurvDF),
xlab = "Prob of survival in Better by Val",
ylab = "Prob of survival")
SameSurvDF <- Survdf[which(Survdf$Condition=="Same"),]
ggsurvplot(
fit = survfit(Surv(Trial, event) ~ Val +cluster(Participant), data = SameSurvDF),
xlab = "Prob of survival in Same by Val",
ylab = "Prob of survival")
WorseSurvDF <- Survdf[which(Survdf$Condition=="Worse"),]
ggsurvplot(
fit = survfit(Surv(Trial, event) ~ Val +cluster(Participant), data = WorseSurvDF),
xlab = "Prob of survival in Worse by Val",
ylab = "Prob of survival")
This model suggests that people are more likely to survive in the Better condition if they received a positive first sample are more likely to survive in the Worse condition if they received a negative first sample
Cox regression for p values and then ggsurve for visuals
Survdf$inVarDich[Survdf$sd_Ingroup > median(Survdf$sd_Ingroup, na.rm = T) ] <- "high"
Survdf$inVarDich[Survdf$sd_Ingroup < median(Survdf$sd_Ingroup, na.rm = T) ] <- "low"
coxph(Surv(tstart, tstop, event) ~ inVarDich*Condition, data = Survdf) %>%
gtsummary::tbl_regression(exp = TRUE)
| Characteristic | HR1 | 95% CI1 | p-value |
|---|---|---|---|
| inVarDich | |||
| high | — | — | |
| low | 1.00 | 0.88, 1.14 | >0.9 |
| Condition | |||
| Better | — | — | |
| Same | 0.90 | 0.77, 1.05 | 0.2 |
| Worse | 0.88 | 0.76, 1.01 | 0.072 |
| inVarDich * Condition | |||
| low * Same | 1.05 | 0.85, 1.30 | 0.7 |
| low * Worse | 1.38 | 1.13, 1.68 | 0.002 |
|
1
HR = Hazard Ratio, CI = Confidence Interval
|
|||
BetterSurvDF <- Survdf[which(Survdf$Condition=="Better"),]
ggsurvplot(
fit = survfit(Surv(Trial, event) ~ inVarDich +cluster(Participant), data = BetterSurvDF),
xlab = "trials",
ylab = "Prob of survival in Better")
SameSurvDF <- Survdf[which(Survdf$Condition=="Same"),]
ggsurvplot(
fit = survfit(Surv(Trial, event) ~ inVarDich +cluster(Participant), data = SameSurvDF),
xlab = "trials",
ylab = "Prob of survival in Same")
WorseSurvDF <- Survdf[which(Survdf$Condition=="Worse"),]
ggsurvplot(
fit = survfit(Surv(Trial, event) ~ inVarDich +cluster(Participant), data = WorseSurvDF),
xlab = "trials",
ylab = "Prob of survival in Worse")
This model suggests that the variability of ingroup experiences does not influence survival in the better or Same condition, but finds a large effect in the Worse condition, such that people with more variable experiences have a 38% increase in the chance of surviving
Cox regression for p values and then ggsurve for visuals
Survdf$outVarDich[Survdf$sd_Outgroup > median(Survdf$sd_Outgroup, na.rm = T) ] <- "high"
Survdf$outVarDich[Survdf$sd_Outgroup < median(Survdf$sd_Outgroup, na.rm = T) ] <- "low"
coxph(Surv(tstart, tstop, event) ~ outVarDich*Condition, data = Survdf) %>%
gtsummary::tbl_regression(exp = TRUE)
| Characteristic | HR1 | 95% CI1 | p-value |
|---|---|---|---|
| outVarDich | |||
| high | — | — | |
| low | 1.28 | 1.12, 1.47 | <0.001 |
| Condition | |||
| Better | — | — | |
| Same | 1.07 | 0.92, 1.26 | 0.4 |
| Worse | 1.05 | 0.90, 1.22 | 0.5 |
| outVarDich * Condition | |||
| low * Same | 0.75 | 0.60, 0.93 | 0.010 |
| low * Worse | 0.93 | 0.76, 1.14 | 0.5 |
|
1
HR = Hazard Ratio, CI = Confidence Interval
|
|||
BetterSurvDF <- Survdf[which(Survdf$Condition=="Better"),]
ggsurvplot(
fit = survfit(Surv(Trial, event) ~ outVarDich +cluster(Participant), data = BetterSurvDF),
xlab = "trials",
ylab = "Prob of survival in Better")
SameSurvDF <- Survdf[which(Survdf$Condition=="Same"),]
ggsurvplot(
fit = survfit(Surv(Trial, event) ~ outVarDich +cluster(Participant), data = SameSurvDF),
xlab = "trials",
ylab = "Prob of survival in Same")
WorseSurvDF <- Survdf[which(Survdf$Condition=="Worse"),]
ggsurvplot(
fit = survfit(Surv(Trial, event) ~ outVarDich +cluster(Participant), data = WorseSurvDF),
xlab = "trials",
ylab = "Prob of survival in Worse")
Here we find no difference for the variability of outgroup experiences in the Worse condition, but a significant effect in the Same and Better, such that participants were more likely to survive in the Same condition when they had less variable outgroup experiences. And more likely to survive in Better condition with more variable outgorup experiences. Not sure we need to report this as it’s pretty nuanced and we already see that the effect of variance is driven by ingroup
Cox regression for p values and then ggsurve for visuals
coxph(Surv(tstart, tstop, event) ~ Val*as.factor(PolStrength)*Condition, data = Survdf) %>%
gtsummary::tbl_regression(exp = TRUE)
| Characteristic | HR1 | 95% CI1 | p-value |
|---|---|---|---|
| Val | |||
| neg | — | — | |
| pos | 0.82 | 0.63, 1.07 | 0.2 |
| as.factor(PolStrength) | |||
| 1 | — | — | |
| 2 | 1.02 | 0.80, 1.31 | 0.9 |
| 3 | 0.82 | 0.64, 1.05 | 0.11 |
| Condition | |||
| Better | — | — | |
| Same | 0.77 | 0.59, 1.00 | 0.054 |
| Worse | 0.90 | 0.68, 1.20 | 0.5 |
| Val * as.factor(PolStrength) | |||
| pos * 2 | 0.99 | 0.71, 1.39 | >0.9 |
| pos * 3 | 1.16 | 0.83, 1.61 | 0.4 |
| Val * Condition | |||
| pos * Same | 1.98 | 1.28, 3.06 | 0.002 |
| pos * Worse | 1.41 | 0.96, 2.07 | 0.081 |
| as.factor(PolStrength) * Condition | |||
| 2 * Same | 0.93 | 0.65, 1.32 | 0.7 |
| 3 * Same | 1.23 | 0.88, 1.73 | 0.2 |
| 2 * Worse | 0.97 | 0.67, 1.41 | 0.9 |
| 3 * Worse | 1.06 | 0.75, 1.51 | 0.7 |
| Val * as.factor(PolStrength) * Condition | |||
| pos * 2 * Same | 0.81 | 0.46, 1.43 | 0.5 |
| pos * 3 * Same | 0.47 | 0.27, 0.81 | 0.007 |
| pos * 2 * Worse | 0.86 | 0.52, 1.41 | 0.5 |
| pos * 3 * Worse | 0.81 | 0.50, 1.32 | 0.4 |
|
1
HR = Hazard Ratio, CI = Confidence Interval
|
|||
SameSurvDF <- Survdf[which(Survdf$Condition=="Same"),]
SamePOSSurvDF <- SameSurvDF[which(SameSurvDF$Val=="pos"),]
SameNEGSurvDF <- SameSurvDF[which(SameSurvDF$Val=="neg"),]
ggsurvplot(
fit = survfit(Surv(Trial, event) ~ PolStrength +cluster(Participant), data = SamePOSSurvDF),
xlab = "Survival in the Same condition following positive first sample",
ylab = "Prob of survival")
ggsurvplot(
fit = survfit(Surv(Trial, event) ~ PolStrength +cluster(Participant), data = SameNEGSurvDF),
xlab = "Survival in the Same condition following negative first sample",
ylab = "Prob of survival")
WorseSurvDF <- Survdf[which(Survdf$Condition=="Worse"),]
WorsePOSSurvDF <- WorseSurvDF[which(WorseSurvDF$Val=="pos"),]
WorseNEGSurvDF <- WorseSurvDF[which(WorseSurvDF$Val=="neg"),]
ggsurvplot(
fit = survfit(Surv(Trial, event) ~ PolStrength +cluster(Participant), data = WorsePOSSurvDF),
xlab = "Survival in the Worse condition following positive first sample",
ylab = "Prob of survival")
ggsurvplot(
fit = survfit(Surv(Trial, event) ~ PolStrength +cluster(Participant), data = WorseNEGSurvDF),
xlab = "Survival in the Worse condition following negative first sample",
ylab = "Prob of survival")
This model suggests that strenght of affiliation does change the probability of surviving and that this effect is strongest in the Same condition following a positive first sample where participants with lowest strengh of affiliation are least likeley to survive
Cox regression for p values and then ggsurve for visuals
coxph(Surv(tstart, tstop, event) ~ diff_score, data = Survdf) %>%
gtsummary::tbl_regression(exp = TRUE)
| Characteristic | HR1 | 95% CI1 | p-value |
|---|---|---|---|
| diff_score | 1.02 | 1.01, 1.02 | <0.001 |
|
1
HR = Hazard Ratio, CI = Confidence Interval
|
|||
Survdf$BiasEval[Survdf$diff_score > median(Survdf$diff_score, na.rm = T) ] <- "high"
Survdf$BiasEval[Survdf$diff_score < median(Survdf$diff_score, na.rm = T) ] <- "low"
#visualize
ggsurvplot(
fit = survfit(Surv(tstart, tstop, event) ~ BiasEval +cluster(Participant), data = Survdf),
xlab = "trials",
ylab = "Prob of survival")
This is more just proof of concept. We find this people with the most biased evals are less likely to survive. I don’t think we need to report this
The next set of models examines the idea the Robin outlined where we examine two types of stopping – one from the ingroup and one from the outgroup. There are two potential ways to model this. The first is by running these as separate survival models, but we need to be careful because there is no good way to test whether these processes are independent from one another, and in fact we might expect them to be (i.e. when you stop about ingroup tell you something about when you stop from outgroup and vice versa). The second way to model these two stopping points is with competing risks models. There, the event would be 1 if the last sample was from the ingroup and 2 if the last sample was from outgroup and 0 is cencored (i.e. no event). The next set of models look at the first set of models
InStopBetterSurvDF <- InStopDF[which(InStopDF$Condition=="Better"),]
ggsurvplot(
fit = survfit(Surv(Trial, event) ~ Val +cluster(Participant), data = InStopBetterSurvDF),
xlab = "Ingroup last sample",
ylab = "Prob of survival")
OutStopBetterSurvDF <- OutStopDF[which(OutStopDF$Condition=="Better"),]
ggsurvplot(
fit = survfit(Surv(Trial, event) ~ Val +cluster(Participant), data = OutStopBetterSurvDF),
xlab = "Outgroup last sample",
ylab = "Prob of survival")
The chance of surviving in the Better condition changes drastically for ingroup last samples based on first sample valence, such that people are less likely to survive following a negative compared to positive first sample. Nothing happening for outgroup last sample in the Better condition
InStopSameSurvDF <- InStopDF[which(InStopDF$Condition=="Same"),]
ggsurvplot(
fit = survfit(Surv(Trial, event) ~ Val +cluster(Participant), data = InStopSameSurvDF),
xlab = "Ingroup last sample",
ylab = "Prob of survival in Same")
OutStopSameSurvDF <- OutStopDF[which(OutStopDF$Condition=="Same"),]
ggsurvplot(
fit = survfit(Surv(Trial, event) ~ Val +cluster(Participant), data = OutStopSameSurvDF),
xlab = "Outgroup last sample",
ylab = "Prob of survival in Same")
Nothing really happening here for either ingroup or outgroup last sample
InStopWorseSurvDF <- InStopDF[which(InStopDF$Condition=="Worse"),]
ggsurvplot(
fit = survfit(Surv(Trial, event) ~ Val +cluster(Participant), data = InStopWorseSurvDF),
xlab = "Ingroup last sample",
ylab = "Prob of survival in Worse")
OutStopWorseSurvDF <- OutStopDF[which(OutStopDF$Condition=="Worse"),]
ggsurvplot(
fit = survfit(Surv(Trial, event) ~ Val +cluster(Participant), data = OutStopWorseSurvDF),
xlab = "Outgroup last sample",
ylab = "Prob of survival in Worse")
No difference from ingroup last sample in the Worse, but participants are much more likely to continue sampling from the outgroup in the worse if they got a negative compared to positive first sample