Setup

library(pacman); p_load(ggplot2, metafor, weights, dplyr, reshape2, tidyr)

merged_S_withk[1, "study_ID_numeric"] = 1
temp = 1
for(i in 2:nrow(merged_S_withk)) {
  current_study_ID = merged_S_withk[i, "study_ID"]
  previous_study_ID = merged_S_withk[i-1, "study_ID"]
  if (current_study_ID!=previous_study_ID) {
    temp = temp+1}
  merged_S_withk[i, "study_ID_numeric"] = temp}

merged_S_withk[1, "estimates"] = 1
temp1 = 1
for(i in 2:nrow(merged_S_withk)){
  {temp1 = temp1+1}
  merged_S_withk[i, "estimates"] = temp1}

# Drop bookkeeping columns that are not needed for the plots or models below
merged_S_withk = select(merged_S_withk, -study_ID, -authors_year, -title, -published, -source_year, 
                        -PGD_continuous_step_level, -treatment_ID, -PC_cooperation,
                        -Study_number, -PE_contributions, -Zyi)

# Inverse logit (logistic) function: converts the log-odds effect sizes (yi) to proportions
logitP = function(x) 1/(1+exp(-x))
merged_S_withk$prct = logitP(merged_S_withk$yi)
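Despite the name, logitP is the inverse logit (i.e., stats::plogis), so prct is a proportion bounded in (0, 1):

all.equal(logitP(c(-2, 0, 2)), plogis(c(-2, 0, 2))) # TRUE; logitP(0) is 0.5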

Rationale

A recently shared plot from Yuan et al. (2022) is causing a stir, but most people do not realize that the shaded region in that plot is a 90% prediction interval, not a confidence interval. Here, I have replotted the weighted and unweighted versions of that plot with 95% confidence intervals.
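The two intervals answer different questions: a confidence interval describes uncertainty about the mean trend, while a prediction interval also absorbs the spread of individual study estimates around that trend, so it is much wider. A minimal sketch of the distinction (the years below are arbitrary illustrative values):

# Illustration only: confidence vs. prediction intervals from a simple regression
fit  <- lm(prct ~ year_data_collection, data = merged_S_withk)
newd <- data.frame(year_data_collection = c(1960, 1990, 2017))
predict(fit, newd, interval = "confidence", level = 0.95) # uncertainty about the mean trend (narrow)
predict(fit, newd, interval = "prediction", level = 0.90) # where individual estimates fall (much wider)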

The Graphs

wtd.cor(merged_S_withk$year_data_collection, merged_S_withk$prct)
##   correlation    std.err  t.value      p.value
## Y   0.1497554 0.03854444 3.885267 0.0001125585
wtd.cor(merged_S_withk$year_data_collection, merged_S_withk$prct, weight = merged_S_withk$participants_eligible)
##   correlation    std.err  t.value     p.value
## Y    0.141726 0.03859055 3.672557 0.000259642
# First principal component of (year, rate), used only to color points along the trend
merged_S_withk$pc <- predict(prcomp(~year_data_collection + prct, merged_S_withk))[,1]

ggplot(merged_S_withk, aes(year_data_collection, prct, color = pc)) + 
  geom_point(shape = 16, size = 4, show.legend = F, alpha = .4) + 
  scale_color_gradient(low = "#0091ff", high = "#f0650e") + 
  scale_alpha(range = c(.05, .25)) + 
  stat_smooth(method = lm, color = "cyan", formula = "y ~ x", geom = "line", alpha = .6) + 
  stat_smooth(method = loess, color = "blue", formula = "y ~ x", geom = "line", alpha = .6) + 
  geom_smooth(method = lm, color = "cyan", formula = "y ~ x", alpha = .1, linetype = 0, fill = "orange") + 
  geom_smooth(method = loess, color = "blue", formula = "y ~ x", alpha = .1, linetype = 0, fill = "violet") +
  labs(title = "Historical Changes Over Time in the Mean Cooperation Rate in Social Dilemmas (Unweighted)", x = "Year of Data Collection", y = "Cooperation Rate") + 
  theme_minimal() + 
  annotate("text", x = 2006, y = .15, label = "r = .15", family = "serif", size = 6) +
  theme(legend.position = "none", text = element_text(size = 14, family = "serif"), plot.title = element_text(hjust = 0.5), plot.margin = margin(10, 13, 10, 10)) 

Sometime between 2011 and now, geom_smooth and stat_smooth got busted for this purpose. Hadley removed the weight aesthetic from stat_smooth, and if you try to work around it by mapping the y aesthetic to something like "weights", the error message still suggests using aes(weight), even though it is gone. So I cannot draw the weighted lines with the fancy low-alpha styling in this plot without more work than I want to spend right now. In the plot below, just set linetype = 1 to get the line back.
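If you do want the weighted trend with the same low-alpha look, one workaround (a sketch, assuming a plain weighted least-squares fit is acceptable) is to fit the weighted model outside ggplot and draw it with geom_line and geom_ribbon:

# Workaround sketch: fit the weighted regression yourself, then draw it manually
wfit <- lm(prct ~ year_data_collection, data = merged_S_withk,
           weights = participants_eligible)
wpred <- cbind(merged_S_withk, predict(wfit, merged_S_withk, interval = "confidence"))
ggplot(wpred, aes(year_data_collection, prct)) +
  geom_point(alpha = .4) +
  geom_ribbon(aes(ymin = lwr, ymax = upr), fill = "orange", alpha = .1) +
  geom_line(aes(y = fit), color = "cyan", alpha = .6)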

ggplot(merged_S_withk, aes(year_data_collection, prct, color = pc)) + 
  geom_point(shape = 16, size = merged_S_withk$participants_eligible, show.legend = F, alpha = .6) + 
  scale_color_gradient(low = "#0091ff", high = "#f0650e") + 
  scale_alpha(range = c(.05, .25)) + 
  geom_smooth(method = lm, color = "cyan", formula = "y ~ x", alpha = .1, linetype = 0, fill = "orange", aes(weight = participants_eligible)) + 
  geom_smooth(method = loess, color = "blue", formula = "y ~ x", alpha = .1, linetype = 0, fill = "violet", aes(weight = participants_eligible)) +
  labs(title = "Historical Changes Over Time in the Mean Cooperation Rate in Social Dilemmas (Weighted)", x = "Year of Data Collection", y = "Cooperation Rate") + 
  theme_minimal() + 
  annotate("text", x = 2006, y = .15, label = "r = .14", family = "serif", size = 6) +
  theme(legend.position = "none", text = element_text(size = 14, family = "serif"), plot.title = element_text(hjust = 0.5), plot.margin = margin(10, 13, 10, 10))

References

Yuan, M., Spadaro, G., Jin, S., Wu, J., Kou, Y., Van Lange, P. A. M., & Balliet, D. (2022). Did cooperation among strangers decline in the United States? A cross-temporal meta-analysis of social dilemmas (1956–2017). Psychological Bulletin, 148(3–4), 129–157. https://doi.org/10.1037/bul0000363

Postscript: Do changing sex demographics of studies explain the trend?

To test this question, we could redo the meta-analysis and check whether sex composition is a moderator, or just check whether it moderates the regression above; I take the second route below. I don't know why the authors didn't do this, but it doesn't matter much, since they provided the data where possible.
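For reference, the meta-analytic route might look roughly like this. This is only a sketch: it assumes a sampling-variance column (called vi here), which was not retained in the trimmed dataset above.

# Sketch only: multilevel moderator model in metafor (assumes a sampling-variance column `vi`)
res_mod <- rma.mv(yi, vi, mods = ~ year_data_collection * prop_male,
                  random = ~ 1 | study_ID_numeric/estimates, data = merged_S_withk)
summary(res_mod)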

M1 <- lm(prct ~ year_data_collection, merged_S_withk)               # trend, full sample

merged_S_withk <- merged_S_withk[!is.na(merged_S_withk$prop_male),] # keep rows with sex data; 393 remain

M1NA <- lm(prct ~ year_data_collection, merged_S_withk)             # trend, sex-data subsample
M2 <- lm(prct ~ year_data_collection + prop_male, merged_S_withk)   # proportion male as a covariate
M3 <- lm(prct ~ year_data_collection * prop_male, merged_S_withk)   # interaction (moderation) model
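Since M1NA, M2, and M3 are nested, they can also be compared directly:

# Nested model comparison: does prop_male (or its interaction with year) improve fit?
anova(M1NA, M2, M3)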

summary(M1)
## 
## Call:
## lm(formula = prct ~ year_data_collection, data = merged_S_withk)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.43928 -0.10891 -0.00788  0.10115  0.43435 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -2.455900   0.757758  -3.241 0.001251 ** 
## year_data_collection  0.001476   0.000380   3.885 0.000113 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1626 on 658 degrees of freedom
## Multiple R-squared:  0.02243,    Adjusted R-squared:  0.02094 
## F-statistic:  15.1 on 1 and 658 DF,  p-value: 0.0001126
summary(M1NA)
## 
## Call:
## lm(formula = prct ~ year_data_collection, data = merged_S_withk)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.36995 -0.11140 -0.01566  0.10038  0.41038 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -4.1651559  0.9622080  -4.329 1.91e-05 ***
## year_data_collection  0.0023384  0.0004838   4.834 1.93e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1609 on 391 degrees of freedom
## Multiple R-squared:  0.05639,    Adjusted R-squared:  0.05398 
## F-statistic: 23.37 on 1 and 391 DF,  p-value: 1.928e-06
summary(M2)
## 
## Call:
## lm(formula = prct ~ year_data_collection + prop_male, data = merged_S_withk)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38537 -0.10939 -0.01314  0.10262  0.41004 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -3.8815041  0.9974356  -3.891 0.000117 ***
## year_data_collection  0.0022039  0.0004995   4.412 1.33e-05 ***
## prop_male            -0.0318663  0.0295977  -1.077 0.282303    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1609 on 390 degrees of freedom
## Multiple R-squared:  0.05919,    Adjusted R-squared:  0.05436 
## F-statistic: 12.27 on 2 and 390 DF,  p-value: 6.812e-06
summary(M3)
## 
## Call:
## lm(formula = prct ~ year_data_collection * prop_male, data = merged_S_withk)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39074 -0.10888 -0.01416  0.10125  0.41085 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                    -4.856357   2.065173  -2.352  0.01919 * 
## year_data_collection            0.002695   0.001039   2.593  0.00987 **
## prop_male                       2.008242   3.783426   0.531  0.59586   
## year_data_collection:prop_male -0.001029   0.001909  -0.539  0.59003   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.161 on 389 degrees of freedom
## Multiple R-squared:  0.05989,    Adjusted R-squared:  0.05264 
## F-statistic:  8.26 on 3 and 389 DF,  p-value: 2.434e-05

So, the sex composition of the samples did not moderate the trend in the part of the data that reported it, and the studies without sex data must have had a weaker (though not significantly weaker) trend: the full-sample slope in M1 (0.0015 per year) is smaller than the slope in the sex-data subsample in M1NA (0.0023). A further possibility that cannot be tested with these data is that the decline in average intelligence among university students, brought about by the ongoing universalization of education, has produced lower-than-expected cooperation rates, given the consistent correlation between cooperation and intelligence. That correlation is by no means guaranteed to carry over to this setting, and it could only be tested if, say, SAT scores for the samples used here were gathered. Unfortunately, it does not seem testable, and even if the data could be obtained, the required invariance assumptions beggar belief; but if it is true, it means that the increase in cooperation rates has been underestimated due to selection effects.