Pre-process

Remove joined_data and uncomment the following block to rerun the preprocessing scripts

data_list <- c("pilots", "collab_lang_20", "compet_lang_20", "nonlang_pilots", "compet_nonlang_15", "collab_nonlang_15")
process_all_data(data_list)
#join_all_data()

d.players <- read_csv(here("data/processed_data/joined_data/players.csv"),
                      col_types = cols()) %>% distinct()
d.games <- read_csv(here("data/processed_data/joined_data/games.csv"),
                      col_types = cols()) %>% distinct()
d.rounds <- read_csv(here("data/processed_data/joined_data/rounds.csv"),
                      col_types = cols()) %>% distinct()
d.raw_chat <- read_csv(here("data/processed_data/joined_data/raw_chat.csv"),
                      col_types = cols()) %>% distinct()
d.contexts <- read_csv(here("data/processed_data/joined_data/contexts.csv"),
                      col_types = cols()) %>% distinct()
d.feedback <- read_csv(here("data/processed_data/joined_data/feedback.csv"),
                      col_types = cols()) %>% distinct()
d.demographics <- read_csv(here("data/processed_data/joined_data/demographics.csv"),
                      col_types = cols()) %>% distinct()

Rename conditions for readability

d.games <- d.games %>% mutate(conditionName = condition, 
                              condition = case_when(condition == "coopMulti" ~ "Cooperative",
                                         condition == "competCartel" ~ "Competitive",
                                         condition == "coopCartel" ~ "Old Cooperative"))
d.games

## # A tibble: 90 × 22
##    gameId  finishedAt          createdAt           color othercolor flowers     
##    <chr>   <dttm>              <dttm>              <chr> <chr>      <chr>       
##  1 3S6bXn… 2021-08-19 19:17:17 2021-08-19 19:10:47 white yellow     "{\"label\"…
##  2 7JSk79… 2021-08-19 19:26:59 2021-08-19 19:08:11 red   purple     "{\"label\"…
##  3 8dDG6f… 2021-08-19 19:56:06 2021-08-19 19:29:02 purp… yellow     "{\"label\"…
##  4 B5SFfZ… 2021-08-19 19:39:43 2021-08-19 19:19:24 red   yellow     "{\"label\"…
##  5 DTw5mr… 2021-08-19 19:37:37 2021-08-19 19:08:50 yell… purple     "{\"label\"…
##  6 gaQuqy… 2021-08-19 19:34:49 2021-08-19 19:13:09 red   yellow     "{\"label\"…
##  7 gPR99k… 2021-08-19 19:25:11 2021-08-19 19:07:11 purp… white      "{\"label\"…
##  8 KiTSXq… 2021-08-19 19:12:16 2021-08-19 19:12:05 white yellow     "{\"label\"…
##  9 LmhxB2… 2021-08-19 19:29:22 2021-08-19 19:17:44 red   purple     "{\"label\"…
## 10 LrRSuk… 2021-08-19 19:18:04 2021-08-19 19:07:55 purp… yellow     "{\"label\"…
## # … with 80 more rows, and 16 more variables: otherflowers <chr>,
## #   numBlocks <dbl>, condition <chr>, name <chr>, playerCount <dbl>,
## #   selectionDuration <dbl>, rounds <dbl>, feedbackDuration <dbl>,
## #   numTargets <dbl>, repsPerBlock <dbl>, partial <lgl>, scale <dbl>,
## #   chatEnabled <lgl>, gameLength <dbl>, pilot <lgl>, conditionName <chr>

#exclude BLOCKS with <12 completed (all 3 participants responded) rounds
# exclude incomplete games (not just incomplete rounds)

games_exclude <- d.rounds %>% group_by(gameId) %>% tally() %>% filter(n!=3*24) %>% select(gameId)

#exclude cooperative condition
d.rounds.final <-  d.rounds %>% left_join(d.games %>% select(gameId, conditionName, condition)) %>% 
  filter(conditionName != "coopCartel") %>% anti_join(games_exclude)

## Joining, by = "gameId"
## Joining, by = "gameId"

d.raw_chat.final <- d.raw_chat %>% 
  left_join(d.games %>% select(gameId, conditionName, condition)) %>% 
  anti_join(games_exclude) %>% 
  write_csv(here("data/processed_data/joined_data/filtered_raw_chat.csv"))

## Joining, by = "gameId"
## Joining, by = "gameId"

Clean up a sharable data csv

interpretable_data <- d.raw_chat.final %>% 
  add_chat_info(d.players, d.rounds.final, d.games) %>% 
    left_join(d.contexts %>% select(gameId, repNum, blockNum, trialNum, 
                                  playerResponse = label, 
                                  responseRawUtility = utility, blinded)) %>%
   #to avoid confusion, drop any round info thats actually a message
   mutate(playerResponse = ifelse(type == "message", NA, playerResponse),
         playerUtility = ifelse(type == "message", NA, playerUtility),
         responseRawUtility = ifelse(type == "message", NA, responseRawUtility),
         blinded = ifelse(type == "message", NA, blinded)) %>%
  select(gameId, trialNum, condition, chatEnabled, playerId, name, text, 
         participantAction, type, repNum, 
         blockNum, time, everything(), - submitted, -index, -createdAt) %>% write_csv(here("data/interpretable_transcript.csv"))

## `summarise()` has grouped output by 'gameId'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'gameId'. You can override using the `.groups` argument.

## Joining, by = c("gameId", "trialNum")
## Joining, by = c("gameId", "trialNum")

## Joining, by = "playerId"

## Joining, by = c("gameId", "repNum", "blockNum", "trialNum", "playerId")

## Joining, by = c("gameId", "condition")

## Joining, by = c("gameId", "trialNum", "repNum", "blockNum", "playerResponse")

#how many games are we left with
game_completion <- d.rounds %>% 
  group_by(gameId) %>% 
  summarize(num_rounds=max(trialNum+1)) %>% 
  left_join(d.games%>% ungroup() %>% select(gameId, condition, chat=chatEnabled, gameLength)) %>%
  mutate(gameComplete = ifelse(num_rounds == 24, TRUE, FALSE))

## Joining, by = "gameId"

#knitr::kable(summary)
message("Full games")

## Full games

game_completion %>% filter(num_rounds==24)

## # A tibble: 77 × 6
##    gameId            num_rounds condition   chat  gameLength gameComplete
##    <chr>                  <dbl> <chr>       <lgl>      <dbl> <lgl>       
##  1 2F7o2m2ZZj2T4EFzo         24 Cooperative FALSE        269 TRUE        
##  2 2JYf8Bb4Y3EcndcqM         24 Cooperative FALSE        406 TRUE        
##  3 3FWWt63LvpHJrdPDf         24 Competitive FALSE        241 TRUE        
##  4 3S6bXnp5NWyGgmBxD         24 Cooperative TRUE         389 TRUE        
##  5 5etSp4RTD85pHPt96         24 Competitive TRUE         880 TRUE        
##  6 5XZ8cFNsh6dk6tw9s         24 Cooperative FALSE        317 TRUE        
##  7 7fFMnLt83jGgAY9Bm         24 Cooperative FALSE        236 TRUE        
##  8 7JSk79tktXw77agpS         24 Cooperative TRUE        1127 TRUE        
##  9 7qCsiS5gNiMqqKAHg         24 Competitive TRUE        1318 TRUE        
## 10 8dDG6frr5JfsMjmTD         24 Cooperative TRUE        1623 TRUE        
## # … with 67 more rows

message("Partial games")

## Partial games

game_completion %>% filter(num_rounds!=24)

## # A tibble: 10 × 6
##    gameId            num_rounds condition   chat  gameLength gameComplete
##    <chr>                  <dbl> <chr>       <lgl>      <dbl> <lgl>       
##  1 47MwccNZCwNDTyxqK          2 Competitive TRUE        4222 FALSE       
##  2 9YvhBapNfWcFwvvpX         11 Competitive TRUE         922 FALSE       
##  3 aSeqwy83KvRSbvRSS          6 Cooperative FALSE        101 FALSE       
##  4 avdWsKnbRkYLQ5Mq8         23 Competitive TRUE        1070 FALSE       
##  5 B5SFfZ9rHwth8oHNn         11 Cooperative TRUE        1218 FALSE       
##  6 co5ircoedXApqB9jD         18 Competitive TRUE         879 FALSE       
##  7 gPR99kfXeccf8PeGF         19 Cooperative TRUE        1079 FALSE       
##  8 gRJRf37xPZyK72tFx          4 Cooperative FALSE         91 FALSE       
##  9 HevPvvPd7xwLaz9S6          3 Competitive TRUE         487 FALSE       
## 10 YGABm7kTsNdSBHTWW         16 Cooperative TRUE        1132 FALSE

Game counts

#pull out pilots
d.rounds.final %>% left_join(d.games %>% select(gameId, chatEnabled)) %>% group_by(condition, chatEnabled) %>% 
  summarize(n_games = n_distinct(gameId)) %>% arrange(condition, chatEnabled)

## Joining, by = "gameId"

## `summarise()` has grouped output by 'condition'. You can override using the `.groups` argument.

## # A tibble: 4 × 3
## # Groups:   condition [2]
##   condition   chatEnabled n_games
##   <chr>       <lgl>         <int>
## 1 Competitive FALSE            20
## 2 Competitive TRUE             18
## 3 Cooperative FALSE            18
## 4 Cooperative TRUE             21

Demographics

d.demographics

## # A tibble: 297 × 12
##    gameId          createdAt             age gender language raceWhite raceBlack
##    <chr>           <dttm>              <dbl> <chr>  <chr>    <lgl>     <lgl>    
##  1 KiTSXqszi96Dog… 2021-08-19 19:14:27    25 Female English  TRUE      NA       
##  2 KiTSXqszi96Dog… 2021-08-19 19:14:34    21 Female English  TRUE      NA       
##  3 KiTSXqszi96Dog… 2021-08-19 19:16:45    42 male   English  TRUE      NA       
##  4 LrRSukRse3aB9x… 2021-08-19 19:20:36    27 Female English  TRUE      NA       
##  5 LrRSukRse3aB9x… 2021-08-19 19:21:22    37 Female English  TRUE      NA       
##  6 LrRSukRse3aB9x… 2021-08-19 19:22:12    29 female English  TRUE      NA       
##  7 3S6bXnp5NWyGgm… 2021-08-19 19:23:04    25 Female English  TRUE      NA       
##  8 3S6bXnp5NWyGgm… 2021-08-19 19:23:46    60 female english  TRUE      NA       
##  9 Rq47E8uzWFKm4C… 2021-08-19 19:25:48    19 female English  TRUE      NA       
## 10 gPR99kfXeccf8P… 2021-08-19 19:25:50    33 Male   English  TRUE      NA       
## # … with 287 more rows, and 5 more variables: raceAsian <lgl>,
## #   raceNative <lgl>, raceIslander <lgl>, raceHispanic <lgl>, education <chr>

d.feedback

## # A tibble: 297 × 8
##    playerId  correctness human workedWell  fair     chatUseful  feedback  time  
##    <chr>     <chr>       <chr> <chr>       <chr>    <chr>       <chr>     <chr> 
##  1 ajXrM4MR… yes         yes   neutral     "Did no… x           Wish I'd… x     
##  2 DE8LiqDx… yes         yes   neutral     "Unfort… I was unab… n/a       N/A   
##  3 HETvsnvE… yes         yes   neutral     "If I r… Someone di… Yes, a p… This …
##  4 GsXazheE… yes         yes   stronglyAg… "Yeah i… Very easy   Nope - w… Yes   
##  5 ehFS23PQ… yes         no    agree       "Yes"    Yes         None      Not t…
##  6 WADiGpeA… yes         yes   neutral     "Yes"    Very - it … <NA>      Plent…
##  7 7mbrv5hE… yes         yes   stronglyAg… "Yes"    Yes         No        Yes   
##  8 fqKx97dX… yes         no    stronglyAg… "yes, a… yes a coup… no all s… yes   
##  9 Sz4iGenv… yes         yes   stronglyAg… "yes"    yes         nope!     more …
## 10 uxoyB5wD… yes         yes   stronglyAg… "Yes."   Very easy … No.       More …
## # … with 287 more rows

d.feedback %>% ggplot(aes(x = human)) + geom_bar(stat = "count") + theme_jmank()

Basic Analyses

Break out the by-player statistics for each round

#player language per round
d.player_word_counts <- d.raw_chat.final %>% 
  filter(type == "message") %>%
  full_join(d.rounds.final, c("gameId", "trialNum", "repNum", 
                              "playerId", "numPlayers", "blockNum")) %>%
 # filter(!is.chitchat) %>% 
  mutate(text = gsub('[[:punct:] ]+',' ',text),
         text = str_squish(text),
         utt_length_chars = str_length(text), 
         utt_length_words = str_count(text, "\\W+") + 1) %>% 
  group_by(gameId, blockNum, trialNum, repNum, playerId, numPlayers) %>%
  summarize(text = paste0(text, collapse = ', '),
            total_num_words = sum(utt_length_words),
            total_num_chars = sum(utt_length_chars))

## `summarise()` has grouped output by 'gameId', 'blockNum', 'trialNum', 'repNum', 'playerId'. You can override using the `.groups` argument.

# player points per round
d.by_player_metrics <- d.rounds.final  %>% 
  left_join(d.player_word_counts) %>%
  left_join(game_completion %>% 
              select(gameId, gameComplete)) %>%
  left_join(d.games %>% select(-c(createdAt, flowers:scale), condition)) %>%  
  mutate(languageCondition = ifelse(chatEnabled, "Lang", "Nonlang"),
         fullCondition = paste(condition, languageCondition, sep = "-"))

## Joining, by = c("gameId", "repNum", "blockNum", "trialNum", "numPlayers", "playerId")

## Joining, by = "gameId"

## Joining, by = c("gameId", "conditionName", "condition")

Round info

#get the # of raw points, max adjusted points, game legnth, # of words exchanged
d.max_round <- d.contexts %>%
  group_by(gameId, repNum, blockNum, trialNum) %>%
  #top 3 flowers
  slice_max(utility, n = 3) %>%
  summarise(roundMax = sum(utility))

## `summarise()` has grouped output by 'gameId', 'repNum', 'blockNum'. You can override using the `.groups` argument.

d.by_round_metrics <- d.by_player_metrics  %>% 
  group_by(gameId, trialNum, repNum, blockNum) %>%
  summarize(groupPoints = sum(playerUtility, na.rm = T),
            groupNumWords = sum(total_num_words, na.rm = T),
            groupNumChars = sum(total_num_chars, na.rm = T),
            numFlowers = n_distinct(playerResponse)) %>%
  left_join(d.max_round) %>%
  left_join(game_completion %>% 
              select(gameId, gameComplete)) %>%
  left_join(d.games %>% select(-c(createdAt, flowers:scale), condition)) %>%  
  mutate(languageCondition = ifelse(chatEnabled, "Lang", "Nonlang"),
         fullCondition = paste(condition, languageCondition, sep = "-"),
         roundPointsAdjusted = groupPoints/roundMax)

## `summarise()` has grouped output by 'gameId', 'trialNum', 'repNum'. You can override using the `.groups` argument.

## Joining, by = c("gameId", "trialNum", "repNum", "blockNum")

## Joining, by = "gameId"
## Joining, by = "gameId"

by block

d.by_block_metrics <- d.by_round_metrics %>% 
  group_by(gameId, gameComplete, condition, 
           languageCondition, fullCondition, gameLength, blockNum) %>%
  summarize(blockPoints = sum(groupPoints, na.rm = T),
            blockNumWords = sum(groupNumWords, na.rm = T),
            blockNumChars = sum(groupNumChars, na.rm = T),
            blockMaxPoints = sum(roundMax),
            blockNumFlowers = sum(numFlowers)) %>%
  mutate(blockPointsAdjusted = blockPoints/blockMaxPoints,
         meanNumFlowers = blockNumFlowers*3/(6*3))

## `summarise()` has grouped output by 'gameId', 'gameComplete', 'condition', 'languageCondition', 'fullCondition', 'gameLength'. You can override using the `.groups` argument.

by game

d.by_game_metrics <- d.by_block_metrics %>%
  #from this point, only look at completed games
    filter(gameComplete) %>%
  #group by game
  group_by(gameId, condition, languageCondition, fullCondition, gameLength) %>%
  summarize(gamePoints = sum(blockPoints, na.rm = T),
            gameNumWords = sum(blockNumWords, na.rm = T),
            gameNumChars = sum(blockNumChars, na.rm = T),
            gameMaxPoints = sum(blockMaxPoints),
            gameNumFlowers = sum(blockNumFlowers)) %>%
  mutate(pointsAdjusted = gamePoints/gameMaxPoints,
         meanNumFlowers = gameNumFlowers*3/ (6*4*3))

## `summarise()` has grouped output by 'gameId', 'condition', 'languageCondition', 'fullCondition'. You can override using the `.groups` argument.

d.by_player_metrics %>% group_by(gameId, condition, languageCondition, playerId) %>% 
  summarize(mean_player_utility = mean(playerUtility)) %>%
  group_by(condition, languageCondition) %>% 
  summarize_at("mean_player_utility", funs(mean, min, max, sd))

## `summarise()` has grouped output by 'gameId', 'condition', 'languageCondition'. You can override using the `.groups` argument.

## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))

## # A tibble: 4 × 6
## # Groups:   condition [2]
##   condition   languageCondition  mean   min   max    sd
##   <chr>       <chr>             <dbl> <dbl> <dbl> <dbl>
## 1 Competitive Lang               7.53  4.88  9.17 1.17 
## 2 Competitive Nonlang            5.48  3.54  7.33 0.945
## 3 Cooperative Lang               7.48  5.10  8.60 0.968
## 4 Cooperative Nonlang            5.60  3.93  6.71 0.705

Simple Distributions

Points

Unadjusted

d.by_game_metrics %>% 
  ggplot(aes(x = languageCondition, y = gamePoints, color = condition, shape = languageCondition)) + 
  geom_jitter(width = .1, alpha = .5) +
  stat_summary(fun.data = "mean_cl_boot")+
  facet_grid(cols = vars(condition)) + labs(x = "Language Condition", y = "Points Earned by Group", title = "Points Earned by Group") + scale_color_bay() + theme_jmank() + theme(legend.position = "none")

ggsave(here(fig_path, "points_distirbution_raw.png"), width = 3, height = 5)

Max Adjusted

d.by_game_metrics %>%
  ggplot(aes(x = languageCondition, y = pointsAdjusted, color = condition, shape = languageCondition)) + 
  geom_jitter(width = .1, alpha = .5) +
  stat_summary(fun.data = "mean_cl_boot")+
  facet_grid(cols = vars(condition)) + 
  labs(x = "Language Condition", 
       y = "Points Earned by Group (Max Adjusted)", 
       title = "Points by Group") + 
  ylim(0,1) +
  scale_color_bay() + theme_jmank() + theme(legend.position = "none")

ggsave(here(fig_path, "points_distirbution_adjusted.png"),  width = 3, height = 5)

Time

d.by_game_metrics %>%
  ggplot(aes(x = languageCondition, y = gameLength/60, 
             color = condition, shape = languageCondition)) + 
  geom_jitter(width = .1, alpha = .5) +
  stat_summary(fun.data = "mean_cl_boot")+
  facet_grid(cols = vars(condition))+ 
  labs(y = "Time (In Minutes)", title = "Game Length", x = "Condition")+
  scale_color_bay() + theme_jmank() +  theme(legend.position = "none")

ggsave(here(fig_path, "time_distirbution.png"),  width = 5, height = 3)

d.by_game_metrics %>%
  ggplot(aes(x = languageCondition, y = log(gameLength/60), color = condition, shape = languageCondition)) + 
  geom_jitter(width = .1, alpha = .5) +
  stat_summary(fun.data = "mean_cl_boot")+
  facet_grid(cols = vars(condition)) + 
  ylim(0, 4.5)+
  labs(y = "Time (In Log Minutes)", title = "Game Length (Log Mins)")+
  scale_color_bay() + theme_jmank() +  theme(legend.position = "none")

ggsave(here(fig_path, "time_log_distirbution.png"))

## Saving 5 x 3 in image

Number of Words

d.by_game_metrics %>%
  ggplot(aes(x = languageCondition, y = gameNumWords, color = condition, shape = languageCondition)) + 
  geom_jitter(width = .1, alpha = .5) +
  stat_summary(fun.data = "mean_cl_boot")+
  facet_grid(cols = vars(condition))+ 
  labs(y = "N. Words", title = "Number of Words Exchanged", x = "Condition")+scale_color_bay() + theme_jmank() +  theme(legend.position = "none")

ggsave(here(fig_path, "numwords_distirbution.png"), height = 5, width = 3)

d.by_game_metrics %>%
  ggplot(aes(x = languageCondition, y = log(gameNumWords+1), color = condition, shape = languageCondition)) + 
  geom_jitter(width = .1, alpha = .5) +
  stat_summary(fun.data = "mean_cl_boot")+
  facet_grid(cols = vars(condition))+ 
  labs(y = "Log N. Words Exchanged")+
  scale_color_bay() + theme_jmank() +  theme(legend.position = "none")

ggsave(here(fig_path, "numwords_log_distirbution.png"))

## Saving 5 x 3 in image

Covariate

Time x Points

d.by_game_metrics %>%
  ggplot(aes(y = gamePoints, x = gameLength/60, color = condition, shape = languageCondition)) + 
  geom_point(alpha = .5) +
  geom_smooth(method=lm)+ 
  labs(x = "Game length (in Minutes)", y = "Points Earned by Group")+
  scale_color_bay() + theme_jmank() +  theme(legend.position = "none")

## `geom_smooth()` using formula 'y ~ x'

ggsave(here(fig_path, "time_words_scatter.png"))

## Saving 5 x 3 in image
## `geom_smooth()` using formula 'y ~ x'

All Correlated

d.by_game_metrics %>%
  ggplot(aes(x = gameLength/60, y = gamePoints, shape = languageCondition, color = log(gameNumWords+1))) + 
  geom_point(alpha = .8) +
  geom_smooth(aes(group = condition), method=lm, color = "black")+
  facet_grid(cols = vars(condition)) + 
  labs(x = "Game length (in Minutes)", 
       y = "Points Earned by Group",
       color = "Log Num. Words",
       shape = "Language") + 
  theme_jmank() +  scale_color_steps(low = "#edd746", high = "#6B9555")

## `geom_smooth()` using formula 'y ~ x'

ggsave(here(fig_path, "all_correlated.png"), height = 3, width = 5)

## `geom_smooth()` using formula 'y ~ x'

ms <- d.by_game_metrics %>%
  group_by(condition, languageCondition) %>%
  summarise(mean_len = mean(gameLength/60),
            ci_len = 1.96 * sd(gameLength/60) / sqrt(length(gameLength)),
            mean_points = mean(gamePoints),
            ci_points = 1.96 * sd(gamePoints) / sqrt(length(gameLength)))

## `summarise()` has grouped output by 'condition'. You can override using the `.groups` argument.

d.by_game_metrics %>%
  ggplot(aes(x = gameLength/60, y = gamePoints, color = condition, shape = languageCondition)) +#, size = log(groupNumWords+1))) + 
  geom_smooth(aes(group = condition), method = "lm") +
  geom_point(alpha = .2) +
  geom_pointrange(data = ms, aes(x = mean_len, y = mean_points, 
                                 ymin = mean_points - ci_points,
                                 ymax = mean_points + ci_points)) +
  geom_errorbarh(data = ms, aes(x = mean_len, y = mean_points, 
                                 xmin = mean_len - ci_len,
                                 xmax = mean_len + ci_len), width = 0) + 
  # ylim(0,1500) + 
  xlim(0,90) + 
  labs(x = "Game length (in Minutes)", y = "Points Earned by Group")+
  ggthemes::scale_color_solarized() + 
  scale_color_bay() + theme_jmank() +  
  theme(legend.position = "bottom")

## Warning: Ignoring unknown parameters: width

## Warning: Ignoring unknown aesthetics: x

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

## `geom_smooth()` using formula 'y ~ x'

ggsave(here(fig_path, "time_points_scatter_nofacet.png"))

## Saving 5 x 3 in image
## `geom_smooth()` using formula 'y ~ x'

d.by_game_metrics %>% 
  ggplot(aes(x = log(gameLength/60), y = gamePoints, color = condition)) + 
  geom_point(alpha = .5) +
  geom_smooth(method=lm)+
  labs(x = "Game length (in Log Minutes)",y = "Points Earned by Group")+scale_color_bay() + theme_jmank()

## `geom_smooth()` using formula 'y ~ x'

ggsave(here(fig_path, "log_time_points_scatter_nofacet.png"))

## Saving 5 x 3 in image
## `geom_smooth()` using formula 'y ~ x'

Language x Points

ms <- d.by_game_metrics %>%
  mutate(langUse = ifelse(gameNumWords > 0, TRUE, FALSE)) %>% 
  group_by(condition, fullCondition, languageCondition) %>%
  summarise(mean_nw = mean(gameNumWords),
            ci_nw = 1.96 * sd(gameNumWords) / sqrt(length(gameLength)),
            mean_points = mean(gamePoints),
            ci_points = 1.96 * sd(gamePoints) / sqrt(length(gameLength)))

## `summarise()` has grouped output by 'condition', 'fullCondition'. You can override using the `.groups` argument.

d.by_game_metrics %>%
  mutate(langUse = ifelse(gameNumWords > 0, TRUE, FALSE)) %>% 
  #filter(langUse) %>%
  ggplot(aes(x = gameNumWords, y = gamePoints, color = condition, shape = languageCondition)) +
  geom_smooth(aes(group = fullCondition), method = "lm") +
  #geom_smooth(aes(group = fullCondition), method=glm, formula=y~poly(x,2), alpha=.3)+
  geom_point( alpha = .5) +
  geom_pointrange(data = ms, aes(x = mean_nw, y = mean_points,
                                ymin = mean_points - ci_points,
                                ymax = mean_points + ci_points)) +
  geom_errorbarh(data = ms, aes(x = mean_nw, y = mean_points,
                                xmin = mean_nw - ci_nw,
                                xmax = mean_nw + ci_nw), width = 0) +
  # ylim(0,1500) + 
  labs(x = "Words Exchanged", y = "Points Earned", title = ) + 
  scale_color_bay() + theme_jmank() +  
  theme(legend.position = "bottom")

## Warning: Ignoring unknown parameters: width

## Warning: Ignoring unknown aesthetics: x

## `geom_smooth()` using formula 'y ~ x'

d.by_game_metrics %>% filter(languageCondition == "Lang") %>%
  mutate(langUse = ifelse(gameNumWords > 0, TRUE, FALSE)) %>%
  ggplot(aes(x= gameNumWords, y = pointsAdjusted, color = condition, shape = languageCondition)) + 
  geom_point(alpha = .5) +
  geom_smooth(method=lm)+ 
  labs(x = "N. Words Exchanged", y = "Group Points (Adjusted)")+scale_color_bay() + 
  theme_jmank() +  theme(legend.position = "none")

## `geom_smooth()` using formula 'y ~ x'

ggsave(here(fig_path, "nwords_points_scatter.png"), height = 3, width = 3)

## `geom_smooth()` using formula 'y ~ x'

Progress Over Time

tried to make a gif? :/

Including language optional games where they decided not to speak

d.player_word_counts %>%
  left_join(d.games %>% 
              select(gameId, condition,chatEnabled)) %>% 
  mutate(languageCondition = ifelse(chatEnabled, "Lang", "Nonlang"),
         fullCondition = paste(condition, languageCondition, sep = "-"),
         block_name = paste0("Block ", blockNum)) %>%
    filter(languageCondition == "Lang") %>%
  group_by(gameId, fullCondition, block_name, trialNum, repNum) %>% 
  summarize(groupNumWords = sum(total_num_words, na.rm = T)) %>%
  ggplot(aes(x = repNum, y = groupNumWords, color = fullCondition)) + 
  geom_point(alpha = .2) +
  geom_smooth(method=lm)+
  facet_grid(cols = vars(block_name)) + labs(x = "Trial Number", y = "N. Words")+ scale_color_bay() + theme_jmank()

## Joining, by = "gameId"

## `summarise()` has grouped output by 'gameId', 'fullCondition', 'block_name', 'trialNum'. You can override using the `.groups` argument.

## `geom_smooth()` using formula 'y ~ x'

ggsave(here(fig_path, "nwords_over_time.png"))

## Saving 5 x 3 in image
## `geom_smooth()` using formula 'y ~ x'

log (n words+1)

d.player_word_counts %>%left_join(d.games %>% 
                                   select(gameId, condition,chatEnabled)) %>% 
  mutate(languageCondition = ifelse(chatEnabled, "Lang", "Nonlang"),
         fullCondition = paste(condition, languageCondition, sep = "-"),
         block_name = paste0("Block ", blockNum)) %>%
    filter(languageCondition == "Lang") %>%
  group_by(gameId, condition,block_name, trialNum, repNum) %>% 
  summarize(groupNumWords = sum(total_num_words, na.rm = T)+1) %>%
  ggplot(aes(x = repNum, y = log(groupNumWords), color = condition)) + 
  geom_point(alpha = .2) +
  geom_smooth(method=lm)+
  facet_grid(cols = vars(block_name)) + 
  labs(x = "Trial Number", y = "Log (N. Words + 1)")+ scale_color_bay() + theme_jmank() + theme(legend.position = "bottom")

## Joining, by = "gameId"

## `summarise()` has grouped output by 'gameId', 'condition', 'block_name', 'trialNum'. You can override using the `.groups` argument.

## `geom_smooth()` using formula 'y ~ x'

ggsave(here(fig_path, "nwords_log_over_time.png"), height = 3, width = 5)

## `geom_smooth()` using formula 'y ~ x'

Only games where language was used

d.player_word_counts %>%left_join(d.games %>% 
                                   select(gameId, condition,chatEnabled)) %>% 
  mutate(languageCondition = ifelse(chatEnabled, "Lang", "Nonlang"),
         fullCondition = paste(condition, languageCondition, sep = "-"),
         block_name = paste0("Block ", blockNum)) %>%
    filter(languageCondition == "Lang") %>%
  group_by(gameId, fullCondition,block_name, trialNum, repNum) %>% 
  summarize(groupNumWords = sum(total_num_words, na.rm = T)) %>%
  filter(groupNumWords > 0) %>%
  ggplot(aes(x = repNum, y = groupNumWords, color = fullCondition)) + 
  geom_point(alpha = .2) +
  geom_smooth(method=lm)+
  facet_grid(cols = vars(block_name)) + labs(x = "Trial Number", y = "N. Words")+ scale_color_bay() + theme_jmank()

## Joining, by = "gameId"

## `summarise()` has grouped output by 'gameId', 'fullCondition', 'block_name', 'trialNum'. You can override using the `.groups` argument.

## `geom_smooth()` using formula 'y ~ x'

log num words

d.player_word_counts %>%left_join(d.games %>% 
                                   select(gameId, condition,chatEnabled)) %>% 
  mutate(languageCondition = ifelse(chatEnabled, "Lang", "Nonlang"),
         fullCondition = paste(condition, languageCondition, sep = "-"),
         block_name = paste0("Block ", blockNum)) %>%
    filter(languageCondition == "Lang") %>%
  group_by(gameId, fullCondition,block_name, trialNum, repNum) %>% 
  summarize(groupNumWords = sum(total_num_words, na.rm = T)) %>%
  filter(groupNumWords > 0) %>%
  ggplot(aes(x = repNum, y = log(groupNumWords+1), color = fullCondition)) + 
  geom_point(alpha = .2) +
  geom_smooth(method=lm)+
  facet_grid(cols = vars(block_name)) + labs(x = "Trial Number", y = "Log N. Words")+ scale_color_bay() + theme_jmank()

## Joining, by = "gameId"

## `summarise()` has grouped output by 'gameId', 'fullCondition', 'block_name', 'trialNum'. You can override using the `.groups` argument.

## `geom_smooth()` using formula 'y ~ x'

Accuracy

N. Flowers Selected each round

d.by_round_metrics %>% 
  mutate(block_name = paste0("Block ", blockNum)) %>%
  ggplot(aes(x = repNum, y = numFlowers, color = condition, linetype = languageCondition)) + 
  geom_point(alpha = .1) +
  geom_smooth(method=lm)+
  facet_grid(cols = vars(block_name), rows = vars(condition)) + labs(x = "Trial Number", y = "N. Unique Flowers")+ scale_color_bay() + theme_jmank()

## `geom_smooth()` using formula 'y ~ x'

ggsave(ggsave(here(fig_path, "collisions_conditionfacet.png")))

## Saving 7 x 5 in image
## `geom_smooth()` using formula 'y ~ x'

## Saving 7 x 5 in image

## `geom_smooth()` using formula 'y ~ x'

d.by_round_metrics %>% 
  mutate(block_name = paste0("Block ", blockNum)) %>%
  ggplot(aes(x = repNum, y = numFlowers, color = condition, linetype = languageCondition)) + 
  geom_point(alpha = .1) +
  geom_smooth(method=lm)+
  facet_grid(cols = vars(block_name), rows = vars(languageCondition)) + labs(x = "Trial Number", y = "N. Unique Flowers")+ scale_color_bay() + theme_jmank()

## `geom_smooth()` using formula 'y ~ x'

ggsave(ggsave(here(fig_path, "collisions_language_facet.png")))

## Saving 7 x 5 in image
## `geom_smooth()` using formula 'y ~ x'

## Saving 7 x 5 in image

## `geom_smooth()` using formula 'y ~ x'

Response Time

d.rounds.final %>% group_by(gameId, repNum, blockNum, trialNum) %>% 
  summarize(mean_response_time = mean(time_sec, na.rm = T))  %>%
  left_join(d.games %>% select(gameId, condition,chatEnabled)) %>% 
  mutate(languageCondition = ifelse(chatEnabled, "Lang", "Nonlang"),
         fullCondition = paste(condition, languageCondition, sep = "-"),
         block_name = paste0("Block ", blockNum)) %>%
    filter(condition != "coopCartel") %>%
  ggplot(aes(x = repNum, y = log(mean_response_time), color = condition, linetype = languageCondition)) + 
  geom_point(alpha = .1) +
  geom_smooth(method=lm)+
  facet_grid(cols = vars(block_name), rows = vars(languageCondition)) + labs(x = "Trial Number", y = "log Mean response time") + scale_color_bay() + theme_jmank()

## `summarise()` has grouped output by 'gameId', 'repNum', 'blockNum'. You can override using the `.groups` argument.

## Joining, by = "gameId"

## `geom_smooth()` using formula 'y ~ x'

#More Accurate proportion possible/accuracy

Are they getting better over games?

d.by_round_metrics %>%
  mutate(blockName = paste0("Block ", blockNum))%>% 
  ggplot(aes(x = groupNumWords, y = roundPointsAdjusted, 
             color = condition)) + 
  geom_point(alpha = .25) + theme_jmank() + scale_color_bay() + facet_wrap(~blockName) +
  labs(title = "Number of points per trial")

### Over/underpowered participants?

d.point_propotions <- d.by_player_metrics %>% 
  #points for entire game
  group_by(condition, languageCondition, gameId, playerId) %>% 
  summarize(player_total = sum(playerUtility)) %>% 
  group_by(condition, gameId) %>% mutate(game_total = sum(player_total)) %>% ungroup() %>%
  mutate(player_proportion = player_total/game_total)

## `summarise()` has grouped output by 'condition', 'languageCondition', 'gameId'. You can override using the `.groups` argument.

#min
min_prop <- d.point_propotions %>% 
  group_by(condition, languageCondition, gameId) %>% 
  #lowest earned
  slice_min(order_by = player_proportion, n = 1) %>% 
  filter(condition == "Competitive") %>% select(-condition) %>% 
  mutate(player_placement = "Lowest Earning")

## Adding missing grouping variables: `condition`

#max
max_prop <- d.point_propotions %>% 
  group_by(condition, languageCondition, gameId) %>% 
  slice_max(order_by = player_proportion, n = 1) %>% 
  filter(condition == "Competitive") %>% 
  select(-condition) %>% 
  mutate(player_placement = "Highest Earning")

## Adding missing grouping variables: `condition`

rbind(min_prop, max_prop) %>%
  ggplot(aes(x = languageCondition, y = player_proportion, color = condition)) + 
  geom_hline(yintercept=.33, linetype = "dashed", alpha = .5)+
  geom_jitter(width = .1, alpha = .5) +
  facet_grid(cols = vars(player_placement)) + 
  stat_summary(fun.data = "mean_cl_boot")+
  labs(x = "Participant Placement", y = "Points earned by participant in place") + scale_color_bay() + theme_jmank() + theme(legend.position = "none")

What about entropy?

entropy_func <- function(p1, p2, p3){
  return((p1 * log(p1)) + (p2 * log(p2)) + (p3 * log(p3)))
  #return((entropy_list[1] * log(entropy_list[1])) + (entropy_list[2] * log(entropy_list[2])) + (entropy_list[3] * log(entropy_list[3])))
}
d.game_entropies <- d.point_propotions %>% select(condition, languageCondition, gameId, player_proportion) %>% 
  mutate(p_num = rep(c("p1","p2","p3"), nrow(d.point_propotions)/3)) %>% 
  pivot_wider(names_from = p_num, values_from = player_proportion) %>% 
  mutate(entropy = 0-entropy_func(p1, p2, p3)) %>% select(-c(p1,p2,p3)) 

d.game_entropies %>% filter(condition == "Competitive") %>% 
  ggplot(aes(x = languageCondition, y = entropy)) + 
  geom_hline(yintercept=(0-entropy_func(1/3, 1/3, 1/3)), linetype = "dashed", alpha = .5)+
  #minimum is tricky? this is 12, 1, and 2 which is the least fair a game _could possible_ be, but isnt adjusted per game
  #geom_hline(yintercept=(0-entropy_func(12/15, 1/15, 2/15)), linetype = "dashed", alpha = .5)+
  geom_jitter(width = .1, alpha = .5) +
  stat_summary(fun.data = "mean_cl_boot")+
  labs(x = "Condition", y = "Entropy of Player Scores (by game)") + scale_color_bay() + theme_jmank() + theme(legend.position = "none")

Selecting Hidden Flowers?

d.rounds.final %>% left_join(d.games %>% 
                                   select(gameId, condition, chatEnabled)) %>%
  left_join(d.contexts %>% select(gameId, blockNum, trialNum, repNum, playerResponse = label, blinded)) %>%
  mutate(select_blinded = playerId == blinded) %>% 
  group_by(gameId, blockNum, trialNum, repNum, chatEnabled, condition) %>%
  summarize(numSelectHidden = sum(select_blinded)) %>% 
  mutate(languageCondition = ifelse(chatEnabled, "Lang", "Nonlang"),
         fullCondition = paste(condition, languageCondition, sep = "-"),
         block_name = paste0("Block ", blockNum)) %>%
    filter(condition != "coopCartel") %>%
  ggplot(aes(x = repNum, y = numSelectHidden, color = condition, linetype = languageCondition)) + 
  geom_point(alpha = .1) +
  geom_smooth(method=lm)+
  facet_grid(cols = vars(block_name), rows = vars(languageCondition)) + labs(x = "Trial Number", y = "N. Select Hidden")+ scale_color_bay() + theme_jmank()

## Joining, by = c("gameId", "condition")

## Joining, by = c("gameId", "repNum", "blockNum", "trialNum", "playerResponse")

## `summarise()` has grouped output by 'gameId', 'blockNum', 'trialNum', 'repNum', 'chatEnabled'. You can override using the `.groups` argument.

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 6 rows containing non-finite values (stat_smooth).

## Warning: Removed 6 rows containing missing values (geom_point).

Language Results

# d.round_results.final %>% left_join(d.games %>% select(gameId, condition, chatEnabled, langUse, gameComplete)) %>%
#   mutate(chat = ifelse(chatEnabled, "lang", "nonlang"),
#          full_condition = paste0(condition, "-", chat)) %>%
#   ggplot(aes(x=time_sec, y = as.numeric(playerUtility), color = full_condition)) + 
#   facet_grid(rows = vars(condition), cols = vars(langUse)) +
#   geom_point(alpha = .25) + 
#   labs(x = "log time (in seconds)",
#        y = "individual utility",
#        title = "Response time in sec and utility", subtitle =  "each participant, each round")
# 
# d.round_results.final %>% left_join(d.games %>% select(gameId, condition, chatEnabled, langUse)) %>%
#   mutate(chat = ifelse(chatEnabled, "lang", "nonlang"),
#          full_condition = paste0(condition, "-", chat)) %>%
#   ggplot(aes(x=log(time_sec), y = as.numeric(playerUtility), color = full_condition)) + 
#   facet_grid(rows = vars(condition), cols = vars(langUse)) +
#   geom_point(alpha = .25) + 
#   labs(x = "log time (in seconds)",
#        y = "individual utility",
#        title = "Response time in sec (log) and utility", subtitle =  "each participant, each round")

Accuracy sum utility for entire group/max possible utility per group (or, the max flower *3) Each condition is going to have a different maximum utility

coopcartel will have a max utility = max flower * 3 (players) * 3 (incentive)

competcartel will have a max utility = sum(top_3_flowers)

# d.max_utility <- d.contexts %>% 
#   left_join(d.games %>% select(gameId, condition)) %>%
#   mutate(utility= as.numeric(utility)) %>%
#   group_by(gameId, blockNum, repNum) %>% 
#   slice_max(order_by =utility,n = 3)%>%
#   summarize(competCartel = sum(utility),
#             coopMulti= sum(utility),
#             coopCartel = max(utility * 9)) %>% ungroup() %>%
#   pivot_longer(cols = c(competCartel, coopMulti, coopCartel), names_to = "condition", values_to = "maxUtility")
# 
# 
# d.utility <- d.round_results.final %>%
#          left_join(d.games %>% select(gameId, condition, chatEnabled, langUse)) %>% 
#   group_by(gameId, blockNum, repNum, trialNum,condition, langUse) %>% 
#   summarize(group_utility = sum(as.numeric(playerUtility)))%>% ungroup() %>%
#   left_join(d.max_utility) %>%
#   mutate(prop_utility = group_utility/maxUtility,
#          langUsed = if_else(langUse, "Lang", "Nonlang"))
# 
# d.utility %>% 
#   ggplot(aes(x = trialNum, y=prop_utility, color = as.factor(gameId))) + 
#   geom_point(alpha = .4) + 
#   facet_grid(cols = vars(condition), rows = vars(langUsed), scales = "free_y") + 
#   #geom_smooth(method=glm, formula=y~poly(x,2), alpha=.3)+
#   theme(legend.position = "none")+
#   labs(title="Accuracy", y="Group utility/max utility", x="Round number", color="gameId")
# 
# ggsave(paste0(image_location, "/accuracy.png"))
# 
# d.utility %>% 
#   ggplot(aes(x = trialNum, y=prop_utility, color = as.factor(gameId))) + 
#   geom_point(alpha = .4) + 
#   facet_grid(cols = vars(condition), rows = vars(langUsed), scales = "free_y") + 
#   #geom_smooth(method=glm, formula=y~poly(x,2), alpha=.3)+
#   theme(legend.position = "none")+
#   labs(title="Accuracy", y="Group utility/max utility", x="Round number", color="gameId")
# 
# ggsave(paste0(image_location, "/accuracy.png"))
# 
# d.utility %>% 
#   filter(langUsed == "Lang") %>%
#   ggplot(aes(x = repNum, y=prop_utility, color = as.factor(gameId))) + 
#   geom_point(alpha = .4) + 
#   facet_grid(cols = vars(blockNum), rows = vars(gameId)) + 
#   #geom_smooth(method=glm, formula=y~poly(x,2), alpha=.3)+
#   theme(legend.position = "none")+
#   labs(title="Accuracy", y="Group utility/max utility", x="Round number", color="gameId") + ylim(0,1)
# 
# ggsave(paste0(image_location, "/accuracy.png"))

Everything here has bootstrapped 95% CIs.

Should find better curves to fit, but using quadratic to allow for some curvature.

# ggplot(d.chat, aes(x=repNum, y=total_num_words, color=role))+
#   facet_wrap(~tangram, nrow=2)+
#   scale_color_brewer(palette="Dark2")+
#      stat_summary(fun.data = "mean_cl_boot")+
#   labs(title="Number of words", y="Number of words", x="Round number")+
#   theme(legend.position="bottom")
# ggplot(d.chat, aes(x=repNum, y=total_num_words, color=as.factor(numPlayers)))+
#   facet_wrap(~role, nrow=1)+
#   scale_color_brewer(palette="Dark2")+
#     geom_jitter(alpha=.05)+
#     geom_smooth(method=glm, formula=y~poly(x,2), alpha=.3)+
#   #geom_smooth(method = "glm", formula = y~x,method.args = list(family = gaussian(link = 'log')))+
#      stat_summary(fun.data = "mean_cl_boot")+
#     scale_y_continuous(limits = c(0,50))+
#   labs(title="Number of words", y="Number of words", x="Round number", color="Player count")+
#   theme(legend.position="bottom")
# ggsave(here(image_location, 'words.pdf'), width=6, height=4)

# d.chat %>% filter(role=="speaker") %>% 
#     mutate(groupxtangram=str_c(gameId,tangram)) %>% 
#   group_by(repNum, numPlayers, gameId,tangram, groupxtangram) %>% 
#   summarize(words=sum(total_num_words)) %>% 
# ggplot(aes(x=repNum, y=words, color=as.factor(numPlayers)))+
#   facet_wrap(~numPlayers, nrow=1)+
#   scale_color_brewer(palette="Dark2")+
#     geom_line(aes(group=groupxtangram), alpha=.1,method=glm, se=F)+
#     geom_smooth(method = "glm", formula = y~x,method.args = list(family = gaussian(link = 'log')))+
#     #geom_smooth(method=glm, formula=y~poly(x,2), alpha=.3)+
#   labs(title="Words from speaker per tangram", y="Number of words", x="Round number", color="Player count")+
#   theme(legend.position="null")
# ggsave(here(image_location, 'words_lines.pdf'), width=6, height=4)

# d.chat %>% filter(role=="speaker") %>% 
# ggplot(aes(x=repNum, y=total_num_words, color=as.factor(numPlayers)))+
#   facet_wrap(~tangram)+
#   scale_color_brewer(palette="Dark2")+
#     #geom_smooth(method=glm, formula=y~poly(x,2), se=T, alpha=.1)+
#       geom_smooth(method = "glm", formula = y~x,method.args = list(family = gaussian(link = 'log')))+
#        stat_summary(fun.data = "mean_cl_boot", size=.2)+
#   labs(title="Tangram variability", y="Number of words", x="Round number", color="Player count")+
#   theme(legend.position="bottom")
# ggsave(here(image_location, 'words_tangrams.pdf'), width=8, height=6)

# d.round_results %>% group_by(playerId,repNum, gameId, numPlayers) %>% 
#   mutate(correct.num=ifelse(correct,1,0)) %>% 
#   ggplot(aes(x=repNum, y=correct.num, color=as.factor(numPlayers)))+
# geom_smooth(method = "glm", method.args = list(family = "binomial")) + 
#   stat_summary(fun.data = "mean_cl_boot", position = position_dodge(width=.2))+
#   #geom_point()+
#   scale_color_brewer(palette="Dark2")+
#   #scale_y_continuous(limits = c(0,1))+
#   labs(x="Round Number", y="Fraction correctly selected", title= "Overall accuracy increases over repetitions", color="Player count")+
#     theme(legend.position="bottom")
# ggsave(here(image_location, 'accuracy.pdf'), width=6, height=4)

# d.round_results %>% group_by(playerId, repNum, gameId, numPlayers) %>% 
#   filter(correct==T) %>% 
#   #summarize(time=mean(time)) %>% 
#   ggplot(aes(x=repNum, y=time, color=as.factor(numPlayers)))+
#   geom_jitter(width=.4, height=0, alpha=.03)+
# geom_smooth(method = "glm", formula = y~x,
#                       method.args = list(family = gaussian(link = 'log')))+
#   stat_summary(fun.data = "mean_cl_boot", position = position_dodge(width=.2))+
#   scale_y_continuous(limits = c(0,180))+
#     scale_color_brewer(palette="Dark2")+
#   labs(x="Round Number", y="Time to selection in seconds",
#        title="People choose faster in later rounds", color="Player count")+
#   theme(legend.position = "bottom")
# ggsave(here(image_location, 'time.pdf'), width=6, height=4)

Models

#summary(model)

#summary(model_speaker_acc)

flowers-pilot-analysis