load packages

library(tidyverse)
library(here)
library(janitor)
library(patchwork)
library(papaja)
library(kableExtra)

the back story

Students in group 1 were told that BabyA is preterm and BabyB is full term; students in group 2 were told that BabyA is full term and BabyB is preterm.

Both BabyA and BabyB were actually full term.

questions

  1. Does knowing the preterm status of an infant impact how we judge their behaviour?
  2. Does failing to code behaviour blind to preterm status (or any other condition we are trying to measure the effect of) bias coding results?

read the data

# fine motor
fineA <- read_csv(here("smartsparrow", "motor_data", "data2019", "fine_A_2019.csv")) %>%
  clean_names() %>%
  select(starts_with("A"), starts_with("B")) %>%
  mutate(group = "group 1", condition = "fine", assignment = "A_pre_B_full")

fineB <- read_csv(here("smartsparrow", "motor_data", "data2019", "fine_B_2019.csv")) %>%
  clean_names() %>%
  select(starts_with("A"), starts_with("B")) %>%
  mutate(group = "group 2", condition = "fine", assignment = "A_full_B_pre") 

# add id using nrow 
fineA$id <- 1:nrow(fineA) 
# add id manually to make it start at the end of fineA
fineB$id <- 216:414
  
# pull id to the front  
fine_motor <- rbind(fineA, fineB) %>%
  select(id, everything())
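
The hard-coded range 216:414 assumes fineA always has exactly 215 rows; if the number of responses changes between years, the manual ids will overlap or leave gaps. A minimal sketch of the same assignment derived from nrow() instead:

# derive fineB ids from the size of fineA rather than hard-coding 216:414
fineB$id <- seq(from = nrow(fineA) + 1, length.out = nrow(fineB))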


# gross motor

grossA <- read_csv(here("smartsparrow", "motor_data", "data2019", "gross_A_2019.csv")) %>%
  clean_names() %>%
  select(starts_with("A"), starts_with("B")) %>%
  mutate(group = "group 1", condition = "gross", assignment = "A_pre_B_full")

grossB <- read_csv(here("smartsparrow", "motor_data", "data2019", "gross_B_2019.csv")) %>%
  clean_names() %>%
  select(starts_with("A"), starts_with("B")) %>%
  mutate(group = "group 2", condition = "gross", assignment = "A_full_B_pre")

# add id using nrow 
grossA$id <- 1:nrow(grossA) 
# add id manually to make it start at the end of grossA
grossB$id <- 216:416


gross_motor <- rbind(grossA, grossB) %>%
  select(id, everything())


# play

playA <- read_csv(here("smartsparrow", "motor_data", "data2019", "play_A_2019.csv")) %>%
  clean_names() %>%
  select(starts_with("A"), starts_with("B")) %>%
  mutate(group = "group 1", condition = "play", assignment = "A_pre_B_full")

playB <- read_csv(here("smartsparrow", "motor_data", "data2019", "play_B_2019.csv")) %>%
  clean_names() %>%
  select(starts_with("A"), starts_with("B")) %>%
  mutate(group = "group 2", condition = "play", assignment = "A_full_B_pre") 

# add id using nrow 
playA$id <- 1:nrow(playA) 
# add id manually to make it start at the end of playA
playB$id <- 226:437


play_motor <- rbind(playA, playB) %>%
  select(-a_duplo_apart, -b_duplo_apart) %>% # drop extra duplo, it makes the pivot complicated
  select(id, everything())

# self

selfA <- read_csv(here("smartsparrow", "motor_data", "data2019", "self_A_2019.csv")) %>%
  clean_names() %>%
  select(starts_with("A"), starts_with("B")) %>%
  mutate(group = "group 1", condition = "self", assignment = "A_pre_B_full")

selfB <- read_csv(here("smartsparrow", "motor_data", "data2019", "self_B_2019.csv")) %>%
  clean_names() %>%
  select(starts_with("A"), starts_with("B")) %>%
  mutate(group = "group 2", condition = "self", assignment = "A_full_B_pre")

# add id using nrow 
selfA$id <- 1:nrow(selfA) 
# add id manually to make it start at the end of selfA
selfB$id <- 211:411

self_motor <- rbind(selfA, selfB) %>%
  select(id, everything())
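
The four read/clean/select/mutate blocks above differ only in the file name and the group, condition, and assignment labels, so they could be collapsed into a small helper. A sketch, assuming the same paths as above (read_rating is a made-up name):

# hypothetical helper wrapping the repeated read/clean/select/mutate pattern
read_rating <- function(file, grp, cond, labels) {
  read_csv(here("smartsparrow", "motor_data", "data2019", file)) %>%
    clean_names() %>%
    select(starts_with("a"), starts_with("b")) %>%
    mutate(group = grp, condition = cond, assignment = labels)
}

# e.g. the fine motor files again
fineA <- read_rating("fine_A_2019.csv", grp = "group 1", cond = "fine", labels = "A_pre_B_full")
fineB <- read_rating("fine_B_2019.csv", grp = "group 2", cond = "fine", labels = "A_full_B_pre")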

count how many ratings in each group

tabyl(fine_motor, group)
##    group   n   percent
##  group 1 215 0.5193237
##  group 2 199 0.4806763

make things long

fine_plot <- fine_motor %>%
  pivot_longer(names_to = c("infant", "task"), names_sep = "_", values_to = "rating", 2:5) 

gross_plot <- gross_motor %>%
  pivot_longer(names_to = c("infant", "task"), names_sep = "_", values_to = "rating", 2:5)
## Warning: Expected 2 pieces. Additional pieces discarded in 4 rows [1, 2, 3,
## 4].
play_plot <- play_motor %>%
  pivot_longer(names_to = c("infant", "task"), names_sep = "_", values_to = "rating", 2:7)
## Warning: Expected 2 pieces. Additional pieces discarded in 4 rows [1, 2, 4,
## 6].
self_plot <- self_motor %>%
  pivot_longer(names_to = c("infant", "task"), names_sep = "_", values_to = "rating", 2:5)
## Warning: Expected 2 pieces. Additional pieces discarded in 2 rows [2, 4].
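
Those warnings mean that any column name with more than one underscore after the infant letter (the gross motor and play tasks) loses everything after the second piece. If the full task names are worth keeping, names_pattern is one option instead of names_sep; a sketch on the gross motor data, assuming the cleaned names look like a_<task>:

# split on the first underscore only, keeping the rest of the name as the task label
gross_plot <- gross_motor %>%
  pivot_longer(cols = 2:5,
               names_to = c("infant", "task"),
               names_pattern = "^([ab])_(.+)$",
               values_to = "rating")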

fix factor labels

fine_plot$infant <- factor(fine_plot$infant, levels = c("a", "b"),
                        labels = c("BabyA", "BabyB"))

gross_plot$infant <- factor(gross_plot$infant, levels = c("a", "b"),
                           labels = c("BabyA", "BabyB"))

play_plot$infant <- factor(play_plot$infant, levels = c("a", "b"),
                           labels = c("BabyA", "BabyB"))

self_plot$infant <- factor(self_plot$infant, levels = c("a", "b"),
                           labels = c("BabyA", "BabyB"))

compute summary stats

fine_plot %>%
  group_by(group, infant) %>%
  summarise(mean = mean(rating,  na.rm = TRUE))
## # A tibble: 4 x 3
## # Groups:   group [2]
##   group   infant  mean
##   <chr>   <fct>  <dbl>
## 1 group 1 BabyA   3.03
## 2 group 1 BabyB   2.48
## 3 group 2 BabyA   3.27
## 4 group 2 BabyB   2.39
gross_plot %>%
  group_by(group, infant) %>%
  summarise(mean = mean(rating,  na.rm = TRUE))
## # A tibble: 4 x 3
## # Groups:   group [2]
##   group   infant  mean
##   <chr>   <fct>  <dbl>
## 1 group 1 BabyA   3.19
## 2 group 1 BabyB   3.70
## 3 group 2 BabyA   3.43
## 4 group 2 BabyB   3.55
play_plot %>%
  group_by(group, infant) %>%
  summarise(mean = mean(rating,  na.rm = TRUE))
## # A tibble: 4 x 3
## # Groups:   group [2]
##   group   infant  mean
##   <chr>   <fct>  <dbl>
## 1 group 1 BabyA   2.56
## 2 group 1 BabyB   3.69
## 3 group 2 BabyA   2.91
## 4 group 2 BabyB   3.47
self_plot %>%
  group_by(group, infant) %>%
  summarise(mean = mean(rating,  na.rm = TRUE))
## # A tibble: 4 x 3
## # Groups:   group [2]
##   group   infant  mean
##   <chr>   <fct>  <dbl>
## 1 group 1 BabyA   2.89
## 2 group 1 BabyB   3.81
## 3 group 2 BabyA   3.32
## 4 group 2 BabyB   3.61

Plots

plot fine and gross motor

fplot <- fine_plot %>%
  group_by(group, infant) %>%
  summarise(mean = mean(rating,  na.rm = TRUE)) %>%
  ggplot(aes(x = group, y = mean, fill = group)) +
  geom_col() +
  facet_wrap(~ infant) +
  labs(subtitle = "fine motor ratings") +
  theme(legend.position = "none") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 4))


gplot <- gross_plot %>%
  group_by(group, infant) %>%
  summarise(mean = mean(rating,  na.rm = TRUE)) %>%
  ggplot(aes(x = group, y = mean, fill = group)) +
  geom_col() +
  facet_wrap(~ infant)  +
  labs(subtitle = "gross motor ratings") +
  theme(legend.position = "none") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 4))

#combine fplot and gplot 

patchplot1 <- fplot + gplot +
  plot_annotation(
    title = "group1 thought BabyA was preterm \ngroup2 thought BabyB was preterm") 

Ratings of fine and gross motor skills

#save as png
ggsave(here("smartsparrow", "plots", "patchplot1_2019.png"), plot = patchplot1, width = 8, height = 6, units = c("in"))

plot play and self help

pplot <- play_plot %>%
  group_by(group, infant) %>%
  summarise(mean = mean(rating,  na.rm = TRUE)) %>%
  ggplot(aes(x = group, y = mean, fill = group)) +
  geom_col() +
  facet_wrap(~ infant)  +
  labs(subtitle = "play ratings") +
  theme(legend.position = "none") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 4))


splot <- self_plot %>%
  group_by(group, infant) %>%
  summarise(mean = mean(rating,  na.rm = TRUE)) %>%
  ggplot(aes(x = group, y = mean, fill = group)) +
  geom_col() +
  facet_wrap(~ infant)  +
  labs(subtitle = "self help ratings") +
  theme(legend.position = "none") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 4))
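
fplot, gplot, pplot, and splot differ only in the input data and the subtitle, so the column plot could also live in a helper. A sketch (plot_means is a made-up name):

# hypothetical helper for the repeated mean-ratings column plot
plot_means <- function(df, subtitle) {
  df %>%
    group_by(group, infant) %>%
    summarise(mean = mean(rating, na.rm = TRUE)) %>%
    ggplot(aes(x = group, y = mean, fill = group)) +
    geom_col() +
    facet_wrap(~ infant) +
    labs(subtitle = subtitle) +
    theme(legend.position = "none") +
    scale_y_continuous(expand = c(0, 0), limits = c(0, 4))
}

# e.g.
pplot <- plot_means(play_plot, "play ratings")
splot <- plot_means(self_plot, "self help ratings")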
  

#combine pplot and splot 

patchplot2 <- pplot + splot +
  plot_annotation(
    title = "group1 thought BabyA was preterm \ngroup2 thought BabyB was preterm")

Ratings of play and self help skills

# save as png
ggsave(here("smartsparrow", "plots", "patchplot2_2019.png"), plot = patchplot2, width = 8, height = 6, units = c("in"))

Combining datasets for t-test analysis

Combine the plot data into a single df. Create a df that contains averaged scores for each participant for each baby, averaging across task and category. Plot a boxplot of individual scores.

all_tasks <- rbind(fine_plot, gross_plot, play_plot, self_plot)

ind_mean <- all_tasks %>%
  group_by(id, group, infant) %>%
  summarise(score = mean(rating)) 
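
Note that mean(rating) without na.rm = TRUE returns NA for any participant with at least one missing rating, which is why na.omit() is needed before plotting below. A sketch of the alternative, which keeps participants with partial data by averaging whatever ratings they did give (ind_mean_partial is a made-up name):

# keep participants with partial data by ignoring missing ratings
ind_mean_partial <- all_tasks %>%
  group_by(id, group, infant) %>%
  summarise(score = mean(rating, na.rm = TRUE))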

Exploring different ways to look at the data

  1. boxplot
ind_mean %>%
  na.omit() %>%
  ggplot(aes(x = group, y = score, fill = group)) +
  geom_boxplot() +
  facet_wrap(~ infant) +
  labs(title = "Mean locomotor scores for Baby A and Baby B by group", subtitle = "group1 thought BabyA was preterm \ngroup2 thought BabyB was preterm") +
  theme(legend.position = "none") +
  scale_y_continuous(expand = c(0, 0), limits = c(1.5, 4.5)) 
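
Another way to look at the spread of individual scores is to overlay the raw points on the boxplot with geom_jitter; a minimal sketch:

# boxplot with individual mean scores overlaid as jittered points
ind_mean %>%
  na.omit() %>%
  ggplot(aes(x = group, y = score, fill = group)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.15, alpha = 0.3) +
  facet_wrap(~ infant) +
  theme(legend.position = "none")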

Average across individuals to create column plot

grand_mean <- ind_mean %>%
  na.omit() %>%
  group_by(group, infant) %>%
  summarise(mean_rating = mean(score)) %>%
  arrange(infant)
  
grand_mean %>% ggplot(aes(x = group, y = mean_rating, fill = group)) +
  geom_col() +
  facet_wrap(~ infant) +
  labs(title = "Mean locomotor scores for Baby A and Baby B by group", subtitle = "group1 thought BabyA was preterm \ngroup2 thought BabyB was preterm") +
  theme(legend.position = "none") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 4))

ggsave(here("smartsparrow", "plots", "average_loco_2019.png"), width = 8, height = 6, units = c("in"))

Stats: independent samples t-test

Using the jmv package, which is associated with the jamovi software. It makes really nicely formatted test output, kinda like Dani’s lsr package (but the output doesn’t knit into an Rmd doc, which is disappointing). The output is much simpler than the traditional t.test function.

library(jmv) 
## Warning: package 'jmv' was built under R version 3.5.2

Print descriptives by group/infant

kable(grand_mean) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
group     infant   mean_rating
group 1   BabyA       2.873641
group 2   BabyA       3.185653
group 1   BabyB       3.460710
group 2   BabyB       3.313785

For simplicity, we are going to analyse ratings for Baby A and Baby B separately, testing whether ratings for each baby differ by group.

Q1: Did group 1, who thought Baby A was preterm, rate the baby differently than group 2, who thought that baby was full term?

Q2: Did group 2, who thought Baby B was preterm, rate the baby differently than group 1, who thought that baby was full term?

Make separate dataframes for BabyA and BabyB, make score numeric and group a factor. Check sample size per group.

babyA <- ind_mean %>%
  filter(infant == "BabyA")

babyA$score <- as.numeric(babyA$score)
babyA$group <- as.factor(babyA$group)

babyB <- ind_mean %>%
  filter(infant == "BabyB")

babyB$score <- as.numeric(babyB$score)
babyB$group <- as.factor(babyB$group)

tabyl(babyA, group)
##    group   n   percent
##  group 1 225 0.4977876
##  group 2 227 0.5022124
tabyl(babyB, group)
##    group   n   percent
##  group 1 225 0.4977876
##  group 2 227 0.5022124

Independent samples t-test

ttestIS(formula = score ~ group, data = babyA)
## 
##  INDEPENDENT SAMPLES T-TEST
## 
##  Independent Samples T-Test                             
##  ────────────────────────────────────────────────────── 
##                            statistic    df     p        
##  ────────────────────────────────────────────────────── 
##    score    Student's t        -9.10    409    < .001   
##  ──────────────────────────────────────────────────────
ttestIS(formula = score ~ group, data = babyB)
## 
##  INDEPENDENT SAMPLES T-TEST
## 
##  Independent Samples T-Test                             
##  ────────────────────────────────────────────────────── 
##                            statistic    df     p        
##  ────────────────────────────────────────────────────── 
##    score    Student's t       5.25 ᵃ    409    < .001   
##  ────────────────────────────────────────────────────── 
##    ᵃ Levene's test is significant (p < .05),
##    suggesting a violation of the assumption of
##    equal variances
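
Since Levene's test flags unequal variances for the Baby B comparison, it is worth checking that the result holds without the equal-variance assumption. A quick cross-check with base R's t.test(), which applies the Welch correction by default:

# Welch two-sample t-test (does not assume equal variances)
t.test(score ~ group, data = babyB)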