library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.1     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(here)
## here() starts at /Users/caoanjie/Desktop/projects/looking_time/adult_analysis
library(stringr)
# Locations of the trimmed rating files, resolved through here().
SIMILARITY_DATA_PATH <- here("data/processed_data/trimmed_similaritydata.csv")
COMPLEXITY_DATA_PATH <- here("data/processed_data/trimmed_complexitydata.csv")

# Similarity ratings: one row per rated pair (stimulus_left, stimulus_right).
df.similarity <- read_csv(file = SIMILARITY_DATA_PATH)
## Parsed with column specification:
## cols(
##   subject = col_character(),
##   question_type = col_character(),
##   stimulus_left = col_character(),
##   stimulus_right = col_character(),
##   rating = col_double()
## )
# Complexity ratings: one row per rated stimulus.
df.complexity <- read_csv(file = COMPLEXITY_DATA_PATH)
## Parsed with column specification:
## cols(
##   subject = col_character(),
##   question_type = col_character(),
##   stimulus = col_character(),
##   rating = col_double(),
##   complexity = col_character()
## )

Similarity ratings

data preprocessing

Sanity check: confirm that the average ratings for similar pairs are higher than those for dissimilar pairs.

# Derive per-trial labels for the similarity ratings:
#   complexity        - "complex" if either stimulus name contains "complex";
#                       otherwise "simple" if either name contains "simple";
#                       NA when neither tag is present.
#   stimulus_*_number - first run of digits found in each stimulus name.
#   similarity        - "similar" when both numbers are equal, else "dissimilar".
df.similarity <- df.similarity %>%
  mutate(
    complexity = case_when(
      grepl("complex", stimulus_left) | grepl("complex", stimulus_right) ~ "complex",
      # Inspect both sides of the pair, mirroring the "complex" branch above;
      # testing only stimulus_right would leave a left-only "simple" pair as NA.
      grepl("simple", stimulus_left) | grepl("simple", stimulus_right) ~ "simple"
    ),
    stimulus_left_number = as.numeric(str_extract(stimulus_left, "[[:digit:]]+")),
    stimulus_right_number = as.numeric(str_extract(stimulus_right, "[[:digit:]]+")),
    similarity = case_when(
      stimulus_left_number == stimulus_right_number ~ "similar",
      # Fallback also catches pairs where a number is missing (comparison is NA).
      TRUE ~ "dissimilar"
    )
  )

visualization

raw

# Distribution of raw similarity ratings across all trials (30 bins).
ggplot(data = df.similarity, mapping = aes(x = rating)) +
  geom_histogram(bins = 30)

by complexity block

# Ratings for similar vs. dissimilar pairs, one panel per complexity label.
# Faint jittered points show individual ratings; stat_summary overlays the
# "mean_cl_boot" summary for each group.
ggplot(data = df.similarity, mapping = aes(x = similarity, y = rating)) +
  geom_jitter(alpha = 0.1, width = 0.3) +
  stat_summary(fun.data = "mean_cl_boot") +
  facet_wrap(~complexity)

formal testing

# Linear model: rating predicted by pair similarity and complexity label
# (additive effects only, no interaction term).
lm.similarity <- lm(rating ~ similarity + complexity, data = df.similarity)
summary(lm.similarity)
## 
## Call:
## lm(formula = rating ~ similarity + complexity, data = df.similarity)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5855 -1.0683 -0.0683  0.9317  3.9317 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        2.06826    0.04425  46.738   <2e-16 ***
## similaritysimilar  2.49707    0.05085  49.108   <2e-16 ***
## complexitysimple   0.02019    0.05084   0.397    0.691    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.442 on 3217 degrees of freedom
## Multiple R-squared:  0.4285, Adjusted R-squared:  0.4281 
## F-statistic:  1206 on 2 and 3217 DF,  p-value: < 2.2e-16

Complexity ratings

data preprocessing

# Label each rated stimulus by the tag found in its name: "complex" is tested
# first, then "simple"; names containing neither tag get NA.
df.complexity <- mutate(
  df.complexity,
  complexity = case_when(
    grepl(pattern = "complex", x = stimulus) ~ "complex",
    grepl(pattern = "simple", x = stimulus) ~ "simple"
  )
)

visualization

raw ratings

# Distribution of raw complexity ratings across all trials (30 bins).
ggplot(data = df.complexity, mapping = aes(x = rating)) +
  geom_histogram(bins = 30)

plotting

# Complexity ratings split by stimulus label. Faint jittered points show
# individual ratings; stat_summary overlays the "mean_cl_boot" summary.
ggplot(data = df.complexity, mapping = aes(x = complexity, y = rating)) +
  geom_jitter(alpha = 0.1, width = 0.3) +
  stat_summary(fun.data = "mean_cl_boot")

formal testing

# Linear model: complexity rating predicted by the stimulus label.
lm.complexity <- lm(rating ~ complexity, data = df.complexity)
summary(lm.complexity)
## 
## Call:
## lm(formula = rating ~ complexity, data = df.complexity)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.3649 -1.3649 -0.3649  0.6351  3.6291 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.36488    0.03627  120.35   <2e-16 ***
## complexitysimple -1.99400    0.05128  -38.88   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.453 on 3211 degrees of freedom
## Multiple R-squared:  0.3201, Adjusted R-squared:  0.3199 
## F-statistic:  1512 on 1 and 3211 DF,  p-value: < 2.2e-16