# Load data

# Location of the by-item CSV, resolved through here().
DISTANCE_DF_PATH <- here(
  "data/processed/human_data/conceptviz_1_by_item_data_with_hausdorff_r.csv"
)

# Read the table; column types are guessed by readr.
# NOTE(review): the file's first column has no header, so readr names it
# `X1` -- presumably a row index written at export time; confirm before
# relying on it.
distance_data <- read_csv(file = DISTANCE_DF_PATH)
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   pair_id = col_double(),
##   category = col_character(),
##   drawing_key_id_1 = col_double(),
##   drawing_key_id_2 = col_double(),
##   ci_lower_human = col_double(),
##   ci_upper_human = col_double(),
##   human_rating_mean = col_double(),
##   n_participants = col_double(),
##   hausdorff = col_double()
## )

# Plot data

# Add `log_hausdorff`, the natural log of the `hausdorff` column, alongside
# the original columns.
distance_data_with_log <- mutate(
  distance_data,
  log_hausdorff = log(hausdorff)
)

# Scatter of mean human rating against log Hausdorff distance. Each point
# gets a vertical range from `ci_lower_human` to `ci_upper_human`, and a
# linear-model smooth is drawn on top.
ggplot(
  data = distance_data_with_log,
  mapping = aes(x = log_hausdorff, y = human_rating_mean)
) +
  geom_point() +
  geom_linerange(
    mapping = aes(ymin = ci_lower_human, ymax = ci_upper_human)
  ) +
  geom_smooth(method = "lm") +
  theme_classic()

# Fit models

# Two-sided Pearson correlation test between log Hausdorff distance and the
# mean human rating. `method` and `alternative` are spelled out here but are
# the same as cor.test()'s defaults.
cor.test(
  x = distance_data_with_log$log_hausdorff,
  y = distance_data_with_log$human_rating_mean,
  alternative = "two.sided",
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  distance_data_with_log$log_hausdorff and distance_data_with_log$human_rating_mean
## t = 6.8688, df = 398, p-value = 2.504e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2349938 0.4104932
## sample estimates:
##       cor 
## 0.3255445
# Simple linear regression of mean human rating on log Hausdorff distance;
# the fit's summary (coefficients, residuals, R-squared) is printed.
summary(
  lm(
    formula = human_rating_mean ~ log_hausdorff,
    data = distance_data_with_log
  )
)
## 
## Call:
## lm(formula = human_rating_mean ~ log_hausdorff, data = distance_data_with_log)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0457 -0.8543  0.1510  0.9983  2.5035 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.2905     0.6803   0.427     0.67    
## log_hausdorff   1.0438     0.1520   6.869  2.5e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.232 on 398 degrees of freedom
## Multiple R-squared:  0.106,  Adjusted R-squared:  0.1037 
## F-statistic: 47.18 on 1 and 398 DF,  p-value: 2.504e-11