Load data
# Project-relative location of the by-item CSV (human ratings plus a
# `hausdorff` column), resolved through here().
DISTANCE_DF_PATH <- here(
  "data/processed/human_data/conceptviz_1_by_item_data_with_hausdorff_r.csv"
)

# Column types are left to read_csv()'s guesser.
# NOTE(review): the file's first column has no header, so read_csv() fills in
# the name `X1`; presumably a row index written at export time -- confirm.
distance_data <- DISTANCE_DF_PATH %>%
  read_csv()
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## pair_id = col_double(),
## category = col_character(),
## drawing_key_id_1 = col_double(),
## drawing_key_id_2 = col_double(),
## ci_lower_human = col_double(),
## ci_upper_human = col_double(),
## human_rating_mean = col_double(),
## n_participants = col_double(),
## hausdorff = col_double()
## )
Plot data
# Add the natural log of the Hausdorff distance as a new column.
distance_data_with_log <- mutate(
  distance_data,
  log_hausdorff = log(hausdorff)
)

# Scatter of mean human rating against log Hausdorff distance. Each point gets
# a vertical range from ci_lower_human to ci_upper_human, and a linear-model
# smooth is drawn on top.
distance_data_with_log %>%
  ggplot(mapping = aes(x = log_hausdorff, y = human_rating_mean)) +
  geom_point() +
  geom_linerange(mapping = aes(ymin = ci_lower_human, ymax = ci_upper_human)) +
  geom_smooth(method = "lm") +
  theme_classic()

Fit models
# Two-sided Pearson correlation between log Hausdorff distance and the mean
# human rating. The arguments below are cor.test()'s defaults, written out.
# The `$` expressions are kept verbatim because cor.test() echoes them in the
# "data:" line of its printed result.
cor.test(
  x = distance_data_with_log$log_hausdorff,
  y = distance_data_with_log$human_rating_mean,
  alternative = "two.sided",
  method = "pearson",
  conf.level = 0.95
)
##
## Pearson's product-moment correlation
##
## data: distance_data_with_log$log_hausdorff and distance_data_with_log$human_rating_mean
## t = 6.8688, df = 398, p-value = 2.504e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2349938 0.4104932
## sample estimates:
## cor
## 0.3255445
# Simple linear regression of the mean human rating on log Hausdorff distance;
# the summary (coefficients, residuals, R-squared, F-test) is printed.
summary(
  lm(
    formula = human_rating_mean ~ log_hausdorff,
    data = distance_data_with_log
  )
)
##
## Call:
## lm(formula = human_rating_mean ~ log_hausdorff, data = distance_data_with_log)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0457 -0.8543 0.1510 0.9983 2.5035
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2905 0.6803 0.427 0.67
## log_hausdorff 1.0438 0.1520 6.869 2.5e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.232 on 398 degrees of freedom
## Multiple R-squared: 0.106, Adjusted R-squared: 0.1037
## F-statistic: 47.18 on 1 and 398 DF, p-value: 2.504e-11