# Additive models ----
# Simplest model: human_rating_mean regressed on mahalanobis.y alone.
summary(
  lm(human_rating_mean ~ mahalanobis.y, data = log_tidy_df)
)
##
## Call:
## lm(formula = human_rating_mean ~ mahalanobis.y, data = log_tidy_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5610 -0.8922 0.0901 0.9282 2.5470
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.5756 0.7161 -0.804 0.422
## mahalanobis.y 8.7511 1.1312 7.736 8.52e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.214 on 398 degrees of freedom
## Multiple R-squared: 0.1307, Adjusted R-squared: 0.1285
## F-statistic: 59.84 on 1 and 398 DF, p-value: 8.519e-14
# Two additive predictors: avg_hausdorff.x and mahalanobis.y.
summary(
  lm(
    human_rating_mean ~ avg_hausdorff.x + mahalanobis.y,
    data = log_tidy_df
  )
)
##
## Call:
## lm(formula = human_rating_mean ~ avg_hausdorff.x + mahalanobis.y,
## data = log_tidy_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.97500 -0.79438 0.03128 0.84820 2.81598
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1310 0.6816 0.192 0.848
## avg_hausdorff.x 0.7359 0.1024 7.187 3.31e-12 ***
## mahalanobis.y 7.2684 1.0853 6.697 7.27e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.144 on 397 degrees of freedom
## Multiple R-squared: 0.2308, Adjusted R-squared: 0.2269
## F-statistic: 59.55 on 2 and 397 DF, p-value: < 2.2e-16
# m2: three additive predictors (mahalanobis.y, avg_hausdorff.x, euclidean).
# The fit is stored so it can be reused later for predictions.
m2 <- lm(
  human_rating_mean ~ mahalanobis.y + avg_hausdorff.x + euclidean,
  data = log_tidy_df
)
summary(m2)
##
## Call:
## lm(formula = human_rating_mean ~ mahalanobis.y + avg_hausdorff.x +
## euclidean, data = log_tidy_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.79754 -0.80321 0.02951 0.88274 2.66284
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.46915 0.86626 2.850 0.0046 **
## mahalanobis.y 7.13799 1.06327 6.713 6.62e-11 ***
## avg_hausdorff.x 1.07160 0.12783 8.383 9.13e-16 ***
## euclidean -0.01863 0.00440 -4.235 2.85e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.12 on 396 degrees of freedom
## Multiple R-squared: 0.2641, Adjusted R-squared: 0.2585
## F-statistic: 47.37 on 3 and 396 DF, p-value: < 2.2e-16
# m3: the m2 predictors plus first_three.
m3 <- lm(
  human_rating_mean ~ mahalanobis.y + avg_hausdorff.x + euclidean +
    first_three,
  data = log_tidy_df
)
summary(m3)
##
## Call:
## lm(formula = human_rating_mean ~ mahalanobis.y + avg_hausdorff.x +
## euclidean + first_three, data = log_tidy_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7255 -0.8031 0.0233 0.8688 2.6868
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.424287 0.864692 2.804 0.0053 **
## mahalanobis.y 6.960192 1.066108 6.529 2.04e-10 ***
## avg_hausdorff.x 0.977410 0.139327 7.015 1.00e-11 ***
## euclidean -0.019468 0.004418 -4.406 1.36e-05 ***
## first_three 0.002817 0.001678 1.679 0.0939 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.118 on 395 degrees of freedom
## Multiple R-squared: 0.2693, Adjusted R-squared: 0.2619
## F-statistic: 36.4 on 4 and 395 DF, p-value: < 2.2e-16
# m4: the m2 predictors plus three_longest (in place of m3's first_three).
m4 <- lm(
  human_rating_mean ~ mahalanobis.y + avg_hausdorff.x + euclidean +
    three_longest,
  data = log_tidy_df
)
summary(m4)
##
## Call:
## lm(formula = human_rating_mean ~ mahalanobis.y + avg_hausdorff.x +
## euclidean + three_longest, data = log_tidy_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.70471 -0.80269 0.03677 0.86377 2.72431
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.351767 0.865930 2.716 0.0069 **
## mahalanobis.y 6.764822 1.078969 6.270 9.49e-10 ***
## avg_hausdorff.x 0.969963 0.138736 6.991 1.17e-11 ***
## euclidean -0.018107 0.004396 -4.119 4.64e-05 ***
## three_longest 0.002795 0.001508 1.853 0.0646 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.117 on 395 degrees of freedom
## Multiple R-squared: 0.2704, Adjusted R-squared: 0.2631
## F-statistic: 36.61 on 4 and 395 DF, p-value: < 2.2e-16
# Attach each model's fitted values to the data as columns m2, m3 and m4.
# Fix: the "m2" column used to be generated from m3, which made the m2 and m3
# correlations with human_rating_mean come out identical. Each column now uses
# its own model; re-run the cor.test() calls that follow to refresh the m2
# result.
data_with_predictions <- log_tidy_df %>%
  add_predictions(m2, var = "m2") %>%
  add_predictions(m3, var = "m3") %>%
  add_predictions(m4, var = "m4")
# Pearson correlation between the m2 prediction column and the mean ratings.
cor.test(
  x = data_with_predictions$m2,
  y = data_with_predictions$human_rating_mean
)
##
## Pearson's product-moment correlation
##
## data: data_with_predictions$m2 and data_with_predictions$human_rating_mean
## t = 12.112, df = 398, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4434759 0.5871366
## sample estimates:
## cor
## 0.5189614
# Pearson correlation between the m3 prediction column and the mean ratings.
cor.test(
  x = data_with_predictions$m3,
  y = data_with_predictions$human_rating_mean
)
##
## Pearson's product-moment correlation
##
## data: data_with_predictions$m3 and data_with_predictions$human_rating_mean
## t = 12.112, df = 398, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4434759 0.5871366
## sample estimates:
## cor
## 0.5189614
# Pearson correlation between the m4 prediction column and the mean ratings.
cor.test(
  x = data_with_predictions$m4,
  y = data_with_predictions$human_rating_mean
)
##
## Pearson's product-moment correlation
##
## data: data_with_predictions$m4 and data_with_predictions$human_rating_mean
## t = 12.147, df = 398, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4446702 0.5881105
## sample estimates:
## cor
## 0.5200475
# Scatter of m4 predictions against mean human ratings, points coloured by
# category. The colour mapping lives only in geom_point(), so geom_smooth()
# draws one linear fit over all points rather than one per category.
ggplot(
  data = data_with_predictions,
  mapping = aes(x = m4, y = human_rating_mean)
) +
  geom_point(mapping = aes(color = category)) +
  geom_smooth(method = "lm") +
  theme_classic()