# Linear Models for Scoring APIs
# Perspective ---------------------------------------------------------------------
# Perspective score: least-squares fit of perspective_ME_score on the nine
# BI_* predictors taken from gdf. summary() prints the coefficient table and
# fit statistics.
perspective_model <- lm(
  formula = perspective_ME_score ~ BI_non_white + BI_lgbt_related +
    BI_non_christian + BI_men + BI_christian + BI_white +
    BI_straight + BI_disability + BI_women,
  data = gdf
)
summary(perspective_model)
##
## Call:
## lm(formula = perspective_ME_score ~ BI_non_white + BI_lgbt_related +
## BI_non_christian + BI_men + BI_christian + BI_white + BI_straight +
## BI_disability + BI_women, data = gdf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31612 -0.07239 -0.03143 0.03762 0.85466
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1017139 0.0007087 143.513 < 2e-16 ***
## BI_non_whiteTRUE 0.0359028 0.0031455 11.414 < 2e-16 ***
## BI_lgbt_relatedTRUE 0.0208087 0.0028082 7.410 1.3e-13 ***
## BI_non_christianTRUE 0.0405044 0.0047953 8.447 < 2e-16 ***
## BI_menTRUE 0.1250739 0.0036967 33.834 < 2e-16 ***
## BI_christianTRUE -0.0062764 0.0035938 -1.746 0.080744 .
## BI_whiteTRUE 0.0202685 0.0060285 3.362 0.000775 ***
## BI_straightTRUE -0.0037469 0.0032769 -1.143 0.252865
## BI_disabilityTRUE 0.0001235 0.0039854 0.031 0.975271
## BI_womenTRUE 0.0244237 0.0021173 11.535 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1097 on 30570 degrees of freedom
## (307 observations deleted due to missingness)
## Multiple R-squared: 0.06454, Adjusted R-squared: 0.06426
## F-statistic: 234.3 on 9 and 30570 DF, p-value: < 2.2e-16
# Perspective summary score: same nine BI_* predictors from gdf, with
# perspective_ME_summary as the response.
# NOTE(review): the output recorded below this call is digit-for-digit the
# same as the output recorded for the perspective_ME_score fit. Confirm that
# perspective_ME_summary is a distinct column and that the output was not
# pasted twice.
perspective_summary_model <- lm(
  formula = perspective_ME_summary ~ BI_non_white + BI_lgbt_related +
    BI_non_christian + BI_men + BI_christian + BI_white +
    BI_straight + BI_disability + BI_women,
  data = gdf
)
summary(perspective_summary_model)
##
## Call:
## lm(formula = perspective_ME_summary ~ BI_non_white + BI_lgbt_related +
## BI_non_christian + BI_men + BI_christian + BI_white + BI_straight +
## BI_disability + BI_women, data = gdf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31612 -0.07239 -0.03143 0.03762 0.85466
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1017139 0.0007087 143.513 < 2e-16 ***
## BI_non_whiteTRUE 0.0359028 0.0031455 11.414 < 2e-16 ***
## BI_lgbt_relatedTRUE 0.0208087 0.0028082 7.410 1.3e-13 ***
## BI_non_christianTRUE 0.0405044 0.0047953 8.447 < 2e-16 ***
## BI_menTRUE 0.1250739 0.0036967 33.834 < 2e-16 ***
## BI_christianTRUE -0.0062764 0.0035938 -1.746 0.080744 .
## BI_whiteTRUE 0.0202685 0.0060285 3.362 0.000775 ***
## BI_straightTRUE -0.0037469 0.0032769 -1.143 0.252865
## BI_disabilityTRUE 0.0001235 0.0039854 0.031 0.975271
## BI_womenTRUE 0.0244237 0.0021173 11.535 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1097 on 30570 degrees of freedom
## (307 observations deleted due to missingness)
## Multiple R-squared: 0.06454, Adjusted R-squared: 0.06426
## F-statistic: 234.3 on 9 and 30570 DF, p-value: < 2.2e-16
# Google ---------------------------------------------------------------------
# Least-squares fit of Google_toxic on the nine BI_* predictors from gdf,
# followed by the printed model summary.
google_toxic_model <- lm(
  formula = Google_toxic ~ BI_non_white + BI_lgbt_related +
    BI_non_christian + BI_men + BI_christian + BI_white +
    BI_straight + BI_disability + BI_women,
  data = gdf
)
summary(google_toxic_model)
##
## Call:
## lm(formula = Google_toxic ~ BI_non_white + BI_lgbt_related +
## BI_non_christian + BI_men + BI_christian + BI_white + BI_straight +
## BI_disability + BI_women, data = gdf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28445 -0.02837 -0.02627 -0.00266 0.93059
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0429880 0.0006121 70.233 < 2e-16 ***
## BI_non_whiteTRUE 0.0298926 0.0026720 11.187 < 2e-16 ***
## BI_lgbt_relatedTRUE 0.0245620 0.0024122 10.182 < 2e-16 ***
## BI_non_christianTRUE 0.0538060 0.0041131 13.082 < 2e-16 ***
## BI_menTRUE 0.1584046 0.0031670 50.017 < 2e-16 ***
## BI_christianTRUE -0.0113409 0.0030873 -3.673 0.000240 ***
## BI_whiteTRUE 0.0206221 0.0051049 4.040 5.37e-05 ***
## BI_straightTRUE -0.0109018 0.0028145 -3.873 0.000108 ***
## BI_disabilityTRUE -0.0111559 0.0034121 -3.270 0.001078 **
## BI_womenTRUE 0.0276577 0.0018028 15.342 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09505 on 30877 degrees of freedom
## Multiple R-squared: 0.1186, Adjusted R-squared: 0.1183
## F-statistic: 461.6 on 9 and 30877 DF, p-value: < 2.2e-16
# OpenAI ---------------------------------------------------------------------
# Linear fit of OpenAI_ME_bool on the nine BI_* predictors from gdf.
# Stored as openai_bool_model rather than openai_model: the binomial glm()
# fit in the True/False section assigns openai_model, which would otherwise
# silently overwrite this linear model.
# NOTE(review): the response name suggests a TRUE/FALSE outcome, in which
# case this is a linear probability model. Confirm that is intended.
openai_bool_model <- lm(
  formula = OpenAI_ME_bool ~ BI_non_white + BI_lgbt_related +
    BI_non_christian + BI_men + BI_christian + BI_white +
    BI_straight + BI_disability + BI_women,
  data = gdf
)
summary(openai_bool_model)
##
## Call:
## lm(formula = OpenAI_ME_bool ~ BI_non_white + BI_lgbt_related +
## BI_non_christian + BI_men + BI_christian + BI_white + BI_straight +
## BI_disability + BI_women, data = gdf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.62929 -0.03587 -0.03587 -0.03587 0.97281
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.035866 0.001599 22.433 < 2e-16 ***
## BI_non_whiteTRUE 0.112991 0.006979 16.189 < 2e-16 ***
## BI_lgbt_relatedTRUE 0.074886 0.006301 11.885 < 2e-16 ***
## BI_non_christianTRUE 0.126179 0.010743 11.745 < 2e-16 ***
## BI_menTRUE 0.223721 0.008272 27.044 < 2e-16 ***
## BI_christianTRUE -0.005077 0.008064 -0.630 0.529
## BI_whiteTRUE 0.068562 0.013334 5.142 2.74e-07 ***
## BI_straightTRUE -0.003596 0.007352 -0.489 0.625
## BI_disabilityTRUE 0.107917 0.008912 12.109 < 2e-16 ***
## BI_womenTRUE 0.148791 0.004709 31.598 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2483 on 30877 degrees of freedom
## Multiple R-squared: 0.1098, Adjusted R-squared: 0.1096
## F-statistic: 423.2 on 9 and 30877 DF, p-value: < 2.2e-16
# Anthropic ---------------------------------------------------------------------
# Linear fit of Anthropic_ME_bool on the nine BI_* predictors from gdf.
# Stored as anthropic_bool_model rather than anthropic_model: the binomial
# glm() fit in the True/False section assigns anthropic_model, which would
# otherwise silently overwrite this linear model.
# NOTE(review): the response name suggests a TRUE/FALSE outcome, in which
# case this is a linear probability model. Confirm that is intended.
anthropic_bool_model <- lm(
  formula = Anthropic_ME_bool ~ BI_non_white + BI_lgbt_related +
    BI_non_christian + BI_men + BI_christian + BI_white +
    BI_straight + BI_disability + BI_women,
  data = gdf
)
summary(anthropic_bool_model)
##
## Call:
## lm(formula = Anthropic_ME_bool ~ BI_non_white + BI_lgbt_related +
## BI_non_christian + BI_men + BI_christian + BI_white + BI_straight +
## BI_disability + BI_women, data = gdf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6837 -0.1123 -0.1123 -0.1123 0.9374
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.112304 0.002199 51.063 < 2e-16 ***
## BI_non_whiteTRUE 0.090908 0.009601 9.468 < 2e-16 ***
## BI_lgbt_relatedTRUE 0.068136 0.008668 7.861 3.93e-15 ***
## BI_non_christianTRUE 0.143590 0.014779 9.716 < 2e-16 ***
## BI_menTRUE 0.258339 0.011380 22.702 < 2e-16 ***
## BI_christianTRUE 0.002781 0.011093 0.251 0.8021
## BI_whiteTRUE 0.037939 0.018343 2.068 0.0386 *
## BI_straightTRUE -0.049730 0.010113 -4.917 8.81e-07 ***
## BI_disabilityTRUE 0.070541 0.012260 5.754 8.82e-09 ***
## BI_womenTRUE 0.101340 0.006478 15.644 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3415 on 30877 degrees of freedom
## Multiple R-squared: 0.0485, Adjusted R-squared: 0.04823
## F-statistic: 174.9 on 9 and 30877 DF, p-value: < 2.2e-16
# OctoAI ---------------------------------------------------------------------
# Linear fit of OctoAI_ME_bool on the nine BI_* predictors from gdf.
# Stored as octo_bool_model rather than octo_model: the binomial glm() fit in
# the True/False section assigns octo_model, which would otherwise silently
# overwrite this linear model.
# NOTE(review): the response name suggests a TRUE/FALSE outcome, in which
# case this is a linear probability model. Confirm that is intended.
octo_bool_model <- lm(
  formula = OctoAI_ME_bool ~ BI_non_white + BI_lgbt_related +
    BI_non_christian + BI_men + BI_christian + BI_white +
    BI_straight + BI_disability + BI_women,
  data = gdf
)
summary(octo_bool_model)
##
## Call:
## lm(formula = OctoAI_ME_bool ~ BI_non_white + BI_lgbt_related +
## BI_non_christian + BI_men + BI_christian + BI_white + BI_straight +
## BI_disability + BI_women, data = gdf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41282 -0.07761 -0.07761 -0.07761 0.99249
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.077609 0.001776 43.694 < 2e-16 ***
## BI_non_whiteTRUE 0.005934 0.007754 0.765 0.44408
## BI_lgbt_relatedTRUE 0.027080 0.007000 3.869 0.00011 ***
## BI_non_christianTRUE 0.088387 0.011935 7.406 1.34e-13 ***
## BI_menTRUE 0.224727 0.009190 24.454 < 2e-16 ***
## BI_christianTRUE -0.016650 0.008963 -1.858 0.06323 .
## BI_whiteTRUE -0.019405 0.014813 -1.310 0.19022
## BI_straightTRUE -0.012390 0.008167 -1.517 0.12925
## BI_disabilityTRUE -0.045720 0.009901 -4.618 3.90e-06 ***
## BI_womenTRUE -0.004978 0.005231 -0.952 0.34130
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2758 on 30874 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.02349, Adjusted R-squared: 0.02321
## F-statistic: 82.52 on 9 and 30874 DF, p-value: < 2.2e-16
# Logistic Models for True/False APIs
# OpenAI ---------------------------------------------------------------------
# Binomial GLM of OpenAI_match on the nine BI_* predictors from gdf;
# summary() prints the coefficient table and deviance statistics.
openai_model <- glm(
  formula = OpenAI_match ~ BI_non_white + BI_lgbt_related +
    BI_non_christian + BI_men + BI_christian + BI_white +
    BI_straight + BI_disability + BI_women,
  family = binomial,
  data = gdf
)
summary(openai_model)
##
## Call:
## glm(formula = OpenAI_match ~ BI_non_white + BI_lgbt_related +
## BI_non_christian + BI_men + BI_christian + BI_white + BI_straight +
## BI_disability + BI_women, family = binomial, data = gdf)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.64168 0.01728 94.978 < 2e-16 ***
## BI_non_whiteTRUE -0.38618 0.06441 -5.995 2.03e-09 ***
## BI_lgbt_relatedTRUE -0.98152 0.05350 -18.346 < 2e-16 ***
## BI_non_christianTRUE -0.45982 0.09694 -4.744 2.10e-06 ***
## BI_menTRUE 0.23723 0.08484 2.796 0.005169 **
## BI_christianTRUE -0.24784 0.07725 -3.208 0.001336 **
## BI_whiteTRUE -0.43921 0.11806 -3.720 0.000199 ***
## BI_straightTRUE -0.26761 0.06898 -3.879 0.000105 ***
## BI_disabilityTRUE -0.05722 0.08742 -0.655 0.512741
## BI_womenTRUE -0.42788 0.04439 -9.639 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 29830 on 30886 degrees of freedom
## Residual deviance: 29215 on 30877 degrees of freedom
## AIC: 29235
##
## Number of Fisher Scoring iterations: 4
# Anthropic ---------------------------------------------------------------------
# Binomial GLM of Anthropic_match on the nine BI_* predictors from gdf;
# summary() prints the coefficient table and deviance statistics.
anthropic_model <- glm(
  formula = Anthropic_match ~ BI_non_white + BI_lgbt_related +
    BI_non_christian + BI_men + BI_christian + BI_white +
    BI_straight + BI_disability + BI_women,
  family = binomial,
  data = gdf
)
summary(anthropic_model)
##
## Call:
## glm(formula = Anthropic_match ~ BI_non_white + BI_lgbt_related +
## BI_non_christian + BI_men + BI_christian + BI_white + BI_straight +
## BI_disability + BI_women, family = binomial, data = gdf)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.39727 0.01604 87.131 < 2e-16 ***
## BI_non_whiteTRUE -0.22801 0.06358 -3.586 0.000336 ***
## BI_lgbt_relatedTRUE -0.80477 0.05298 -15.192 < 2e-16 ***
## BI_non_christianTRUE -0.20179 0.09764 -2.067 0.038771 *
## BI_menTRUE -0.05810 0.07675 -0.757 0.449054
## BI_christianTRUE -0.18345 0.07459 -2.459 0.013917 *
## BI_whiteTRUE -0.36530 0.11601 -3.149 0.001639 **
## BI_straightTRUE -0.14235 0.06778 -2.100 0.035711 *
## BI_disabilityTRUE -0.19799 0.08126 -2.437 0.014829 *
## BI_womenTRUE -0.24842 0.04339 -5.725 1.03e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 32450 on 30886 degrees of freedom
## Residual deviance: 32085 on 30877 degrees of freedom
## AIC: 32105
##
## Number of Fisher Scoring iterations: 4
# OctoAI ---------------------------------------------------------------------
# Binomial GLM of OctoAI_match on the nine BI_* predictors from gdf;
# summary() prints the coefficient table and deviance statistics.
octo_model <- glm(
  formula = OctoAI_match ~ BI_non_white + BI_lgbt_related +
    BI_non_christian + BI_men + BI_christian + BI_white +
    BI_straight + BI_disability + BI_women,
  family = binomial,
  data = gdf
)
summary(octo_model)
##
## Call:
## glm(formula = OctoAI_match ~ BI_non_white + BI_lgbt_related +
## BI_non_christian + BI_men + BI_christian + BI_white + BI_straight +
## BI_disability + BI_women, family = binomial, data = gdf)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.21835 0.01526 79.833 < 2e-16 ***
## BI_non_whiteTRUE -0.39404 0.05987 -6.582 4.66e-11 ***
## BI_lgbt_relatedTRUE -0.88534 0.05199 -17.030 < 2e-16 ***
## BI_non_christianTRUE -0.53651 0.09058 -5.923 3.16e-09 ***
## BI_menTRUE 0.60960 0.08287 7.356 1.90e-13 ***
## BI_christianTRUE -0.14368 0.07239 -1.985 0.0472 *
## BI_whiteTRUE -0.54104 0.11118 -4.866 1.14e-06 ***
## BI_straightTRUE 0.03101 0.06698 0.463 0.6433
## BI_disabilityTRUE -0.36459 0.07636 -4.774 1.80e-06 ***
## BI_womenTRUE -0.45794 0.04088 -11.201 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 35103 on 30883 degrees of freedom
## Residual deviance: 34435 on 30874 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 34455
##
## Number of Fisher Scoring iterations: 4