content-mod-analysis

Linear Models for Scoring APIs

# Perspective ---------------------------------------------------------------------
# Perspective score: least-squares fit of perspective_ME_score on the nine
# BI_* flags in gdf (main effects only, no interaction terms).
perspective_model <- lm(
  formula = perspective_ME_score ~
    BI_non_white +
    BI_lgbt_related +
    BI_non_christian +
    BI_men +
    BI_christian +
    BI_white +
    BI_straight +
    BI_disability +
    BI_women,
  data = gdf
)
# Print the coefficient table and overall fit statistics.
summary(perspective_model)

## 
## Call:
## lm(formula = perspective_ME_score ~ BI_non_white + BI_lgbt_related + 
##     BI_non_christian + BI_men + BI_christian + BI_white + BI_straight + 
##     BI_disability + BI_women, data = gdf)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.31612 -0.07239 -0.03143  0.03762  0.85466 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           0.1017139  0.0007087 143.513  < 2e-16 ***
## BI_non_whiteTRUE      0.0359028  0.0031455  11.414  < 2e-16 ***
## BI_lgbt_relatedTRUE   0.0208087  0.0028082   7.410  1.3e-13 ***
## BI_non_christianTRUE  0.0405044  0.0047953   8.447  < 2e-16 ***
## BI_menTRUE            0.1250739  0.0036967  33.834  < 2e-16 ***
## BI_christianTRUE     -0.0062764  0.0035938  -1.746 0.080744 .  
## BI_whiteTRUE          0.0202685  0.0060285   3.362 0.000775 ***
## BI_straightTRUE      -0.0037469  0.0032769  -1.143 0.252865    
## BI_disabilityTRUE     0.0001235  0.0039854   0.031 0.975271    
## BI_womenTRUE          0.0244237  0.0021173  11.535  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1097 on 30570 degrees of freedom
##   (307 observations deleted due to missingness)
## Multiple R-squared:  0.06454,    Adjusted R-squared:  0.06426 
## F-statistic: 234.3 on 9 and 30570 DF,  p-value: < 2.2e-16

# Perspective summary score: same nine BI_* predictors as the score model,
# with perspective_ME_summary as the response.
# NOTE(review): the rendered summary for this fit is numerically identical to
# the perspective_ME_score fit (same residuals, coefficients and R-squared).
# Confirm that perspective_ME_summary is not a copy of perspective_ME_score.
perspective_summary_model <- lm(
  formula = perspective_ME_summary ~
    BI_non_white +
    BI_lgbt_related +
    BI_non_christian +
    BI_men +
    BI_christian +
    BI_white +
    BI_straight +
    BI_disability +
    BI_women,
  data = gdf
)
# Print the coefficient table and overall fit statistics.
summary(perspective_summary_model)

## 
## Call:
## lm(formula = perspective_ME_summary ~ BI_non_white + BI_lgbt_related + 
##     BI_non_christian + BI_men + BI_christian + BI_white + BI_straight + 
##     BI_disability + BI_women, data = gdf)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.31612 -0.07239 -0.03143  0.03762  0.85466 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           0.1017139  0.0007087 143.513  < 2e-16 ***
## BI_non_whiteTRUE      0.0359028  0.0031455  11.414  < 2e-16 ***
## BI_lgbt_relatedTRUE   0.0208087  0.0028082   7.410  1.3e-13 ***
## BI_non_christianTRUE  0.0405044  0.0047953   8.447  < 2e-16 ***
## BI_menTRUE            0.1250739  0.0036967  33.834  < 2e-16 ***
## BI_christianTRUE     -0.0062764  0.0035938  -1.746 0.080744 .  
## BI_whiteTRUE          0.0202685  0.0060285   3.362 0.000775 ***
## BI_straightTRUE      -0.0037469  0.0032769  -1.143 0.252865    
## BI_disabilityTRUE     0.0001235  0.0039854   0.031 0.975271    
## BI_womenTRUE          0.0244237  0.0021173  11.535  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1097 on 30570 degrees of freedom
##   (307 observations deleted due to missingness)
## Multiple R-squared:  0.06454,    Adjusted R-squared:  0.06426 
## F-statistic: 234.3 on 9 and 30570 DF,  p-value: < 2.2e-16

# Google ---------------------------------------------------------------------
# Least-squares fit of Google_toxic on the nine BI_* flags in gdf
# (main effects only).
google_toxic_model <- lm(
  formula = Google_toxic ~
    BI_non_white +
    BI_lgbt_related +
    BI_non_christian +
    BI_men +
    BI_christian +
    BI_white +
    BI_straight +
    BI_disability +
    BI_women,
  data = gdf
)
# Print the coefficient table and overall fit statistics.
summary(google_toxic_model)

## 
## Call:
## lm(formula = Google_toxic ~ BI_non_white + BI_lgbt_related + 
##     BI_non_christian + BI_men + BI_christian + BI_white + BI_straight + 
##     BI_disability + BI_women, data = gdf)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28445 -0.02837 -0.02627 -0.00266  0.93059 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           0.0429880  0.0006121  70.233  < 2e-16 ***
## BI_non_whiteTRUE      0.0298926  0.0026720  11.187  < 2e-16 ***
## BI_lgbt_relatedTRUE   0.0245620  0.0024122  10.182  < 2e-16 ***
## BI_non_christianTRUE  0.0538060  0.0041131  13.082  < 2e-16 ***
## BI_menTRUE            0.1584046  0.0031670  50.017  < 2e-16 ***
## BI_christianTRUE     -0.0113409  0.0030873  -3.673 0.000240 ***
## BI_whiteTRUE          0.0206221  0.0051049   4.040 5.37e-05 ***
## BI_straightTRUE      -0.0109018  0.0028145  -3.873 0.000108 ***
## BI_disabilityTRUE    -0.0111559  0.0034121  -3.270 0.001078 ** 
## BI_womenTRUE          0.0276577  0.0018028  15.342  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09505 on 30877 degrees of freedom
## Multiple R-squared:  0.1186, Adjusted R-squared:  0.1183 
## F-statistic: 461.6 on 9 and 30877 DF,  p-value: < 2.2e-16

# OpenAI ---------------------------------------------------------------------
# Least-squares fit of OpenAI_ME_bool on the nine BI_* flags in gdf.
# Presumably the response is a TRUE/FALSE (0/1) column given its `_bool`
# suffix, which would make this a linear probability model - TODO confirm.
# NOTE(review): `openai_model` is assigned again by the glm() fit later in
# this script, so this linear fit is only reachable until that point.
openai_model <- lm(
  formula = OpenAI_ME_bool ~
    BI_non_white +
    BI_lgbt_related +
    BI_non_christian +
    BI_men +
    BI_christian +
    BI_white +
    BI_straight +
    BI_disability +
    BI_women,
  data = gdf
)
# Print the coefficient table and overall fit statistics.
summary(openai_model)

## 
## Call:
## lm(formula = OpenAI_ME_bool ~ BI_non_white + BI_lgbt_related + 
##     BI_non_christian + BI_men + BI_christian + BI_white + BI_straight + 
##     BI_disability + BI_women, data = gdf)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.62929 -0.03587 -0.03587 -0.03587  0.97281 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           0.035866   0.001599  22.433  < 2e-16 ***
## BI_non_whiteTRUE      0.112991   0.006979  16.189  < 2e-16 ***
## BI_lgbt_relatedTRUE   0.074886   0.006301  11.885  < 2e-16 ***
## BI_non_christianTRUE  0.126179   0.010743  11.745  < 2e-16 ***
## BI_menTRUE            0.223721   0.008272  27.044  < 2e-16 ***
## BI_christianTRUE     -0.005077   0.008064  -0.630    0.529    
## BI_whiteTRUE          0.068562   0.013334   5.142 2.74e-07 ***
## BI_straightTRUE      -0.003596   0.007352  -0.489    0.625    
## BI_disabilityTRUE     0.107917   0.008912  12.109  < 2e-16 ***
## BI_womenTRUE          0.148791   0.004709  31.598  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2483 on 30877 degrees of freedom
## Multiple R-squared:  0.1098, Adjusted R-squared:  0.1096 
## F-statistic: 423.2 on 9 and 30877 DF,  p-value: < 2.2e-16

# Anthropic ---------------------------------------------------------------------
# Least-squares fit of Anthropic_ME_bool on the nine BI_* flags in gdf.
# Presumably the response is a TRUE/FALSE (0/1) column given its `_bool`
# suffix, which would make this a linear probability model - TODO confirm.
# NOTE(review): `anthropic_model` is assigned again by the glm() fit later in
# this script, so this linear fit is only reachable until that point.
anthropic_model <- lm(
  formula = Anthropic_ME_bool ~
    BI_non_white +
    BI_lgbt_related +
    BI_non_christian +
    BI_men +
    BI_christian +
    BI_white +
    BI_straight +
    BI_disability +
    BI_women,
  data = gdf
)
# Print the coefficient table and overall fit statistics.
summary(anthropic_model)

## 
## Call:
## lm(formula = Anthropic_ME_bool ~ BI_non_white + BI_lgbt_related + 
##     BI_non_christian + BI_men + BI_christian + BI_white + BI_straight + 
##     BI_disability + BI_women, data = gdf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6837 -0.1123 -0.1123 -0.1123  0.9374 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           0.112304   0.002199  51.063  < 2e-16 ***
## BI_non_whiteTRUE      0.090908   0.009601   9.468  < 2e-16 ***
## BI_lgbt_relatedTRUE   0.068136   0.008668   7.861 3.93e-15 ***
## BI_non_christianTRUE  0.143590   0.014779   9.716  < 2e-16 ***
## BI_menTRUE            0.258339   0.011380  22.702  < 2e-16 ***
## BI_christianTRUE      0.002781   0.011093   0.251   0.8021    
## BI_whiteTRUE          0.037939   0.018343   2.068   0.0386 *  
## BI_straightTRUE      -0.049730   0.010113  -4.917 8.81e-07 ***
## BI_disabilityTRUE     0.070541   0.012260   5.754 8.82e-09 ***
## BI_womenTRUE          0.101340   0.006478  15.644  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3415 on 30877 degrees of freedom
## Multiple R-squared:  0.0485, Adjusted R-squared:  0.04823 
## F-statistic: 174.9 on 9 and 30877 DF,  p-value: < 2.2e-16

# OctoAI ---------------------------------------------------------------------
# Least-squares fit of OctoAI_ME_bool on the nine BI_* flags in gdf.
# Presumably the response is a TRUE/FALSE (0/1) column given its `_bool`
# suffix, which would make this a linear probability model - TODO confirm.
# NOTE(review): `octo_model` is assigned again by the glm() fit later in
# this script, so this linear fit is only reachable until that point.
octo_model <- lm(
  formula = OctoAI_ME_bool ~
    BI_non_white +
    BI_lgbt_related +
    BI_non_christian +
    BI_men +
    BI_christian +
    BI_white +
    BI_straight +
    BI_disability +
    BI_women,
  data = gdf
)
# Print the coefficient table and overall fit statistics.
summary(octo_model)

## 
## Call:
## lm(formula = OctoAI_ME_bool ~ BI_non_white + BI_lgbt_related + 
##     BI_non_christian + BI_men + BI_christian + BI_white + BI_straight + 
##     BI_disability + BI_women, data = gdf)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41282 -0.07761 -0.07761 -0.07761  0.99249 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           0.077609   0.001776  43.694  < 2e-16 ***
## BI_non_whiteTRUE      0.005934   0.007754   0.765  0.44408    
## BI_lgbt_relatedTRUE   0.027080   0.007000   3.869  0.00011 ***
## BI_non_christianTRUE  0.088387   0.011935   7.406 1.34e-13 ***
## BI_menTRUE            0.224727   0.009190  24.454  < 2e-16 ***
## BI_christianTRUE     -0.016650   0.008963  -1.858  0.06323 .  
## BI_whiteTRUE         -0.019405   0.014813  -1.310  0.19022    
## BI_straightTRUE      -0.012390   0.008167  -1.517  0.12925    
## BI_disabilityTRUE    -0.045720   0.009901  -4.618 3.90e-06 ***
## BI_womenTRUE         -0.004978   0.005231  -0.952  0.34130    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2758 on 30874 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.02349,    Adjusted R-squared:  0.02321 
## F-statistic: 82.52 on 9 and 30874 DF,  p-value: < 2.2e-16

Models for True/False APIs

# OpenAI ---------------------------------------------------------------------
# Binomial GLM of OpenAI_match on the nine BI_* flags in gdf. No link is
# given, so the binomial family's default (logit) applies and the
# coefficients are on the log-odds scale.
# This assignment replaces the lm() fit stored under `openai_model` earlier
# in the script.
openai_model <- glm(
  formula = OpenAI_match ~
    BI_non_white +
    BI_lgbt_related +
    BI_non_christian +
    BI_men +
    BI_christian +
    BI_white +
    BI_straight +
    BI_disability +
    BI_women,
  family = binomial,
  data = gdf
)
# Print the coefficient table, deviances and AIC.
summary(openai_model)

## 
## Call:
## glm(formula = OpenAI_match ~ BI_non_white + BI_lgbt_related + 
##     BI_non_christian + BI_men + BI_christian + BI_white + BI_straight + 
##     BI_disability + BI_women, family = binomial, data = gdf)
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           1.64168    0.01728  94.978  < 2e-16 ***
## BI_non_whiteTRUE     -0.38618    0.06441  -5.995 2.03e-09 ***
## BI_lgbt_relatedTRUE  -0.98152    0.05350 -18.346  < 2e-16 ***
## BI_non_christianTRUE -0.45982    0.09694  -4.744 2.10e-06 ***
## BI_menTRUE            0.23723    0.08484   2.796 0.005169 ** 
## BI_christianTRUE     -0.24784    0.07725  -3.208 0.001336 ** 
## BI_whiteTRUE         -0.43921    0.11806  -3.720 0.000199 ***
## BI_straightTRUE      -0.26761    0.06898  -3.879 0.000105 ***
## BI_disabilityTRUE    -0.05722    0.08742  -0.655 0.512741    
## BI_womenTRUE         -0.42788    0.04439  -9.639  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 29830  on 30886  degrees of freedom
## Residual deviance: 29215  on 30877  degrees of freedom
## AIC: 29235
## 
## Number of Fisher Scoring iterations: 4

# Anthropic ---------------------------------------------------------------------
# Binomial GLM of Anthropic_match on the nine BI_* flags in gdf. No link is
# given, so the binomial family's default (logit) applies and the
# coefficients are on the log-odds scale.
# This assignment replaces the lm() fit stored under `anthropic_model`
# earlier in the script.
anthropic_model <- glm(
  formula = Anthropic_match ~
    BI_non_white +
    BI_lgbt_related +
    BI_non_christian +
    BI_men +
    BI_christian +
    BI_white +
    BI_straight +
    BI_disability +
    BI_women,
  family = binomial,
  data = gdf
)
# Print the coefficient table, deviances and AIC.
summary(anthropic_model)

## 
## Call:
## glm(formula = Anthropic_match ~ BI_non_white + BI_lgbt_related + 
##     BI_non_christian + BI_men + BI_christian + BI_white + BI_straight + 
##     BI_disability + BI_women, family = binomial, data = gdf)
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           1.39727    0.01604  87.131  < 2e-16 ***
## BI_non_whiteTRUE     -0.22801    0.06358  -3.586 0.000336 ***
## BI_lgbt_relatedTRUE  -0.80477    0.05298 -15.192  < 2e-16 ***
## BI_non_christianTRUE -0.20179    0.09764  -2.067 0.038771 *  
## BI_menTRUE           -0.05810    0.07675  -0.757 0.449054    
## BI_christianTRUE     -0.18345    0.07459  -2.459 0.013917 *  
## BI_whiteTRUE         -0.36530    0.11601  -3.149 0.001639 ** 
## BI_straightTRUE      -0.14235    0.06778  -2.100 0.035711 *  
## BI_disabilityTRUE    -0.19799    0.08126  -2.437 0.014829 *  
## BI_womenTRUE         -0.24842    0.04339  -5.725 1.03e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 32450  on 30886  degrees of freedom
## Residual deviance: 32085  on 30877  degrees of freedom
## AIC: 32105
## 
## Number of Fisher Scoring iterations: 4

# OctoAI ---------------------------------------------------------------------
# Binomial GLM of OctoAI_match on the nine BI_* flags in gdf. No link is
# given, so the binomial family's default (logit) applies and the
# coefficients are on the log-odds scale.
# This assignment replaces the lm() fit stored under `octo_model` earlier
# in the script.
octo_model <- glm(
  formula = OctoAI_match ~
    BI_non_white +
    BI_lgbt_related +
    BI_non_christian +
    BI_men +
    BI_christian +
    BI_white +
    BI_straight +
    BI_disability +
    BI_women,
  family = binomial,
  data = gdf
)
# Print the coefficient table, deviances and AIC.
summary(octo_model)

## 
## Call:
## glm(formula = OctoAI_match ~ BI_non_white + BI_lgbt_related + 
##     BI_non_christian + BI_men + BI_christian + BI_white + BI_straight + 
##     BI_disability + BI_women, family = binomial, data = gdf)
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           1.21835    0.01526  79.833  < 2e-16 ***
## BI_non_whiteTRUE     -0.39404    0.05987  -6.582 4.66e-11 ***
## BI_lgbt_relatedTRUE  -0.88534    0.05199 -17.030  < 2e-16 ***
## BI_non_christianTRUE -0.53651    0.09058  -5.923 3.16e-09 ***
## BI_menTRUE            0.60960    0.08287   7.356 1.90e-13 ***
## BI_christianTRUE     -0.14368    0.07239  -1.985   0.0472 *  
## BI_whiteTRUE         -0.54104    0.11118  -4.866 1.14e-06 ***
## BI_straightTRUE       0.03101    0.06698   0.463   0.6433    
## BI_disabilityTRUE    -0.36459    0.07636  -4.774 1.80e-06 ***
## BI_womenTRUE         -0.45794    0.04088 -11.201  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 35103  on 30883  degrees of freedom
## Residual deviance: 34435  on 30874  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 34455
## 
## Number of Fisher Scoring iterations: 4

content-mod-analysis

DM

2024-08-06

Linear Models for Scoring APIs

Models for True/False APIs