# Load the combined benchmark table used by every model below. The path is
# relative, so this must be run from a directory where "../GPT-TV-Benchmark"
# resolves.
# NOTE(review): the single unnamed col_factor() spec is presumably applied by
# position to the first column, which the recorded output shows to be the
# unnamed column renamed to `...1` -- confirm this is intended. The recorded
# output also reports parsing issues; inspect them with problems(df).
df <- read_csv(
  file = "../GPT-TV-Benchmark/Data/all_combined_forstats.csv",
  col_types = cols(col_factor(levels = NULL))
)
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
# Coerce the ground-truth label, the covariate flags, and each API's binary
# decision to logical, then derive one `<API>_match` column per API that is
# TRUE when the API's decision equals `true_label`.
#
# `OpenAI_flagged` is coerced like the other two decision columns so that all
# three comparisons are logical-vs-logical. Without it, a column parsed as
# character would be compared by string form ("True" vs "TRUE") and the match
# would silently be FALSE for every row. If the column is already logical the
# coercion is a no-op.
#
# Rows where either side is NA get an NA match.
df <- df %>%
  mutate(
    # Ground truth and covariates.
    true_label = as.logical(true_label),
    GenAI = as.logical(GenAI),
    has_slur = as.logical(has_slur),
    # Per-API binary decisions.
    OpenAI_flagged = as.logical(OpenAI_flagged),
    OctoAI_ME_bool = as.logical(OctoAI_ME_bool),
    Anthropic_ME_bool = as.logical(Anthropic_ME_bool),
    # Agreement between each API decision and the ground truth; these are
    # appended in the same order as before (OpenAI, OctoAI, Anthropic).
    OpenAI_match = OpenAI_flagged == true_label,
    OctoAI_match = OctoAI_ME_bool == true_label,
    Anthropic_match = Anthropic_ME_bool == true_label
  )

Linear Models for Scoring APIs

All Data

Perspective Score

## 
## Call:
## lm(formula = perspective_ME_score ~ GenAI + has_slur + BI_non_white + 
##     BI_lgbt_related + BI_non_christian + BI_men + BI_christian + 
##     BI_white + BI_straight + BI_disability + BI_women + word_length, 
##     data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.88743 -0.17612 -0.05085  0.12802  0.80291 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           2.194e-01  1.520e-03 144.378  < 2e-16 ***
## GenAITRUE            -6.199e-02  6.447e-03  -9.614  < 2e-16 ***
## has_slurTRUE          4.120e-01  4.342e-03  94.889  < 2e-16 ***
## BI_non_whiteTRUE      8.378e-03  5.535e-03   1.514   0.1301    
## BI_lgbt_relatedTRUE   6.374e-02  1.192e-02   5.349 8.91e-08 ***
## BI_non_christianTRUE  1.375e-01  7.754e-03  17.737  < 2e-16 ***
## BI_menTRUE            5.166e-02  2.905e-03  17.785  < 2e-16 ***
## BI_christianTRUE     -1.142e-02  1.151e-02  -0.992   0.3213    
## BI_whiteTRUE          3.973e-02  1.003e-02   3.963 7.43e-05 ***
## BI_straightTRUE      -1.532e-01  7.387e-02  -2.073   0.0381 *  
## BI_disabilityTRUE     4.063e-02  1.923e-02   2.113   0.0346 *  
## BI_womenTRUE          1.456e-01  3.078e-03  47.323  < 2e-16 ***
## word_length          -2.078e-04  3.828e-05  -5.427 5.75e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2206 on 39349 degrees of freedom
##   (535384 observations deleted due to missingness)
## Multiple R-squared:  0.3708, Adjusted R-squared:  0.3706 
## F-statistic:  1932 on 12 and 39349 DF,  p-value: < 2.2e-16

Google

## 
## Call:
## lm(formula = Google_cat_max ~ GenAI + has_slur + BI_non_white + 
##     BI_lgbt_related + BI_non_christian + BI_men + BI_christian + 
##     BI_white + BI_straight + BI_disability + BI_women + word_length, 
##     data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.22747 -0.27349  0.05029  0.26365  0.53766 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           5.189e-01  2.109e-03 246.055  < 2e-16 ***
## GenAITRUE            -6.212e-02  9.003e-03  -6.900 5.29e-12 ***
## has_slurTRUE          2.452e-01  6.078e-03  40.348  < 2e-16 ***
## BI_non_whiteTRUE     -3.951e-03  7.744e-03  -0.510   0.6099    
## BI_lgbt_relatedTRUE  -3.779e-02  1.654e-02  -2.285   0.0223 *  
## BI_non_christianTRUE  3.471e-01  1.086e-02  31.967  < 2e-16 ***
## BI_menTRUE            1.885e-02  4.063e-03   4.640 3.49e-06 ***
## BI_christianTRUE      2.100e-01  1.610e-02  13.042  < 2e-16 ***
## BI_whiteTRUE         -2.384e-02  1.404e-02  -1.698   0.0895 .  
## BI_straightTRUE      -2.617e-01  1.035e-01  -2.529   0.0114 *  
## BI_disabilityTRUE     1.215e-01  2.694e-02   4.509 6.52e-06 ***
## BI_womenTRUE          6.390e-02  4.306e-03  14.839  < 2e-16 ***
## word_length           7.946e-04  5.279e-05  15.052  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.309 on 39673 degrees of freedom
##   (535060 observations deleted due to missingness)
## Multiple R-squared:  0.1151, Adjusted R-squared:  0.1149 
## F-statistic: 430.1 on 12 and 39673 DF,  p-value: < 2.2e-16

OpenAI Normalized Max

## 
## Call:
## lm(formula = OpenAI_normalized_max ~ GenAI + has_slur + BI_non_white + 
##     BI_lgbt_related + BI_non_christian + BI_men + BI_christian + 
##     BI_white + BI_straight + BI_disability + BI_women + word_length, 
##     data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5262 -0.4959 -0.4218  0.5016  8.5042 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           0.4777181  0.0055743  85.699  < 2e-16 ***
## GenAITRUE            -0.0451031  0.0237970  -1.895  0.05806 .  
## has_slurTRUE          0.8136988  0.0160660  50.647  < 2e-16 ***
## BI_non_whiteTRUE      0.3771278  0.0204687  18.425  < 2e-16 ***
## BI_lgbt_relatedTRUE   0.4055143  0.0437163   9.276  < 2e-16 ***
## BI_non_christianTRUE  0.9309609  0.0287029  32.434  < 2e-16 ***
## BI_menTRUE            0.0850082  0.0107399   7.915 2.53e-15 ***
## BI_christianTRUE      0.1265982  0.0425662   2.974  0.00294 ** 
## BI_whiteTRUE          0.4556494  0.0371221  12.274  < 2e-16 ***
## BI_straightTRUE      -0.4707211  0.2734726  -1.721  0.08521 .  
## BI_disabilityTRUE     0.0253205  0.0712038   0.356  0.72214    
## BI_womenTRUE          0.2618726  0.0113824  23.007  < 2e-16 ***
## word_length           0.0013185  0.0001395   9.449  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8169 on 39673 degrees of freedom
##   (535060 observations deleted due to missingness)
## Multiple R-squared:  0.1899, Adjusted R-squared:  0.1897 
## F-statistic: 775.2 on 12 and 39673 DF,  p-value: < 2.2e-16

Only Ground-Truth Negatives (true_label == FALSE)

Perspective Score

## 
## Call:
## lm(formula = perspective_ME_score ~ GenAI + has_slur + BI_non_white + 
##     BI_lgbt_related + BI_non_christian + BI_men + BI_christian + 
##     BI_white + BI_straight + BI_disability + BI_women + word_length, 
##     data = df %>% filter(true_label == FALSE))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.87905 -0.12592 -0.05686  0.09572  0.81369 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           1.523e-01  1.520e-03 100.253  < 2e-16 ***
## GenAITRUE            -6.481e-02  6.410e-03 -10.110  < 2e-16 ***
## has_slurTRUE          4.555e-01  5.347e-03  85.185  < 2e-16 ***
## BI_non_whiteTRUE      3.923e-02  6.515e-03   6.022 1.75e-09 ***
## BI_lgbt_relatedTRUE   6.436e-02  1.416e-02   4.546 5.50e-06 ***
## BI_non_christianTRUE  1.263e-01  9.153e-03  13.804  < 2e-16 ***
## BI_menTRUE            5.085e-02  2.981e-03  17.061  < 2e-16 ***
## BI_christianTRUE      1.925e-04  1.132e-02   0.017  0.98644    
## BI_whiteTRUE          5.591e-02  1.059e-02   5.281 1.29e-07 ***
## BI_straightTRUE      -9.721e-02  6.985e-02  -1.392  0.16399    
## BI_disabilityTRUE     5.185e-02  1.948e-02   2.662  0.00777 ** 
## BI_womenTRUE          1.136e-01  3.212e-03  35.378  < 2e-16 ***
## word_length          -3.917e-05  4.406e-05  -0.889  0.37402    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.184 on 27545 degrees of freedom
##   (448263 observations deleted due to missingness)
## Multiple R-squared:  0.3535, Adjusted R-squared:  0.3533 
## F-statistic:  1255 on 12 and 27545 DF,  p-value: < 2.2e-16

Google

## 
## Call:
## lm(formula = Google_cat_max ~ GenAI + has_slur + BI_non_white + 
##     BI_lgbt_related + BI_non_christian + BI_men + BI_christian + 
##     BI_white + BI_straight + BI_disability + BI_women + word_length, 
##     data = df %>% filter(true_label == FALSE))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.64885 -0.33263  0.01172  0.29493  0.61236 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           4.542e-01  2.673e-03 169.928  < 2e-16 ***
## GenAITRUE            -7.605e-02  1.139e-02  -6.680 2.44e-11 ***
## has_slurTRUE          2.802e-01  9.513e-03  29.458  < 2e-16 ***
## BI_non_whiteTRUE      2.365e-02  1.158e-02   2.042 0.041131 *  
## BI_lgbt_relatedTRUE  -4.083e-02  2.494e-02  -1.638 0.101515    
## BI_non_christianTRUE  3.937e-01  1.630e-02  24.147  < 2e-16 ***
## BI_menTRUE            1.835e-02  5.302e-03   3.460 0.000541 ***
## BI_christianTRUE      2.579e-01  2.014e-02  12.809  < 2e-16 ***
## BI_whiteTRUE         -1.924e-02  1.886e-02  -1.020 0.307724    
## BI_straightTRUE      -2.661e-01  1.244e-01  -2.138 0.032486 *  
## BI_disabilityTRUE     1.404e-01  3.469e-02   4.048 5.18e-05 ***
## BI_womenTRUE          4.400e-02  5.716e-03   7.697 1.44e-14 ***
## word_length           1.349e-03  7.706e-05  17.509  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3278 on 27853 degrees of freedom
##   (447955 observations deleted due to missingness)
## Multiple R-squared:  0.0912, Adjusted R-squared:  0.09081 
## F-statistic: 232.9 on 12 and 27853 DF,  p-value: < 2.2e-16

OpenAI Normalized Max

## 
## Call:
## lm(formula = OpenAI_normalized_max ~ GenAI + has_slur + BI_non_white + 
##     BI_lgbt_related + BI_non_christian + BI_men + BI_christian + 
##     BI_white + BI_straight + BI_disability + BI_women + word_length, 
##     data = df %>% filter(true_label == FALSE))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1275 -0.2953 -0.2692  0.0652  8.5436 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           0.267803   0.005202  51.476  < 2e-16 ***
## GenAITRUE            -0.218322   0.022158  -9.853  < 2e-16 ***
## has_slurTRUE          0.777339   0.018515  41.985  < 2e-16 ***
## BI_non_whiteTRUE      0.380660   0.022541  16.887  < 2e-16 ***
## BI_lgbt_relatedTRUE   0.365973   0.048528   7.541 4.79e-14 ***
## BI_non_christianTRUE  0.750490   0.031731  23.651  < 2e-16 ***
## BI_menTRUE            0.134911   0.010319  13.074  < 2e-16 ***
## BI_christianTRUE      0.217503   0.039187   5.550 2.88e-08 ***
## BI_whiteTRUE          0.447042   0.036707  12.179  < 2e-16 ***
## BI_straightTRUE      -0.063167   0.242143  -0.261   0.7942    
## BI_disabilityTRUE     0.117101   0.067507   1.735   0.0828 .  
## BI_womenTRUE          0.166118   0.011124  14.933  < 2e-16 ***
## word_length           0.001334   0.000150   8.891  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.638 on 27853 degrees of freedom
##   (447955 observations deleted due to missingness)
## Multiple R-squared:  0.1659, Adjusted R-squared:  0.1656 
## F-statistic: 461.8 on 12 and 27853 DF,  p-value: < 2.2e-16

Logistic Regression Models for True/False APIs (outcome: API decision matches true label)

OpenAI

## 
## Call:
## glm(formula = OpenAI_match ~ GenAI + has_slur + BI_non_white + 
##     BI_lgbt_related + BI_non_christian + BI_men + BI_christian + 
##     BI_white + BI_straight + BI_disability + BI_women + word_length, 
##     family = binomial, data = df)
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           1.4147551  0.0167966  84.229  < 2e-16 ***
## GenAITRUE             1.0376707  0.0888792  11.675  < 2e-16 ***
## has_slurTRUE         -0.2287393  0.0434134  -5.269 1.37e-07 ***
## BI_non_whiteTRUE     -0.2367614  0.0556996  -4.251 2.13e-05 ***
## BI_lgbt_relatedTRUE  -0.3301771  0.1183575  -2.790 0.005276 ** 
## BI_non_christianTRUE -0.5873620  0.0752834  -7.802 6.09e-15 ***
## BI_menTRUE           -0.3731387  0.0296715 -12.576  < 2e-16 ***
## BI_christianTRUE     -0.4239418  0.1128523  -3.757 0.000172 ***
## BI_whiteTRUE         -0.5149442  0.0971688  -5.299 1.16e-07 ***
## BI_straightTRUE      -0.9627597  0.6988327  -1.378 0.168306    
## BI_disabilityTRUE    -0.5115144  0.1902752  -2.688 0.007182 ** 
## BI_womenTRUE         -0.2203011  0.0320287  -6.878 6.06e-12 ***
## word_length          -0.0017783  0.0004003  -4.442 8.90e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 42809  on 39685  degrees of freedom
## Residual deviance: 42132  on 39673  degrees of freedom
##   (535060 observations deleted due to missingness)
## AIC: 42158
## 
## Number of Fisher Scoring iterations: 4

Anthropic

## 
## Call:
## glm(formula = Anthropic_match ~ GenAI + has_slur + BI_non_white + 
##     BI_lgbt_related + BI_non_christian + BI_men + BI_christian + 
##     BI_white + BI_straight + BI_disability + BI_women + word_length, 
##     family = binomial, data = df)
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           0.7762361  0.0146649  52.932  < 2e-16 ***
## GenAITRUE             0.8826914  0.0719698  12.265  < 2e-16 ***
## has_slurTRUE          0.1886597  0.0418579   4.507 6.57e-06 ***
## BI_non_whiteTRUE     -0.1374392  0.0527316  -2.606 0.009150 ** 
## BI_lgbt_relatedTRUE  -0.3730108  0.1109975  -3.361 0.000778 ***
## BI_non_christianTRUE -0.3813624  0.0717895  -5.312 1.08e-07 ***
## BI_menTRUE           -0.3018103  0.0273139 -11.050  < 2e-16 ***
## BI_christianTRUE     -0.2836708  0.1065799  -2.662 0.007777 ** 
## BI_whiteTRUE         -0.6700231  0.0917708  -7.301 2.86e-13 ***
## BI_straightTRUE       0.5496111  0.8155020   0.674 0.500340    
## BI_disabilityTRUE    -0.1788775  0.1829405  -0.978 0.328178    
## BI_womenTRUE         -0.0507696  0.0293848  -1.728 0.084033 .  
## word_length          -0.0018322  0.0003679  -4.981 6.34e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 50647  on 39685  degrees of freedom
## Residual deviance: 50245  on 39673  degrees of freedom
##   (535060 observations deleted due to missingness)
## AIC: 50271
## 
## Number of Fisher Scoring iterations: 4

OctoAI

## 
## Call:
## glm(formula = OctoAI_match ~ GenAI + has_slur + BI_non_white + 
##     BI_lgbt_related + BI_non_christian + BI_men + BI_christian + 
##     BI_white + BI_straight + BI_disability + BI_women + word_length, 
##     family = binomial, data = df)
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           0.9350778  0.0153807  60.795  < 2e-16 ***
## GenAITRUE             1.1638596  0.0829419  14.032  < 2e-16 ***
## has_slurTRUE          0.0510761  0.0423905   1.205 0.228244    
## BI_non_whiteTRUE     -0.2016011  0.0535431  -3.765 0.000166 ***
## BI_lgbt_relatedTRUE  -0.4261696  0.1139660  -3.739 0.000184 ***
## BI_non_christianTRUE -0.3429256  0.0738242  -4.645 3.40e-06 ***
## BI_menTRUE           -0.3005942  0.0280004 -10.735  < 2e-16 ***
## BI_christianTRUE     -0.1793614  0.1107880  -1.619 0.105456    
## BI_whiteTRUE         -0.7281355  0.0923259  -7.887 3.11e-15 ***
## BI_straightTRUE      -0.3318021  0.7367084  -0.450 0.652433    
## BI_disabilityTRUE    -0.3369321  0.1870525  -1.801 0.071660 .  
## BI_womenTRUE         -0.0744462  0.0302018  -2.465 0.013703 *  
## word_length          -0.0008184  0.0004024  -2.034 0.041984 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 48454  on 39685  degrees of freedom
## Residual deviance: 47964  on 39673  degrees of freedom
##   (535060 observations deleted due to missingness)
## AIC: 47990
## 
## Number of Fisher Scoring iterations: 4