Init

library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +

load_packages(
  googlesheets4,
  rms
)

Data

#read without Google authentication (no login prompt)
#NOTE(review): presumably works because the sheet is publicly readable -- confirm
gs4_deauth()

#stereotype table: cells A51:F68 of the sheet
stereotypes = read_sheet(
  ss = "https://docs.google.com/spreadsheets/d/116buPqHRbwetijsO_1vHyRBOEs7WuS9c2D-l-cu0HNM/edit#gid=0",
  range = "A51:F68"
)

## ✔ Reading from "1960s surveys academics stereotypes fields".

## ✔ Range 'A51:F68'.

#politics survey table: cells A5:D23 of the sheet, columns renamed after reading
#NOTE(review): "Campus_activisim" looks like a misspelling of "activism"; kept as-is because other code may refer to the column by this name
survey_politics = read_sheet(
  ss = "https://docs.google.com/spreadsheets/d/116buPqHRbwetijsO_1vHyRBOEs7WuS9c2D-l-cu0HNM/edit#gid=0",
  range = "A5:D23"
) %>%
  set_colnames(
    c("Field", "Liberal", "Campus_activisim", "Black_support")
  )

## ✔ Reading from "1960s surveys academics stereotypes fields".

## ✔ Range 'A5:D23'.

#third table from the same sheet: cells A30:E47, columns renamed after reading
jews = read_sheet(
  ss = "https://docs.google.com/spreadsheets/d/116buPqHRbwetijsO_1vHyRBOEs7WuS9c2D-l-cu0HNM/edit#gid=0",
  range = "A30:E47"
) %>%
  set_colnames(
    c("Field", "father_college", "father_occu_manual", "father_occu_high", "jewish")
  )

## ✔ Reading from "1960s surveys academics stereotypes fields".

## ✔ Range 'A30:E47'.

Analysis

#merge the stereotype table with the politics survey, matched on Field
#stereotype columns get the "stereotype_" prefix; the key is renamed back to Field so both tables share it
d = stereotypes %>%
  df_add_affix(prefix = "stereotype_") %>%
  rename(Field = stereotype_Field) %>%
  full_join(survey_politics, by = "Field")

#clean up the joined data
d = d %>%
  #remove aggregate rows, keeping only the individual fields
  filter(!(Field %in% c("All social sciences", "All fields"))) %>%
  #combined stereotype score: left + liberal
  mutate(stereotyped_as_left = stereotype_Left + stereotype_Liberal)

#surveyed liberal % (x) against the stereotyped left/liberal % (y), points labelled by Field
d %>%
  GG_scatter(
    "Liberal",
    "stereotyped_as_left",
    case_names = "Field",
    #spell out TRUE: T is an ordinary variable that can be reassigned
    repel_names = TRUE
  ) +
  xlab("Liberal politics % of professors (1960s)") +
  ylab("Students' perceptions of % of professors who are left or liberal") +
  ggtitle("Stereotypes of professors' politics vs. actual politics (survey data, 1960s)") +
  #dotted identity line (y = x): points on it have equal stereotyped and surveyed values
  geom_abline(slope = 1, intercept = 0, linetype = "dotted")

## `geom_smooth()` using formula = 'y ~ x'

#make sure the output folder exists so saving does not fail when figs/ is missing
dir.create("figs", showWarnings = FALSE, recursive = TRUE)
#NOTE(review): GG_save() presumably writes the most recently drawn plot -- confirm
GG_save("figs/stereotype_accuracy.png")

## `geom_smooth()` using formula = 'y ~ x'

#linear model: surveyed liberal % as a function of the stereotyped left/liberal %
ols(formula = Liberal ~ stereotyped_as_left, data = d)

## Frequencies of Missing Values Due to Each Variable
##             Liberal stereotyped_as_left 
##                   0                   1 
## 
## Linear Regression Model
## 
## ols(formula = Liberal ~ stereotyped_as_left, data = d)
## 
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs      15    LR chi2     49.87    R2       0.964    
## sigma3.7305    d.f.            1    R2 adj   0.961    
## d.f.     13    Pr(> chi2) 0.0000    g       21.928    
## 
## Residuals
## 
##     Min      1Q  Median      3Q     Max 
## -5.8961 -2.7922  0.9195  2.5831  5.1039 
## 
## 
##                     Coef     S.E.   t     Pr(>|t|)
## Intercept           -10.7144 3.1986 -3.35 0.0052  
## stereotyped_as_left   1.2935 0.0693 18.66 <0.0001

#jewish vs. politics: combine the politics survey with the jews table, matched on Field
d2 = survey_politics %>%
  full_join(jews, by = "Field")

#remove aggregate rows, keeping only the individual fields
d2 = d2 %>%
  filter(!(Field %in% c("All social sciences", "All fields")))

#jewish (x) against Liberal (y), points labelled by Field
#TRUE is spelled out because T is an ordinary variable that can be reassigned
GG_scatter(d2, "jewish", "Liberal", case_names = "Field", repel_names = TRUE)

## `geom_smooth()` using formula = 'y ~ x'

Sociologist surveys

Init

Data

Analysis