The data come from the NSF NCSES publication nsf19301: https://ncses.nsf.gov/pubs/nsf19301/data.
# Load the wide table: one row per field/gender pair, with a count and a
# percentage column for each survey year.
gendphd_0 <- read_csv(file = "./gender_phd_data.csv")
## Parsed with column specification:
## cols(
## field_gend = col_character(),
## count_1987 = col_double(),
## perc_1987 = col_double(),
## count_1992 = col_double(),
## perc_1992 = col_double(),
## count_1997 = col_double(),
## perc_1997 = col_double(),
## count_2002 = col_double(),
## perc_2002 = col_double(),
## count_2007 = col_double(),
## perc_2007 = col_double(),
## count_2012 = col_double(),
## perc_2012 = col_double(),
## count_2017 = col_double(),
## perc_2017 = col_double()
## )
Split the combined `field_gend` identifier into separate `field` and `gend` columns.
# Break each "field_gender" identifier at the underscores and keep the first
# two pieces as `field` and `gend`, one row per input row.
field_gend <- bind_rows(
  lapply(
    str_split(gendphd_0$field_gend, pattern = "_"),
    function(parts) tibble(field = parts[1], gend = parts[2])
  )
)

# Attach the two new columns to the original wide table.
gendphd_1 <- cbind(gendphd_0, field_gend)
# Counts in long form: after the select, every column other than the two id
# columns is a `count_<year>` column, so gather all of them into year/count.
gendphd_count_year <- gendphd_1 %>%
  select(field, gend, starts_with('count')) %>%
  gather(key = year, value = count, -field, -gend) %>%
  mutate(year = gsub(pattern = 'count_', replacement = '', x = year))

# Percentages in long form, built the same way from the `perc_<year>` columns.
gendphd_perc_year <- gendphd_1 %>%
  select(field, gend, starts_with('perc')) %>%
  gather(key = year, value = perc, -field, -gend) %>%
  mutate(year = gsub(pattern = 'perc_', replacement = '', x = year))

# Recombine so each field/gender/year row carries both count and perc.
gendphd_2 <- merge(
  x = gendphd_count_year,
  y = gendphd_perc_year,
  by = c('field', 'gend', 'year')
)
# Lookup table mapping each `field` code to its display `label`.
field_labels <- read_csv(file = "./field_labels.csv")
## Parsed with column specification:
## cols(
## field = col_character(),
## label = col_character()
## )
# Attach the display label to every row. This is a left join: rows of
# gendphd_2 without a matching label are kept, unmatched labels are dropped.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
gendphd_3 <- merge(gendphd_2, field_labels, by = 'field',
                   all.x = TRUE, all.y = FALSE)

# Add a numeric year for plotting and drop the combined 'both' rows so only
# the per-gender series remain.
gendphd_4 <- gendphd_3 %>%
  mutate(year_num = as.numeric(year)) %>%
  filter(gend != 'both')
# Exploratory view: percentage over time, one line per gender, one panel per
# field label.
ggplot(
  data = gendphd_4,
  mapping = aes(x = year_num, y = perc, color = gend, group = gend)
) +
  facet_wrap(facets = ~ label) +
  geom_line()
# Field codes ordered by the 2017 female percentage, lowest first.
l <- gendphd_4 %>%
  filter(gend == 'female', year == 2017) %>%
  arrange(perc) %>%
  pull(field)

# Use that ordering as the factor levels of `field`, and add a wrapped copy
# of the label (20 characters per line) for plot annotations.
gendphd_4 <- gendphd_4 %>%
  mutate(
    field = factor(field, levels = l),
    label_wrap = str_wrap(label, width = 20)
  )
# Same exploratory view as before, but panelled by the `field` factor so the
# panels follow the factor's level order.
ggplot(
  data = gendphd_4,
  mapping = aes(x = year_num, y = perc, color = gend, group = gend)
) +
  facet_wrap(facets = ~ field) +
  geom_line()
library(ggrepel)
# l <- gendphd_4 %>%
# filter(gend == 'female' & year == 2017) %>%
# arrange(perc) %>%
# {.[['label_wrap']]}
# gendphd_4 <- gendphd_4 %>%
# mutate(label_wrap = factor(label_wrap, levels = l))
# palette from https://colorcurves.app/
# Split the comma-separated colour list into a character vector of hex codes,
# trimming the space that follows each comma.
pal <- trimws(
  strsplit(
    "#f17f32, #e5cf34, #97ce42, #56b063, #5680a7, #6a3aba, #be20ca, #cf1182, #cb0b22",
    split = ",", fixed = TRUE
  )[[1]]
)
# p1: female share of doctorates over time, one coloured line per field.
# Axes are suppressed; year labels and a dotted 50% reference line are drawn
# inside the panel, and each line is labelled directly at its 2017 end point.
p1 <- ggplot() +
  # Dotted reference line at 50% and its text label.
  geom_hline(yintercept = 50, linetype = 3, color = 'grey60') +
  # Year labels drawn just above the reference line in place of an x axis.
  geom_text(
    mapping = aes(x = year_num, y = y, label = year_num),
    data = data.frame(year_num = sort(unique(gendphd_4$year_num)), y = 52),
    color = 'grey60', family = 'Inconsolata'
  ) +
  annotate(geom = 'text', x = 1987, y = 48, label = '50%', color = 'grey60') +
  # One line per field, female rows only.
  geom_line(
    mapping = aes(x = year_num, y = perc, group = label_wrap,
                  color = label_wrap),
    data = filter(gendphd_4, gend == 'female')
  ) +
  # Direct labels at the 2017 end of each line, repelled vertically only.
  geom_text_repel(
    mapping = aes(x = year_num, y = perc, label = label_wrap,
                  color = label_wrap),
    data = filter(gendphd_4, gend == 'female' & year_num == 2017),
    family = 'Inconsolata', direction = 'y', hjust = 0, nudge_x = 0,
    size = 3, segment.color = 'grey30'
  ) +
  # Extend the visible x range 10 units past the last year to leave room for
  # the direct labels.
  coord_cartesian(xlim = range(gendphd_4$year_num) + c(0, 10)) +
  scale_color_manual(name = '', values = pal) +
  scale_x_continuous(name = '', breaks = NULL) +
  scale_y_continuous(name = '', breaks = NULL) +
  labs(
    title = 'Percentage of Female Doctorates Granted by Field',
    subtitle = '1987-2017'
  ) +
  theme_classic() +
  theme(
    text = element_text(family = 'Inconsolata'),
    legend.position = 'none',
    axis.line = element_blank(),
    axis.ticks = element_blank()
  )
# Baseline: the 1987 female percentage for each field, taken from the wide
# table (where `field` is a character column).
perc_1987 <- gendphd_1 %>%
  filter(gend == 'female') %>%
  select(field, perc_1987)

# Female rows with the change in percentage relative to the 1987 baseline.
# `field` in gendphd_4 is a factor, while the baseline table keys on a
# character column; convert explicitly so the join keys have the same type
# instead of relying on left_join()'s implicit coercion (which warns).
# Inside mutate(), `perc_1987` refers to the joined column, not the data
# frame of the same name.
gendphd_5 <- gendphd_4 %>%
  filter(gend == 'female') %>%
  mutate(field = as.character(field)) %>%
  left_join(perc_1987, by = 'field') %>%
  mutate(perc_change = perc - perc_1987)
# p2: change in the female share of doctorates relative to 1987, one coloured
# line per field, labelled directly at the 2017 end point.
p2 <- ggplot() +
  # Year labels drawn inside the panel (at y = -2) in place of an x axis.
  geom_text(data = data.frame(year_num = sort(unique(gendphd_5$year_num)), y = -2),
            aes(x = year_num, y = y, label = year_num),
            family = 'Inconsolata', color = 'grey60') +
  geom_line(data = gendphd_5,
            aes(x = year_num, y = perc_change, group = label_wrap, color = label_wrap)) +
  # Extend the visible x range 10 units past the last year to leave room for
  # the direct labels.
  coord_cartesian(xlim = c(min(gendphd_5$year_num), max(gendphd_5$year_num) + 10)) +
  geom_text_repel(data = gendphd_5 %>% filter(year_num == 2017),
                  size = 3, nudge_x = 0, segment.color = 'grey30', hjust = 0,
                  direction = 'y', family = 'Inconsolata',
                  aes(x = year_num, y = perc_change, label = label_wrap, color = label_wrap)) +
  scale_color_manual('', values = pal) +
  scale_x_continuous('', breaks = NULL) +
  # Prefix '+' only on positive breaks: negative values already carry their
  # own sign (previously rendered as '+-5%') and zero needs none.
  scale_y_continuous(
    '',
    labels = function(x) paste0(ifelse(!is.na(x) & x > 0, '+', ''), x, '%')
  ) +
  labs(title = 'Change in Percentage of Doctorates Granted to Women by Field',
       subtitle = 'Changes relative to 1987 values',
       caption = 'Data: https://ncses.nsf.gov/pubs/nsf19301/data, /u/aftersox') +
  theme_classic() +
  theme(legend.position = 'none', text = element_text(family = 'Inconsolata'),
        axis.line = element_blank(), axis.ticks = element_blank(),
        panel.grid.major.x = element_line(linetype = 3, color = 'grey30'))
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Draw the two line charts side by side in a single row.
grid.arrange(grobs = list(p1, p2), nrow = 1)
# install.packages("waffle", repos = "https://cinc.rud.is")
library(waffle)
# Waffle chart of the 2017 gender split, one panel per field. The percentages
# are rescaled to proportions of each panel's grid (make_proportional).
# TRUE is spelled out because T is an ordinary, reassignable variable.
ggplot(gendphd_4 %>%
         filter(year == 2017)) +
  geom_waffle(aes(fill = gend, values = perc),
              make_proportional = TRUE, color = 'white', size = 0.2) +
  scale_fill_manual('', labels = c('Female', 'Male'),
                    values = c('#f2ac6a', '#6ab0f2')) +
  facet_wrap(~ label_wrap) +
  theme_void() +
  theme(legend.position = 'top')
library(animation)
# Render one waffle chart per survey year and stitch the frames into an
# animated GIF. Each frame must be print()ed explicitly because ggplot
# objects are not auto-printed inside a loop.
# TRUE is spelled out because T is an ordinary, reassignable variable.
saveGIF({
  for (yr in unique(sort(gendphd_4$year_num))) {
    p <- gendphd_4 %>%
      filter(year_num == yr) %>%
      ggplot(aes(fill = gend, values = perc)) +
      geom_waffle(make_proportional = TRUE, color = 'white', size = 0.2) +
      scale_fill_manual('', labels = c('Female', 'Male'),
                        values = c('#f2ac6a', '#6ab0f2')) +
      coord_equal() +
      facet_wrap(~ label_wrap) +
      labs(title = 'Doctorate Recipients by Field of Study and Gender',
           subtitle = paste('Year: ', yr)) +
      theme_void() +
      theme(legend.position = 'top')
    print(p)
  }
}, movie.name = 'gendphd_1987_2017_waffle.gif')
## Output at: gendphd_1987_2017_waffle.gif
## [1] TRUE