Homework 4

Gender bias among graduate school admissions to UC Berkeley.

(Worth up to 20 points) Take any of the datasets included in “dslabs” and create a new multivariable graph. You may use any of the datasets, including those used in the notes examples, as long as you change something meaningful about the graph. You must include the following in your graph:

• Comments describing all chunks of code
• Meaningful labels for x- and y-axes
• Meaningful title
• A theme for the graph (you must change the generic ggplot style)
• Colors for a third variable, with a legend

# Set up the environment and load the appropriate libraries

# Attach dslabs, which ships the example datasets
library(dslabs)
# Show the datasets bundled with the package
data(package = "dslabs")
# Show the files in the package's "script" directory
list.files(path = system.file("script", package = "dslabs"))
##  [1] "make-admissions.R"                   
##  [2] "make-brca.R"                         
##  [3] "make-brexit_polls.R"                 
##  [4] "make-death_prob.R"                   
##  [5] "make-divorce_margarine.R"            
##  [6] "make-gapminder-rdas.R"               
##  [7] "make-greenhouse_gases.R"             
##  [8] "make-historic_co2.R"                 
##  [9] "make-mnist_27.R"                     
## [10] "make-movielens.R"                    
## [11] "make-murders-rda.R"                  
## [12] "make-na_example-rda.R"               
## [13] "make-nyc_regents_scores.R"           
## [14] "make-olive.R"                        
## [15] "make-outlier_example.R"              
## [16] "make-polls_2008.R"                   
## [17] "make-polls_us_election_2016.R"       
## [18] "make-reported_heights-rda.R"         
## [19] "make-research_funding_rates.R"       
## [20] "make-stars.R"                        
## [21] "make-temp_carbon.R"                  
## [22] "make-tissue-gene-expression.R"       
## [23] "make-trump_tweets.R"                 
## [24] "make-weekly_us_contagious_diseases.R"
## [25] "save-gapminder-example-csv.R"

Gender bias among graduate school admissions to UC Berkeley.

## Load the admissions dataset, then attach the tidyverse
## (provides %>%, filter(), summarize(), pull() and ggplot() used below)
data(admissions)
library("tidyverse")
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.2     ✓ dplyr   1.0.6
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Attach additional plotting packages; ggrepel supplies geom_text_repel()
library("ggthemes")
library("ggrepel")
# Print the structure (column names, types, first values) of the dataset
utils::str(admissions)
## 'data.frame':    12 obs. of  4 variables:
##  $ major     : chr  "A" "B" "C" "D" ...
##  $ gender    : chr  "men" "men" "men" "men" ...
##  $ admitted  : num  62 63 37 33 28 6 82 68 34 35 ...
##  $ applicants: num  825 560 325 417 191 373 108 25 593 375 ...
# Ratio of summed `admitted` to summed `applicants` across all majors, for men.
# NOTE(review): the dslabs documentation describes `admitted` as the percent of
# applicants admitted rather than a head count -- confirm; if so, this ratio is
# not a true admission rate and should be reworked.
men_rate <- admissions %>%
  filter(gender == "men") %>%
  summarize(rate = sum(admitted) / sum(applicants)) %>%
  pull(rate)
# Same ratio for women
women_rate <- admissions %>%
  filter(gender == "women") %>%
  summarize(rate = sum(admitted) / sum(applicants)) %>%
  pull(rate)
# Scatter plot of admitted vs. applicants on log-log axes, one point per
# major/gender pair, coloured by gender and labelled with the major letter.
# Use the dslabs theme instead of the generic ggplot style.
ds_theme_set()
p <- admissions %>%
  ggplot(aes(x = applicants, y = admitted, label = major)) +
  # Dashed reference lines (geom_abline's default slope of 1):
  # red = men's overall ratio, blue = women's overall ratio
  geom_abline(intercept = log10(men_rate), lty = 2, col = "red") +
  geom_abline(intercept = log10(women_rate), lty = 2, col = "blue") +
  geom_point(aes(color = gender), size = 3) +
  # Repel the major labels so they do not sit on top of the points
  geom_text_repel(nudge_x = 0.005) +
  scale_x_log10("Applicant (log scale)") +
  scale_y_log10("admitted per major (log scale)") +
  ggtitle("Gender bias among graduate school admissions to UC Berkeley") +
  # Centre the title and hide the legend title (the gender keys are
  # self-explanatory)
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank()
  )
p

# Compare men and women for majors A and B only: one facet per major,
# points coloured by gender and sized by admitted / applicants.
admissions %>%
  filter(major %in% c("A", "B")) %>%
  mutate(rate = admitted / applicants) %>%
  ggplot(aes(applicants, admitted, col = gender, size = rate)) +
  geom_point(alpha = 0.8) +
  # Hide the size legend; "none" replaces the deprecated `size = FALSE`
  guides(size = "none") +
  theme(plot.title = element_blank(), legend.title = element_blank()) +
  # Zoom the y-axis without dropping any data
  coord_cartesian(ylim = c(40, 90)) +
  xlab("Applicant") +
  ylab("admitted") +
  # Large grey major letter drawn inside each panel in place of a strip label
  geom_text(aes(x = 200, y = 82, label = major), size = 12, color = "grey") +
  facet_wrap(. ~ major) +
  # Remove the facet strips (the grey letter identifies the panel) and
  # move the gender legend to the top
  theme(
    strip.background = element_blank(),
    strip.text.x = element_blank(),
    strip.text.y = element_blank(),
    legend.position = "top"
  )
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.