Load the dslabs (Data Science Labs) package

# Attach dslabs, list the datasets bundled with it, and show the files in
# the package's "script" directory.
library(dslabs)
data(package = "dslabs")
list.files(path = system.file("script", package = "dslabs"))
##  [1] "make-admissions.R"                   
##  [2] "make-brca.R"                         
##  [3] "make-brexit_polls.R"                 
##  [4] "make-death_prob.R"                   
##  [5] "make-divorce_margarine.R"            
##  [6] "make-gapminder-rdas.R"               
##  [7] "make-greenhouse_gases.R"             
##  [8] "make-historic_co2.R"                 
##  [9] "make-mnist_27.R"                     
## [10] "make-movielens.R"                    
## [11] "make-murders-rda.R"                  
## [12] "make-na_example-rda.R"               
## [13] "make-nyc_regents_scores.R"           
## [14] "make-olive.R"                        
## [15] "make-outlier_example.R"              
## [16] "make-polls_2008.R"                   
## [17] "make-polls_us_election_2016.R"       
## [18] "make-reported_heights-rda.R"         
## [19] "make-research_funding_rates.R"       
## [20] "make-stars.R"                        
## [21] "make-temp_carbon.R"                  
## [22] "make-tissue-gene-expression.R"       
## [23] "make-trump_tweets.R"                 
## [24] "make-weekly_us_contagious_diseases.R"
## [25] "save-gapminder-example-csv.R"

Admissions dataset

# Load the admissions table shipped with dslabs into the workspace.
data("admissions", package = "dslabs")
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.1
## ✓ tidyr   1.1.1     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggthemes)
library(ggrepel)
# Inspect the raw table, then export it to CSV; missing values are written
# as empty fields (na = "").
view(admissions)
write_csv(admissions, "admissions.csv", na = "")

Look at the dimensions and structure

# Number of rows and columns in the admissions table.
dim(admissions)
## [1] 12  4
# Per-column overview: name, type and the first few values.
str(admissions)
## 'data.frame':    12 obs. of  4 variables:
##  $ major     : chr  "A" "B" "C" "D" ...
##  $ gender    : chr  "men" "men" "men" "men" ...
##  $ admitted  : num  62 63 37 33 28 6 82 68 34 35 ...
##  $ applicants: num  825 560 325 417 191 373 108 25 593 375 ...

Load the rest of the libraries

library(RColorBrewer)
library(tidyverse)
library(dplyr)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)

Mutate

# Add the number of rejected applicants per major/gender row.
# `admitted` is a percentage of applicants, not a head count: the str()
# output shows a row with admitted = 68 but only 25 applicants. Subtracting
# it from `applicants` would mix units and can go negative, so the count is
# derived from the rejection share instead and rounded to whole students.
acceptance <- admissions %>%
  mutate(rejected = round(applicants * (1 - admitted / 100)))

Prepare chart

# Base plot for the admissions figure: majors on the x-axis, the `admitted`
# column on the y-axis. No geom is attached here, so printing it only shows
# the labelled, empty axes; layers are added where the plot is drawn.
rate_Chart <- ggplot(data = acceptance, mapping = aes(major, admitted)) +
  labs(x = "Majors", y = "Admission rate") +
  theme_minimal(base_size = 14)
print(rate_Chart)

Plot 1

# Admission rate by major, one line per gender, in the Wall Street Journal
# theme.
# `major` is discrete, so ggplot2 would put every observation in its own
# group; without `group = gender` geom_line() draws nothing and warns
# "geom_path: Each group consists of only one observation".
rate_Chart +
  geom_line(aes(colour = gender, group = gender)) +
  geom_point(aes(colour = gender)) +
  ggtitle("UC Berkeley Grad Admissions", subtitle = "Gender Bias") +
  # `admitted` is the percentage of applicants admitted, not a head count.
  labs(x = "Majors", y = "Admission rate (%)") +
  # Empty legend title, passed by name rather than by position.
  scale_colour_wsj("colors6", name = "") +
  theme_wsj(color = "gray") +
  theme(axis.title = element_text(size = 12))

Load libraries

library(highcharter)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## 
## Attaching package: 'highcharter'
## The following object is masked from 'package:dslabs':
## 
##     stars
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor

Prepare Chart

# Base plot for the rejections figure: majors on the x-axis, the `rejected`
# column on the y-axis. No geom is attached, so printing it only shows the
# labelled, empty axes.
# `rejected` is a number of students, so the axis is labelled as a count,
# matching the y-axis title of the highcharter plot of the same column.
rate_Chart <- ggplot(acceptance, aes(x = major, y = rejected)) +
  labs(x = "Majors", y = "Number of rejected students") +
  theme_minimal(base_size = 14)
rate_Chart

Plot 2

# Interactive line chart of the `rejected` column by major, with one series
# per gender.
highchart() %>%
  hc_add_series(
    data = acceptance,
    type = "line",
    mapping = hcaes(x = major, y = rejected, group = gender)
  ) %>%
  hc_xAxis(title = list(text = "Major")) %>%
  hc_yAxis(title = list(text = "Number of Rejected Students"))

Summary

I chose the admissions dataset because I wanted to see whether there was any gender bias in the UC Berkeley admissions process. In particular, I wanted to see whether a higher proportion of men than women were accepted, especially within each major. As I worked on the dataset, I found that it would be great if it listed the students’ actual majors/departments (e.g., master of business, art, tech, science, etc.). I started this assignment by loading all the libraries (ggplot2, RColorBrewer, etc.), mutating the data, and plotting a line chart with the Wall Street Journal theme to compare the gender bias by major/department. Based on the first plot, more males were admitted to the UC Berkeley masters program. In my second plot, I wanted to see how many students were rejected (not admitted) by UC Berkeley, by gender.