# Attach dslabs, list the datasets it provides, and list the files found in
# the "script" directory of the installed package.
library(dslabs)
data(package = "dslabs")
list.files(path = system.file("script", package = "dslabs"))
## [1] "make-admissions.R"
## [2] "make-brca.R"
## [3] "make-brexit_polls.R"
## [4] "make-death_prob.R"
## [5] "make-divorce_margarine.R"
## [6] "make-gapminder-rdas.R"
## [7] "make-greenhouse_gases.R"
## [8] "make-historic_co2.R"
## [9] "make-mnist_27.R"
## [10] "make-movielens.R"
## [11] "make-murders-rda.R"
## [12] "make-na_example-rda.R"
## [13] "make-nyc_regents_scores.R"
## [14] "make-olive.R"
## [15] "make-outlier_example.R"
## [16] "make-polls_2008.R"
## [17] "make-polls_us_election_2016.R"
## [18] "make-reported_heights-rda.R"
## [19] "make-research_funding_rates.R"
## [20] "make-stars.R"
## [21] "make-temp_carbon.R"
## [22] "make-tissue-gene-expression.R"
## [23] "make-trump_tweets.R"
## [24] "make-weekly_us_contagious_diseases.R"
## [25] "save-gapminder-example-csv.R"
# Load the `admissions` dataset into the workspace.
data(list = "admissions")
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.1
## ✓ tidyr 1.1.1 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggthemes)
library(ggrepel)
# Open the data in the viewer and keep a CSV copy in the working directory
# (missing values are written as empty strings).
view(admissions)
write_csv(x = admissions, "admissions.csv", na = "")

# Dimensions and column types of the data.
dim(admissions)
## [1] 12 4
str(object = admissions)
## 'data.frame': 12 obs. of 4 variables:
## $ major : chr "A" "B" "C" "D" ...
## $ gender : chr "men" "men" "men" "men" ...
## $ admitted : num 62 63 37 33 28 6 82 68 34 35 ...
## $ applicants: num 825 560 325 417 191 373 108 25 593 375 ...
library(RColorBrewer)
library(tidyverse)
library(dplyr)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
# Add a `rejected` column: the estimated number of applicants not admitted.
# Per the dslabs documentation, `admitted` is the PERCENT of applicants
# admitted, not a head count, so `applicants - admitted` mixes units and can
# even go negative (e.g. 25 applicants with admitted = 68). Convert the
# percentage to a count before taking the complement.
acceptance <- admissions %>%
  mutate(rejected = round(applicants * (1 - admitted / 100)))

# Base plot of `admitted` by major. It has no geom layers yet, so printing it
# shows only the empty axes; layers are added where the plot is used.
rate_Chart <- ggplot(acceptance, aes(x = major, y = admitted)) +
  xlab("Majors") +
  ylab("Admission rate") +
  theme_minimal(base_size = 14)
rate_Chart
## Plot 1
# Admission percentage by major, one line per gender, in WSJ styling.
#
# `major` is a discrete x variable, so without an explicit group every
# major/gender combination forms its own one-point group and geom_line()
# draws nothing (ggplot2 only reports "geom_path: Each group consists of only
# one observation. Do you need to adjust the group aesthetic?"). Grouping by
# gender connects each gender's points across majors.
#
# The y label says "percent" because, per the dslabs documentation, `admitted`
# holds the percentage of applicants admitted rather than a head count.
rate_Chart +
  geom_line(aes(colour = gender, group = gender)) +
  geom_point(aes(colour = gender)) +
  # Spell out `subtitle` instead of relying on partial matching of `sub`.
  ggtitle("UC Berkeley Grad Admissions", subtitle = "Gender Bias") +
  labs(x = "Majors", y = "Percent of applicants admitted") +
  # Empty name hides the legend title.
  scale_colour_wsj("colors6", name = "") +
  theme_wsj(color = "gray") +
  theme(axis.title = element_text(size = 12))
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'highcharter'
## The following object is masked from 'package:dslabs':
##
## stars
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# Rebuild the base plot with `rejected` on the y axis. No geom layers are
# added, so printing it shows only the labelled, empty axes.
rate_Chart <- ggplot(data = acceptance, mapping = aes(x = major, y = rejected)) +
  labs(x = "Majors", y = "Rejection Rate") +
  theme_minimal(base_size = 14)
rate_Chart
# Interactive line chart of `rejected` by major, one series per gender.
highchart() %>%
  hc_add_series(
    data = acceptance,
    type = "line",
    hcaes(x = major, y = rejected, group = gender)
  ) %>%
  hc_xAxis(title = list(text = "Major")) %>%
  hc_yAxis(title = list(text = "Number of Rejected Students"))
I chose the admissions dataset because I wanted to see whether there was any gender bias in the UC Berkeley admissions process. In particular, I wanted to see whether a higher proportion of men than women were accepted, especially within each major. As I worked with the dataset, I found that it would have been helpful if it listed the students' actual majors/departments (e.g., business, art, technology, science). I started this assignment by loading all the libraries (ggplot2, RColorBrewer, etc.), mutating the data, and plotting a line chart with the Wall Street Journal theme to compare gender bias by major/department. Based on the first plot, more males were admitted to the UC Berkeley master's program. In my second plot, I wanted to see how many students were rejected (not admitted) by UC Berkeley, by gender.