For information about the data, click the link here. https://github.com/rfordatascience/tidytuesday/tree/master/data/2018/2018-10-16
library(tidyverse)
library(scales)
# Import data: read the CSV from disk and convert the data frame to a tibble
recent_grads <- as_tibble(
  read.csv("~/R/Business Sat/DATA/recent_grads.csv")
)
Hint: Use count.
# Tally the number of rows in recent_grads for each Major_category
count(recent_grads, Major_category)
## # A tibble: 16 x 2
## Major_category n
## <fct> <int>
## 1 Agriculture & Natural Resources 10
## 2 Arts 8
## 3 Biology & Life Science 14
## 4 Business 13
## 5 Communications & Journalism 4
## 6 Computers & Mathematics 11
## 7 Education 16
## 8 Engineering 29
## 9 Health 12
## 10 Humanities & Liberal Arts 15
## 11 Industrial Arts & Consumer Services 7
## 12 Interdisciplinary 1
## 13 Law & Public Policy 5
## 14 Physical Sciences 10
## 15 Psychology & Social Work 9
## 16 Social Science 9
Hint: Take recent_grads, pipe it to dplyr::arrange, and pipe it to dplyr::select.
# Scatter plot: ShareWomen on the x axis, Median on the y axis
ggplot(data = recent_grads, mapping = aes(x = ShareWomen, y = Median)) +
  geom_point()
Hint: Add the third variable to the aes function by mapping Major_category to color.
# Same scatter plot, with each point coloured by its Major_category
ggplot(
  data = recent_grads,
  mapping = aes(x = ShareWomen, y = Median, color = Major_category)
) +
  geom_point()
Hint: Take recent_grads, pipe it to mutate(Major_category = fct_lump(Major_category, 4)), and pipe it to ggplot().
# Collapse Major_category with fct_lump(n = 4) before plotting, so the colour
# legend shows only the retained categories plus the lumped remainder
recent_grads %>%
  mutate(Major_category = fct_lump(Major_category, n = 4)) %>%
  ggplot(mapping = aes(x = ShareWomen, y = Median, color = Major_category)) +
  geom_point()
Hint: Add geom_smooth(aes(group=1), method = "lm") to the ggplot() code.
# Lumped scatter plot with a linear-model trend line on top.
# aes(group = 1) puts every point in one group for the smoother, so a single
# line is fitted instead of one line per colour.
ggplot(
  data = mutate(recent_grads, Major_category = fct_lump(Major_category, n = 4)),
  mapping = aes(x = ShareWomen, y = Median, color = Major_category)
) +
  geom_point() +
  geom_smooth(mapping = aes(group = 1), method = "lm")
Hint: Add scale_x_continuous(labels = percent_format()) to the ggplot() code.
# Lumped scatter plot + single linear trend line, with the x axis labels
# formatted as percentages
recent_grads %>%
  mutate(Major_category = fct_lump(Major_category, n = 4)) %>%
  ggplot(mapping = aes(x = ShareWomen, y = Median, color = Major_category)) +
  geom_point() +
  geom_smooth(mapping = aes(group = 1), method = "lm") +
  scale_x_continuous(labels = scales::percent_format())
Hint: Add scale_y_continuous(labels = scales::dollar_format()) to the ggplot() code.
# Lumped scatter plot + single linear trend line, with percentage labels on
# the x axis and dollar labels on the y axis
ggplot(
  data = mutate(recent_grads, Major_category = fct_lump(Major_category, n = 4)),
  mapping = aes(x = ShareWomen, y = Median, color = Major_category)
) +
  geom_point() +
  geom_smooth(mapping = aes(group = 1), method = "lm") +
  scale_x_continuous(labels = scales::percent_format()) +
  scale_y_continuous(labels = scales::dollar_format())
Hint: Add expand_limits() to the ggplot() code.
# Fully formatted scatter plot; expand_limits(y = 0) forces the y axis to
# include zero
recent_grads %>%
  mutate(Major_category = fct_lump(Major_category, n = 4)) %>%
  ggplot(mapping = aes(x = ShareWomen, y = Median, color = Major_category)) +
  scale_x_continuous(labels = scales::percent_format()) +
  scale_y_continuous(labels = scales::dollar_format()) +
  expand_limits(y = 0) +
  geom_point() +
  geom_smooth(mapping = aes(group = 1), method = "lm")
Hint: Add the third variable to the aes function by mapping Major to label. Assign the result to g and, in the next two lines, type library(plotly) and then ggplotly(g).
# Build the formatted scatter plot and store it in g for plotly.
# `label = Major` has to be inside aes(): only aesthetics declared there are
# mapped to data columns, and ggplotly() builds its hover tooltip from the
# mapped aesthetics. Passed outside aes() it was never mapped.
g <-
  recent_grads %>%
  mutate(Major_category = fct_lump(Major_category, 4)) %>%
  ggplot(aes(ShareWomen, Median, color = Major_category, label = Major)) +
  scale_x_continuous(labels = percent_format()) +
  scale_y_continuous(labels = scales::dollar_format()) +
  expand_limits(y = 0) +
  geom_point() +
  geom_smooth(aes(group = 1), method = "lm")

# Render the ggplot object as an interactive plotly widget
library(plotly)
ggplotly(g)
Hint: Add the third variable to the aes function by mapping Sample_size to size.
# Interactive scatter plot where point size reflects Sample_size.
# Both extra mappings must live inside aes(), written as aesthetic = column:
#   size  = Sample_size  (the original had the pair reversed and outside aes())
#   label = Major        (picked up by ggplotly() for the hover tooltip)
g <-
  recent_grads %>%
  mutate(Major_category = fct_lump(Major_category, 4)) %>%
  ggplot(aes(
    ShareWomen,
    Median,
    color = Major_category,
    size = Sample_size,
    label = Major
  )) +
  scale_x_continuous(labels = percent_format()) +
  scale_y_continuous(labels = scales::dollar_format()) +
  expand_limits(y = 0) +
  geom_point() +
  geom_smooth(aes(group = 1), method = "lm")

# Render the ggplot object as an interactive plotly widget
library(plotly)
ggplotly(g)