# Load the data set used throughout this document from an Excel workbook.
# The path is relative, so it resolves against the current working directory.
mydata <- read_excel(path = "../01_module4/data/myData.xlsx")
Make two bar charts here: one before ordering and another after.
# Bar chart counting the rows for each technical_winner value; no reordering
# of the categories is applied here.
ggplot(data = mydata, mapping = aes(x = technical_winner)) +
  geom_bar()
# Average age within each technical_winner group (one row per group).
# NOTE(review): mean() returns NA for any group that contains a missing age;
# pass na.rm = TRUE if `age` can be missing -- confirm against the data.
techincalwins_by_avgage <- mydata %>%
  group_by(technical_winner) %>%
  summarise(avg_age = mean(age))

# Plot the group averages as points, leaving the groups in their default order.
ggplot(
  data = techincalwins_by_avgage,
  mapping = aes(x = avg_age, y = technical_winner)
) +
  geom_point()
# Average age for each first_date_appeared value (one row per value).
date_appeared_by_avgage <- mydata %>%
  group_by(first_date_appeared) %>%
  summarise(avg_age = mean(age))

# Point plot of the averages in which the y-axis categories are sorted by
# avg_age through fct_reorder() rather than shown in their default order.
ggplot(
  data = date_appeared_by_avgage,
  mapping = aes(
    x = avg_age,
    y = fct_reorder(.f = first_date_appeared, .x = avg_age)
  )
) +
  geom_point()
Show examples of three functions: `fct_recode()`, `fct_collapse()`, and `fct_lump()`.
# fct_recode(): relabel the "2010-08-17" value as "first_season", then show the
# original and recoded columns side by side for only the rows that were changed.
mydata %>%
  mutate(
    firstdate = fct_recode(first_date_appeared, "first_season" = "2010-08-17")
  ) %>%
  filter(first_date_appeared == "2010-08-17") %>%
  select(first_date_appeared, firstdate)
## # A tibble: 10 × 2
## first_date_appeared firstdate
## <chr> <fct>
## 1 2010-08-17 first_season
## 2 2010-08-17 first_season
## 3 2010-08-17 first_season
## 4 2010-08-17 first_season
## 5 2010-08-17 first_season
## 6 2010-08-17 first_season
## 7 2010-08-17 first_season
## 8 2010-08-17 first_season
## 9 2010-08-17 first_season
## 10 2010-08-17 first_season
# fct_collapse(): merge the "2010-08-17" and "2011-08-16" values into a single
# "firstdays_in_twoyears" level, then show the old and new columns together.
# NOTE(review): the filter compares the original column with the new level
# label, which that column presumably never contains, so the condition is TRUE
# for every non-missing date (rows with an NA date are dropped by filter()).
# It may have been meant to test first_two_seasons instead -- confirm intent.
mydata %>%
  mutate(
    first_two_seasons = fct_collapse(
      first_date_appeared,
      "firstdays_in_twoyears" = c("2010-08-17", "2011-08-16")
    )
  ) %>%
  filter(first_date_appeared != "firstdays_in_twoyears") %>%
  select(first_date_appeared, first_two_seasons)
## # A tibble: 120 × 2
## first_date_appeared first_two_seasons
## <chr> <fct>
## 1 2010-08-17 firstdays_in_twoyears
## 2 2010-08-17 firstdays_in_twoyears
## 3 2010-08-17 firstdays_in_twoyears
## 4 2010-08-17 firstdays_in_twoyears
## 5 2010-08-17 firstdays_in_twoyears
## 6 2010-08-17 firstdays_in_twoyears
## 7 2010-08-17 firstdays_in_twoyears
## 8 2010-08-17 firstdays_in_twoyears
## 9 2010-08-17 firstdays_in_twoyears
## 10 2010-08-17 firstdays_in_twoyears
## # ℹ 110 more rows
# fct_lump() on the gss_cat sample data: lump the less frequent race levels
# into "Other" and list the distinct values that remain.
gss_cat %>%
  mutate(race_lump = fct_lump(race)) %>%
  distinct(race_lump)
## # A tibble: 2 × 1
## race_lump
## <fct>
## 1 White
## 2 Other
# fct_lump() on first_date_appeared: lump the less frequent dates into "Other"
# and list the distinct values that remain.
mydata %>%
  mutate(firstday = fct_lump(first_date_appeared)) %>%
  distinct(firstday)
## # A tibble: 9 × 1
## firstday
## <fct>
## 1 Other
## 2 2011-08-16
## 3 2012-08-14
## 4 2013-08-20
## 5 2014-08-06
## 6 2015-08-05
## 7 2016-08-24
## 8 2017-08-29
## 9 NA
No need to do anything here.