Import your data

# Read the Excel workbook into `mydata`. The path is relative, so it is
# resolved against the current working directory.
mydata <- read_excel(path = "../01_module4/data/myData.xlsx")

Chapter 15

Create a factor

Modify factor order

Make two bar charts here: one before ordering and another after.

# Bar chart 1 (before ordering): counts per technical_winner value, with the
# bars in their default order.
mydata %>%
    ggplot(aes(technical_winner)) +
    geom_bar()

# Bar chart 2 (after ordering): same counts, but technical_winner is turned
# into a factor whose levels are sorted by frequency, so the bars run from the
# most common value to the least common one.
# factor() is applied first because fct_infreq() needs a factor or character
# input, and the column type is not fixed by this document.
mydata %>%
    mutate(technical_winner = fct_infreq(factor(technical_winner))) %>%
    ggplot(aes(technical_winner)) +
    geom_bar()

# Mean of `age` within each technical_winner group.
techincalwins_by_avgage <- summarise(
    group_by(mydata, technical_winner),
    avg_age = mean(age)
)

# Point plot of the group means; the y-axis categories are left in their
# default order (no factor reordering is applied here).
ggplot(
    data = techincalwins_by_avgage,
    mapping = aes(x = avg_age, y = technical_winner)
) +
    geom_point()

# Mean of `age` within each first_date_appeared group.
date_appeared_by_avgage <- summarise(
    group_by(mydata, first_date_appeared),
    avg_age = mean(age)
)

# Point plot of the group means, with the y-axis categories sorted by avg_age
# through fct_reorder().
ggplot(
    data = date_appeared_by_avgage,
    mapping = aes(
        x = avg_age,
        y = fct_reorder(.f = first_date_appeared, .x = avg_age)
    )
) +
    geom_point()

Modify factor levels

Show examples of three functions:

  • fct_recode
# fct_recode(): rename the level "2010-08-17" to "first_season", then show the
# original and the recoded column side by side for the rows that carried that
# date.
mydata %>%
    mutate(
        firstdate = fct_recode(
            .f = first_date_appeared,
            first_season = "2010-08-17"
        )
    ) %>%
    filter(first_date_appeared == "2010-08-17") %>%
    select(first_date_appeared, firstdate)
## # A tibble: 10 × 2
##    first_date_appeared firstdate   
##    <chr>               <fct>       
##  1 2010-08-17          first_season
##  2 2010-08-17          first_season
##  3 2010-08-17          first_season
##  4 2010-08-17          first_season
##  5 2010-08-17          first_season
##  6 2010-08-17          first_season
##  7 2010-08-17          first_season
##  8 2010-08-17          first_season
##  9 2010-08-17          first_season
## 10 2010-08-17          first_season
  • fct_collapse
# fct_collapse(): merge the levels "2010-08-17" and "2011-08-16" into the
# single level "firstdays_in_twoyears", then show only the rows affected.
# NOTE: the printed output that follows this chunk came from the earlier
# filter and must be refreshed by re-rendering the document.
mydata %>%
    mutate(
        first_two_seasons = fct_collapse(
            first_date_appeared,
            "firstdays_in_twoyears" = c("2010-08-17", "2011-08-16")
        )
    ) %>%
    select(first_date_appeared, first_two_seasons) %>%
    # Filter on the collapsed column. The earlier condition compared the
    # original first_date_appeared column with the new level name, which
    # never matches a date string, so `!=` kept every row instead of
    # isolating the collapsed ones.
    filter(first_two_seasons == "firstdays_in_twoyears")
## # A tibble: 120 × 2
##    first_date_appeared first_two_seasons    
##    <chr>               <fct>                
##  1 2010-08-17          firstdays_in_twoyears
##  2 2010-08-17          firstdays_in_twoyears
##  3 2010-08-17          firstdays_in_twoyears
##  4 2010-08-17          firstdays_in_twoyears
##  5 2010-08-17          firstdays_in_twoyears
##  6 2010-08-17          firstdays_in_twoyears
##  7 2010-08-17          firstdays_in_twoyears
##  8 2010-08-17          firstdays_in_twoyears
##  9 2010-08-17          firstdays_in_twoyears
## 10 2010-08-17          firstdays_in_twoyears
## # ℹ 110 more rows
  • fct_lump
# fct_lump() on the gss_cat example data: lump `race` and list the distinct
# values that remain.
gss_cat %>%
    mutate(race_lump = fct_lump(f = race)) %>%
    distinct(race_lump)
## # A tibble: 2 × 1
##   race_lump
##   <fct>    
## 1 White    
## 2 Other
# The same lumping applied to first_date_appeared in mydata, followed by the
# distinct values that remain.
mydata %>%
    mutate(firstday = fct_lump(f = first_date_appeared)) %>%
    distinct(firstday)
## # A tibble: 9 × 1
##   firstday  
##   <fct>     
## 1 Other     
## 2 2011-08-16
## 3 2012-08-14
## 4 2013-08-20
## 5 2014-08-06
## 6 2015-08-05
## 7 2016-08-24
## 8 2017-08-29
## 9 NA

Chapter 16

No need to do anything here.