Import your data

# Load the Power Rangers episode data from the shared data folder.
powerrangers <- read_csv(file = "../00_data/PowerRangers.csv")
## Rows: 922 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): season, title, air.date, description
## dbl (3): episode, imdb.rating, total.votes
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Chapter 15

Create a factor

# Tally the number of episodes recorded for each distinct `season` value.
count(powerrangers, season)
## # A tibble: 28 × 2
##    season                                  n
##    <chr>                               <int>
##  1 "Be the first one to add a plot.\""     1
##  2 "Beast Morphers (Season 1)"            22
##  3 "Beast Morphers (Season 2)"            22
##  4 "Dino Charge"                          22
##  5 "Dino Super Charge"                    22
##  6 "Dino Thunder"                         38
##  7 "In Space"                             43
##  8 "Jungle Fury"                          32
##  9 "Lightspeed Rescue"                    40
## 10 "Lost Galaxy"                          45
## # ℹ 18 more rows
# Season levels in reverse chronological order (newest first).
# Each string must match a value of the `season` column exactly: values that
# are not listed here become NA when the factor is built, so the literals must
# not carry stray spaces or line breaks.
# The malformed value "Be the first one to add a plot.\"" is deliberately left
# out so that it turns into NA.
# NOTE(review): "Super Ninja Steel" and "Mighty Morphin (Season 2)" are not
# visible in the truncated count() output above; confirm their exact spelling
# against the data.
season_levels <- c(
  "Beast Morphers (Season 2)",
  "Beast Morphers (Season 1)",
  "Super Ninja Steel",
  "Ninja Steel",
  "Dino Super Charge",
  "Dino Charge",
  "Super Megaforce",
  "Megaforce",
  "Super Samurai",
  "Samurai",
  "R.P.M.",
  "Jungle Fury",
  "Operation Overdrive",
  "Mystic Force",
  "S.P.D.",
  "Dino Thunder",
  "Ninja Storm",
  "Wild Force",
  "Time Force",
  "Lightspeed Rescue",
  "Lost Galaxy",
  "In Space",
  "Turbo",
  "Zeo",
  "Mighty Morphin (Season 3)",
  "Mighty Morphin (Season 2)",
  "Mighty Morphin (Season 1)"
)
# Turn `season` into a factor whose level order follows `season_levels`;
# any value not present in `season_levels` becomes NA.
powerrangers_rev <- mutate(
  powerrangers,
  season = factor(season, levels = season_levels)
)

Modify factor order

Make two bar charts here: one before reordering the factor and another after.

# Mean IMDb rating per season; missing ratings are ignored, so a season with
# no ratings at all yields NaN.
powerrangers_summary <- summarise(
  group_by(powerrangers, season),
  imdb.rating = mean(imdb.rating, na.rm = TRUE)
)

powerrangers_summary
## # A tibble: 28 × 2
##    season                              imdb.rating
##    <chr>                                     <dbl>
##  1 "Be the first one to add a plot.\""      NaN   
##  2 "Beast Morphers (Season 1)"                8.53
##  3 "Beast Morphers (Season 2)"                8.84
##  4 "Dino Charge"                              8.33
##  5 "Dino Super Charge"                        7.59
##  6 "Dino Thunder"                             7.31
##  7 "In Space"                                 8.15
##  8 "Jungle Fury"                              7.49
##  9 "Lightspeed Rescue"                        8.09
## 10 "Lost Galaxy"                              7.99
## # ℹ 18 more rows
# "Before" plot: seasons appear in their default (alphabetical) order.
powerrangers_summary %>%
  ggplot(aes(x = imdb.rating, y = season)) +
  geom_point()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

# "After" plot: fct_reorder(factor, numeric) reorders the seasons by their mean
# rating, so the factor to reorder (`season`) comes first and the ordering
# variable (`imdb.rating`) second.
ggplot(
  powerrangers_summary,
  aes(x = imdb.rating, y = fct_reorder(season, imdb.rating))
) +
  geom_point() +
  # Keep the axis title identical to the "before" plot for easy comparison.
  labs(y = "season")
## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument is not numeric or logical: returning NA
## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

Modify factor levels

Show examples of three functions:

  • fct_recode
# fct_recode() takes `"new name" = "old name"` pairs. Each old name must be an
# existing value of `season`; names that do not exist are skipped with an
# "Unknown levels" warning and nothing is renamed.
powerrangers <- powerrangers %>%
  mutate(season = fct_recode(season,
                             "MMPR Season 1" = "Mighty Morphin (Season 1)",
                             "Power Rangers Zeo" = "Zeo",
                             "Power Rangers Turbo" = "Turbo"))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `season = fct_recode(...)`.
## Caused by warning:
## ! Unknown levels in `f`: Mighty Morphin Power Rangers, Power Rangers Zeo, Power Rangers Turbo
  • fct_collapse

  • fct_lump

Chapter 16

No need to do anything here.