Import your data

# Read the gas price CSV (relative path) into the tibble used throughout.
Gas_Prices <- read_csv(file = "../00_data/Gas_Prices.csv")
## Rows: 22360 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): fuel, grade, formulation
## dbl  (1): price
## date (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the imported tibble to preview its first rows and column types.
Gas_Prices
## # A tibble: 22,360 × 5
##    date       fuel     grade   formulation  price
##    <date>     <chr>    <chr>   <chr>        <dbl>
##  1 1990-08-20 gasoline regular all           1.19
##  2 1990-08-20 gasoline regular conventional  1.19
##  3 1990-08-27 gasoline regular all           1.25
##  4 1990-08-27 gasoline regular conventional  1.25
##  5 1990-09-03 gasoline regular all           1.24
##  6 1990-09-03 gasoline regular conventional  1.24
##  7 1990-09-10 gasoline regular all           1.25
##  8 1990-09-10 gasoline regular conventional  1.25
##  9 1990-09-17 gasoline regular all           1.27
## 10 1990-09-17 gasoline regular conventional  1.27
## # ℹ 22,350 more rows

Chapter 15

Create a factor

# Tabulate how many rows fall under each value of `grade`.
count(Gas_Prices, grade)
## # A tibble: 6 × 2
##   grade                n
##   <chr>            <int>
## 1 all               6506
## 2 low_sulfur          96
## 3 midgrade          4788
## 4 premium           4788
## 5 regular           5222
## 6 ultra_low_sulfur   960
# Small example factor holding each of the six grade names once.
x <- factor(
    c(
        "regular", "ultra_low_sulfur", "all",
        "midgrade", "premium", "low_sulfur"
    )
)

# Hand-picked level order; it lists only three of the six grades.
grade_levels <- c(
    "regular",
    "ultra_low_sulfur",
    "all"
)

# Convert `grade` to a factor that lists the grades in `grade_levels` first and
# every remaining grade found in the data after them. Using `grade_levels`
# alone as the level set would silently turn each row whose grade is not
# listed there (midgrade, premium, low_sulfur) into NA.
all_grade_levels <- union(grade_levels, sort(unique(Gas_Prices$grade)))

Gas_Prices2 <- Gas_Prices %>%
    mutate(grade = factor(grade, levels = all_grade_levels))
Gas_Prices2
## # A tibble: 22,360 × 5
##    date       fuel     grade   formulation  price
##    <date>     <chr>    <fct>   <chr>        <dbl>
##  1 1990-08-20 gasoline regular all           1.19
##  2 1990-08-20 gasoline regular conventional  1.19
##  3 1990-08-27 gasoline regular all           1.25
##  4 1990-08-27 gasoline regular conventional  1.25
##  5 1990-09-03 gasoline regular all           1.24
##  6 1990-09-03 gasoline regular conventional  1.24
##  7 1990-09-10 gasoline regular all           1.25
##  8 1990-09-10 gasoline regular conventional  1.25
##  9 1990-09-17 gasoline regular all           1.27
## 10 1990-09-17 gasoline regular conventional  1.27
## # ℹ 22,350 more rows
# Rebuild the example factor with `grade_levels` as its level set; values of
# `x` that are not listed in `grade_levels` print as <NA>.
x1 <- x %>% factor(levels = grade_levels)
x1
## [1] regular          ultra_low_sulfur all              <NA>            
## [5] <NA>             <NA>            
## Levels: regular ultra_low_sulfur all

Modify factor order

Make two bar charts here: one before reordering the factor and another after.

# One row per grade holding the mean of `price`, with missing prices ignored.
Gas_Price_by_grade <- summarise(
    group_by(Gas_Prices, grade),
    avg_gas_price = mean(price, na.rm = TRUE)
)
Gas_Price_by_grade
## # A tibble: 6 × 2
##   grade            avg_gas_price
##   <chr>                    <dbl>
## 1 all                       2.50
## 2 low_sulfur                3.35
## 3 midgrade                  2.63
## 4 premium                   2.78
## 5 regular                   2.31
## 6 ultra_low_sulfur          3.37
# Chart before reordering: average price on x, grade on y, one point per grade.
ggplot(
    data = Gas_Price_by_grade,
    mapping = aes(x = avg_gas_price, y = grade)
) +
    geom_point()

# Chart after reordering: sort the grades by their average price in a separate
# mutate() step, so the y-axis keeps the plain title "grade" instead of showing
# the text of the fct_reorder() call.
Gas_Price_by_grade %>%
    mutate(grade = fct_reorder(grade, avg_gas_price)) %>%
    ggplot(aes(avg_gas_price, grade)) +
    geom_point()

Modify factor levels

Show examples of three functions:

  • fct_recode
# Rename every grade level to its upper-case spelling (new name = old name),
# then count the rows under each renamed level.
Gas_Prices %>%
    mutate(
        grade = fct_recode(
            grade,
            ALL              = "all",
            LOW_SULFUR       = "low_sulfur",
            MIDGRADE         = "midgrade",
            PREMIUM          = "premium",
            REGULAR          = "regular",
            ULTRA_LOW_SULFUR = "ultra_low_sulfur"
        )
    ) %>%
    count(grade)
## # A tibble: 6 × 2
##   grade                n
##   <fct>            <int>
## 1 ALL               6506
## 2 LOW_SULFUR          96
## 3 MIDGRADE          4788
## 4 PREMIUM           4788
## 5 REGULAR           5222
## 6 ULTRA_LOW_SULFUR   960
  • fct_collapse
# Keep premium and regular as their own (upper-cased) levels and fold the four
# remaining grades into a single "Other" level, then count rows per level.
Gas_Prices %>%
    mutate(
        grade = fct_collapse(
            grade,
            PREMIUM = "premium",
            REGULAR = "regular",
            Other   = c("all", "low_sulfur", "midgrade", "ultra_low_sulfur")
        )
    ) %>%
    count(grade)
## # A tibble: 3 × 2
##   grade       n
##   <fct>   <int>
## 1 Other   12350
## 2 PREMIUM  4788
## 3 REGULAR  5222
  • fct_lump
# Lump the grades that are too rare to pass a 5% share of the rows into
# "Other", then count rows per remaining level.
Gas_Prices %>%
    mutate(grade = grade %>% fct_lump(prop = 0.05)) %>%
    count(grade)
## # A tibble: 5 × 2
##   grade        n
##   <fct>    <int>
## 1 all       6506
## 2 midgrade  4788
## 3 premium   4788
## 4 regular   5222
## 5 Other     1056
# Same lumping with an 8% threshold. The two rarest grades (960 and 96 of
# 22,360 rows) are still the only ones below it; the next smallest grade has
# 4,788 rows, roughly 21%.
Gas_Prices %>%
    mutate(grade = grade %>% fct_lump(prop = 0.08)) %>%
    count(grade)
## # A tibble: 5 × 2
##   grade        n
##   <fct>    <int>
## 1 all       6506
## 2 midgrade  4788
## 3 premium   4788
## 4 regular   5222
## 5 Other     1056
# Same lumping with a 10% threshold. Every grade that is kept has at least
# 4,788 of 22,360 rows (roughly 21%), so only the two rarest grades are lumped
# into "Other".
Gas_Prices %>%
    mutate(grade = grade %>% fct_lump(prop = 0.1)) %>%
    count(grade)
## # A tibble: 5 × 2
##   grade        n
##   <fct>    <int>
## 1 all       6506
## 2 midgrade  4788
## 3 premium   4788
## 4 regular   5222
## 5 Other     1056

Chapter 16

No need to do anything here.