# Import the gas price data from the project's data folder.
Gas_Prices <- read_csv(file = "../00_data/Gas_Prices.csv")
## Rows: 22360 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): fuel, grade, formulation
## dbl (1): price
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the imported tibble.
print(Gas_Prices)
## # A tibble: 22,360 × 5
## date fuel grade formulation price
## <date> <chr> <chr> <chr> <dbl>
## 1 1990-08-20 gasoline regular all 1.19
## 2 1990-08-20 gasoline regular conventional 1.19
## 3 1990-08-27 gasoline regular all 1.25
## 4 1990-08-27 gasoline regular conventional 1.25
## 5 1990-09-03 gasoline regular all 1.24
## 6 1990-09-03 gasoline regular conventional 1.24
## 7 1990-09-10 gasoline regular all 1.25
## 8 1990-09-10 gasoline regular conventional 1.25
## 9 1990-09-17 gasoline regular all 1.27
## 10 1990-09-17 gasoline regular conventional 1.27
## # ℹ 22,350 more rows
# Number of rows recorded for each grade.
count(Gas_Prices, grade)
## # A tibble: 6 × 2
## grade n
## <chr> <int>
## 1 all 6506
## 2 low_sulfur 96
## 3 midgrade 4788
## 4 premium 4788
## 5 regular 5222
## 6 ultra_low_sulfur 960
# One example value per grade that appears in the data.
x <- factor(
  c("regular", "ultra_low_sulfur", "all", "midgrade", "premium", "low_sulfur")
)
# Desired level order for `grade`. All six grades are listed: factor() turns
# any value that is missing from `levels` into NA without a warning, which
# would blank out the midgrade, premium and low_sulfur rows.
grade_levels <- c(
  "regular", "ultra_low_sulfur", "all",
  "midgrade", "premium", "low_sulfur"
)
# Convert `grade` from character to a factor with the explicit level order.
Gas_Prices2 <- Gas_Prices %>%
  mutate(grade = factor(grade, levels = grade_levels))
Gas_Prices2
## # A tibble: 22,360 × 5
## date fuel grade formulation price
## <date> <chr> <fct> <chr> <dbl>
## 1 1990-08-20 gasoline regular all 1.19
## 2 1990-08-20 gasoline regular conventional 1.19
## 3 1990-08-27 gasoline regular all 1.25
## 4 1990-08-27 gasoline regular conventional 1.25
## 5 1990-09-03 gasoline regular all 1.24
## 6 1990-09-03 gasoline regular conventional 1.24
## 7 1990-09-10 gasoline regular all 1.25
## 8 1990-09-10 gasoline regular conventional 1.25
## 9 1990-09-17 gasoline regular all 1.27
## 10 1990-09-17 gasoline regular conventional 1.27
## # ℹ 22,350 more rows
# Re-level the example vector the same way; every value keeps its label
# because each one is present in `grade_levels`.
x1 <- factor(x, levels = grade_levels)
x1
## [1] regular ultra_low_sulfur all midgrade
## [5] premium low_sulfur
## Levels: regular ultra_low_sulfur all midgrade premium low_sulfur
Make two bar charts here: one before ordering the factor levels and another after.
# Mean price for each grade; missing prices are dropped before averaging.
Gas_Price_by_grade <- summarise(
  group_by(Gas_Prices, grade),
  avg_gas_price = mean(price, na.rm = TRUE)
)
Gas_Price_by_grade
## # A tibble: 6 × 2
## grade avg_gas_price
## <chr> <dbl>
## 1 all 2.50
## 2 low_sulfur 3.35
## 3 midgrade 2.63
## 4 premium 2.78
## 5 regular 2.31
## 6 ultra_low_sulfur 3.37
# Bar chart of the average price per grade, with grades in their default
# order (before reordering).
ggplot(Gas_Price_by_grade, aes(avg_gas_price, grade)) +
  geom_col() +
  labs(x = "Average gas price", y = "Grade")
# The same bar chart with grades reordered by their average price. The
# explicit y label replaces the raw `fct_reorder(...)` expression that would
# otherwise be used as the axis title.
ggplot(
  Gas_Price_by_grade,
  aes(avg_gas_price, fct_reorder(grade, avg_gas_price))
) +
  geom_col() +
  labs(x = "Average gas price", y = "Grade")
Show examples of three functions:
# fct_recode(): rename each grade level to its upper-case spelling, then
# tally the rows per level.
Gas_Prices %>%
  mutate(
    grade = fct_recode(
      grade,
      ALL = "all",
      LOW_SULFUR = "low_sulfur",
      MIDGRADE = "midgrade",
      PREMIUM = "premium",
      REGULAR = "regular",
      ULTRA_LOW_SULFUR = "ultra_low_sulfur"
    )
  ) %>%
  count(grade)
## # A tibble: 6 × 2
## grade n
## <fct> <int>
## 1 ALL 6506
## 2 LOW_SULFUR 96
## 3 MIDGRADE 4788
## 4 PREMIUM 4788
## 5 REGULAR 5222
## 6 ULTRA_LOW_SULFUR 960
# fct_collapse(): keep premium and regular as their own levels and merge the
# four remaining grades into a single "Other" level, then tally the rows.
Gas_Prices %>%
  mutate(
    grade = fct_collapse(
      grade,
      PREMIUM = "premium",
      REGULAR = "regular",
      Other = c("all", "low_sulfur", "midgrade", "ultra_low_sulfur")
    )
  ) %>%
  count(grade)
## # A tibble: 3 × 2
## grade n
## <fct> <int>
## 1 Other 12350
## 2 PREMIUM 4788
## 3 REGULAR 5222
# Lump every grade that accounts for less than 5% of the rows into "Other".
# fct_lump_prop() is the specific function that fct_lump(prop = ) dispatches to.
Gas_Prices %>%
  mutate(grade = fct_lump_prop(grade, prop = 0.05)) %>%
  count(grade)
## # A tibble: 5 × 2
## grade n
## <fct> <int>
## 1 all 6506
## 2 midgrade 4788
## 3 premium 4788
## 4 regular 5222
## 5 Other 1056
# Lump every grade that accounts for less than 8% of the rows into "Other".
# fct_lump_prop() is the specific function that fct_lump(prop = ) dispatches to.
Gas_Prices %>%
  mutate(grade = fct_lump_prop(grade, prop = 0.08)) %>%
  count(grade)
## # A tibble: 5 × 2
## grade n
## <fct> <int>
## 1 all 6506
## 2 midgrade 4788
## 3 premium 4788
## 4 regular 5222
## 5 Other 1056
# Lump every grade that accounts for less than 10% of the rows into "Other".
# fct_lump_prop() is the specific function that fct_lump(prop = ) dispatches to.
Gas_Prices %>%
  mutate(grade = fct_lump_prop(grade, prop = 0.1)) %>%
  count(grade)
## # A tibble: 5 × 2
## grade n
## <fct> <int>
## 1 all 6506
## 2 midgrade 4788
## 3 premium 4788
## 4 regular 5222
## 5 Other 1056
No need to do anything here.