# Read the weekly gas prices CSV (TidyTuesday, 2025-07-01) directly from GitHub.
mydata <- read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-07-01/weekly_gas_prices.csv"
)
## Rows: 22360 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): fuel, grade, formulation
## dbl (1): price
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Tabulate how many rows fall under each value of the character column `grade`.
count(mydata, grade)
## # A tibble: 6 × 2
## grade n
## <chr> <int>
## 1 all 6506
## 2 low_sulfur 96
## 3 midgrade 4788
## 4 premium 4788
## 5 regular 5222
## 6 ultra_low_sulfur 960
# Desired display order for `grade`; used as the factor levels below.
grade_levels <- c(
  "all", "regular", "midgrade", "premium", "ultra_low_sulfur", "low_sulfur"
)

# Copy of the data with `grade` converted from character to a factor whose
# levels follow `grade_levels` rather than alphabetical order.
mydata_rev <- mutate(mydata, grade = factor(grade, levels = grade_levels))

# Same tabulation as before; rows now appear in factor-level order.
count(mydata_rev, grade)
## # A tibble: 6 × 2
## grade n
## <fct> <int>
## 1 all 6506
## 2 regular 5222
## 3 midgrade 4788
## 4 premium 4788
## 5 ultra_low_sulfur 960
## 6 low_sulfur 96
Make two bar charts here: one before reordering the factor levels and one after.
# Bar chart of row counts per grade using the original character column,
# i.e. before any explicit level ordering has been applied.
ggplot(count(mydata, grade), aes(x = grade, y = n)) +
  geom_col() + # bar heights are taken directly from the precomputed counts
  labs(
    x = "Grade",
    y = "Amount of Values",
    title = "Amount of Values per version of Grade"
  ) +
  # Tilt the x-axis labels so the longer grade names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Bar chart of row counts per grade using the factor version of `grade`,
# so the bars follow the level order set in `grade_levels`.
ggplot(count(mydata_rev, grade), aes(x = grade, y = n)) +
  geom_col() + # bar heights are taken directly from the precomputed counts
  labs(
    x = "Grade",
    y = "Amount of Values",
    title = "Amount of Values per version of Grade"
  ) +
  # Tilt the x-axis labels so the longer grade names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Show examples of three forcats functions: fct_recode(), fct_collapse(), and fct_lump().
# fct_recode(): rename individual levels using `"new label" = "old value"`.
# Fixes the misspelled "Low sulfer Diesel" label and drops the dangling
# trailing comma after the last pair.
mydata %>%
  mutate(
    grade = fct_recode(
      grade,
      "Total" = "all",
      "87" = "regular",
      "89" = "midgrade",
      "92" = "premium",
      "Diesel" = "ultra_low_sulfur",
      "Low sulfur Diesel" = "low_sulfur"
    )
  ) %>%
  count(grade)
## # A tibble: 6 × 2
## grade n
## <fct> <int>
## 1 Total 6506
## 2 Low sulfur Diesel 96
## 3 89 4788
## 4 92 4788
## 5 87 5222
## 6 Diesel 960
# fct_collapse(): merge several existing levels into one new level, written
# as `new_level = c("old1", "old2", ...)`.
# Both diesel grades must be wrapped in c(); a bare "low_sulfur" is passed as
# a separate unnamed argument and ends up as a level with an empty name ("")
# instead of being folded into Diesel.
mydata %>%
  mutate(
    grade = fct_collapse(
      grade,
      Gasoline = c("regular", "midgrade", "premium"),
      Diesel = c("ultra_low_sulfur", "low_sulfur"),
      All = "all"
    )
  ) %>%
  count(grade)
## # A tibble: 3 × 2
## grade n
## <fct> <int>
## 1 All 6506
## 2 Diesel 1056
## 3 Gasoline 14798
# fct_lump(): with no `n`/`prop` given, the least frequent levels are pooled
# into a single "Other" level. The lumped factor is built inside count(),
# which groups by the named expression and reports it as `grade`.
mydata %>%
  count(grade = fct_lump(grade))
## # A tibble: 5 × 2
## grade n
## <fct> <int>
## 1 all 6506
## 2 midgrade 4788
## 3 premium 4788
## 4 regular 5222
## 5 Other 1056
No need to do anything here.