# Load the raw data from the Excel workbook
data <- read_excel(path = "../00_data/MyData.xlsx")
Make two bar charts here: one before ordering and another after.
# Per-brand summary: mean Year and mean Mileage (NAs ignored) plus row count
Brand_summary <- summarise(
  group_by(data, Brand),
  Year = mean(Year, na.rm = TRUE),
  Mileage = mean(Mileage, na.rm = TRUE),
  n = n()
)

Brand_summary
## # A tibble: 11 × 4
## Brand Year Mileage n
## <chr> <dbl> <dbl> <int>
## 1 Audi 2017. 15.7 10
## 2 BMW 2019 16.2 10
## 3 Ford 2018. 15.1 11
## 4 Honda 2018 22.3 6
## 5 Hyundai 2018. 19.1 11
## 6 Mahindra 2020. 17 5
## 7 Maruti 2020. 18.7 6
## 8 Mercedes 2018. 15 10
## 9 Tata 2019 19.6 11
## 10 Toyota 2018. 15 10
## 11 Volkswagen 2018. 17.9 10
# Dot plot of mean mileage per brand, with brands left in their default order
Brand_summary %>%
  ggplot(aes(x = Mileage, y = Brand)) +
  geom_point()
# Plot: same dot plot, but with brands sorted by their mean mileage so the
# ranking can be read straight off the y axis.
# Brand has no "Don't know"-style placeholder level, so no fct_relevel() step
# is needed after the reorder.
brand_by_mileage <- Brand_summary %>%
  ggplot(aes(x = Mileage, y = fct_reorder(.f = Brand, .x = Mileage))) +
  geom_point() +
  # Labeling: the y axis is self-explanatory (brand names), so drop its title
  labs(y = NULL, x = "Mean Mileage by Brand")

brand_by_mileage
Show examples of three functions:
# List each brand once
distinct(data, Brand)
## # A tibble: 11 × 1
## Brand
## <chr>
## 1 Toyota
## 2 Honda
## 3 Ford
## 4 Maruti
## 5 Hyundai
## 6 Tata
## 7 Mahindra
## 8 Volkswagen
## 9 Audi
## 10 BMW
## 11 Mercedes
# Recode: relabel the "BMW" level as "Audi" in a new factor column,
# then show the original and recoded values for the BMW rows
data %>%
  mutate(
    Brand_rev = fct_recode(Brand, Audi = "BMW")
  ) %>%
  select(Brand, Brand_rev) %>%
  filter(Brand %in% "BMW")
## # A tibble: 10 × 2
## Brand Brand_rev
## <chr> <fct>
## 1 BMW Audi
## 2 BMW Audi
## 3 BMW Audi
## 4 BMW Audi
## 5 BMW Audi
## 6 BMW Audi
## 7 BMW Audi
## 8 BMW Audi
## 9 BMW Audi
## 10 BMW Audi
# Collapse: fold the "BMW" and "Ford" levels into "Mercedes",
# then show every row whose original brand is not Toyota
data %>%
  mutate(
    Brand_col = fct_collapse(Brand, Mercedes = c("BMW", "Ford"))
  ) %>%
  select(Brand, Brand_col) %>%
  filter(!(Brand == "Toyota"))
## # A tibble: 90 × 2
## Brand Brand_col
## <chr> <fct>
## 1 Honda Honda
## 2 Ford Mercedes
## 3 Maruti Maruti
## 4 Hyundai Hyundai
## 5 Tata Tata
## 6 Mahindra Mahindra
## 7 Volkswagen Volkswagen
## 8 Audi Audi
## 9 BMW Mercedes
## 10 Mercedes Mercedes
## # ℹ 80 more rows
# Lump small levels into "Other": first check how many rows each brand has
count(data, Brand)
## # A tibble: 11 × 2
## Brand n
## <chr> <int>
## 1 Audi 10
## 2 BMW 10
## 3 Ford 11
## 4 Honda 6
## 5 Hyundai 11
## 6 Mahindra 5
## 7 Maruti 6
## 8 Mercedes 10
## 9 Tata 11
## 10 Toyota 10
## 11 Volkswagen 10
# Lump the least frequent brands into "Other" and list the resulting levels.
# fct_lump_lowfreq() is the explicit form of what bare fct_lump() dispatches
# to when neither `n` nor `prop` is supplied, so the result is unchanged.
data %>%
  mutate(Brand_lump = fct_lump_lowfreq(Brand)) %>%
  distinct(Brand_lump)
## # A tibble: 11 × 1
## Brand_lump
## <fct>
## 1 Toyota
## 2 Honda
## 3 Ford
## 4 Maruti
## 5 Hyundai
## 6 Tata
## 7 Other
## 8 Volkswagen
## 9 Audi
## 10 BMW
## 11 Mercedes
No need to do anything here.