Import your data

# Read the Excel workbook into `data`.
data <- read_excel(path = "../00_data/MyData.xlsx")

Chapter 15

Create a factor

Modify factor order

Make two bar charts here: one before ordering and another after ordering.

# Per-brand summary: mean Year and mean Mileage (NAs ignored) plus row count.
Brand_summary <- data %>%
  group_by(Brand) %>%
  summarise(
    across(c(Year, Mileage), ~ mean(.x, na.rm = TRUE)),
    n = n()
  )

Brand_summary
## # A tibble: 11 × 4
##    Brand       Year Mileage     n
##    <chr>      <dbl>   <dbl> <int>
##  1 Audi       2017.    15.7    10
##  2 BMW        2019     16.2    10
##  3 Ford       2018.    15.1    11
##  4 Honda      2018     22.3     6
##  5 Hyundai    2018.    19.1    11
##  6 Mahindra   2020.    17       5
##  7 Maruti     2020.    18.7     6
##  8 Mercedes   2018.    15      10
##  9 Tata       2019     19.6    11
## 10 Toyota     2018.    15      10
## 11 Volkswagen 2018.    17.9    10
# Before ordering: Brand is placed on the y-axis without any reordering.
Brand_summary %>%
  ggplot(aes(x = Mileage, y = Brand)) +
  geom_point()

# Plot: brands on the y-axis, ordered by their mean mileage.
# No fct_relevel() step is applied: Brand has no "Don`t know" level, so
# releveling on it would change nothing and only raise an
# "unknown level" warning.
brand_by_mileage <- Brand_summary %>%

    ggplot(aes(x = Mileage,
               y = fct_reorder(.f = Brand, .x = Mileage))) +
    geom_point() +

    # Labeling: drop the y-axis title, describe the x-axis
    labs(y = NULL, x = "Mean Mileage by Brand")
brand_by_mileage

Modify factor levels

Show examples of three functions:

# List each brand once, in order of first appearance.
distinct(data, Brand)
## # A tibble: 11 × 1
##    Brand     
##    <chr>     
##  1 Toyota    
##  2 Honda     
##  3 Ford      
##  4 Maruti    
##  5 Hyundai   
##  6 Tata      
##  7 Mahindra  
##  8 Volkswagen
##  9 Audi      
## 10 BMW       
## 11 Mercedes
# Recode: fct_recode() renames a level (new name = old name).
data %>%
    select(Brand) %>%

    # Relabel the "BMW" level as "Audi" in a new factor column
    mutate(Brand_rev = fct_recode(Brand, Audi = "BMW")) %>%

    # Show only the rows affected by the recode
    filter(Brand == "BMW")
## # A tibble: 10 × 2
##    Brand Brand_rev
##    <chr> <fct>    
##  1 BMW   Audi     
##  2 BMW   Audi     
##  3 BMW   Audi     
##  4 BMW   Audi     
##  5 BMW   Audi     
##  6 BMW   Audi     
##  7 BMW   Audi     
##  8 BMW   Audi     
##  9 BMW   Audi     
## 10 BMW   Audi
# Collapse: fct_collapse() merges several levels into a single one.
data %>%
    select(Brand) %>%

    # Fold "BMW" and "Ford" into the "Mercedes" level
    mutate(Brand_col = fct_collapse(Brand, Mercedes = c("BMW", "Ford"))) %>%
    filter(Brand != "Toyota")
## # A tibble: 90 × 2
##    Brand      Brand_col 
##    <chr>      <fct>     
##  1 Honda      Honda     
##  2 Ford       Mercedes  
##  3 Maruti     Maruti    
##  4 Hyundai    Hyundai   
##  5 Tata       Tata      
##  6 Mahindra   Mahindra  
##  7 Volkswagen Volkswagen
##  8 Audi       Audi      
##  9 BMW        Mercedes  
## 10 Mercedes   Mercedes  
## # ℹ 80 more rows
# Lump small levels into "Other": first inspect how many rows each brand has.
count(data, Brand)
## # A tibble: 11 × 2
##    Brand          n
##    <chr>      <int>
##  1 Audi          10
##  2 BMW           10
##  3 Ford          11
##  4 Honda          6
##  5 Hyundai       11
##  6 Mahindra       5
##  7 Maruti         6
##  8 Mercedes      10
##  9 Tata          11
## 10 Toyota        10
## 11 Volkswagen    10
# Lump with fct_lump() defaults, then list the resulting distinct levels.
data %>%
    distinct(Brand_lump = fct_lump(Brand))
## # A tibble: 11 × 1
##    Brand_lump
##    <fct>     
##  1 Toyota    
##  2 Honda     
##  3 Ford      
##  4 Maruti    
##  5 Hyundai   
##  6 Tata      
##  7 Other     
##  8 Volkswagen
##  9 Audi      
## 10 BMW       
## 11 Mercedes
  • fct_recode
  • fct_collapse
  • fct_lump

Chapter 16

No need to do anything here.