Import your data

# Load the bee colony workbook; the path is resolved relative to the
# current working directory.
bee_colonies <- read_excel(path = "../00_data/MyData3.xlsx")

Chapter 15

Create a factor

Modify factor order

Unordered factor levels

 # Average colony_lost for each period

# One row per value of `months`; missing colony_lost values are dropped
# before the mean is taken.
colonylost_by_period <- bee_colonies %>%
  group_by(months) %>%
  summarise(avg_colonylost = mean(colony_lost, na.rm = TRUE))

colonylost_by_period
## # A tibble: 4 × 2
##   months           avg_colonylost
##   <chr>                     <dbl>
## 1 April-June               11963.
## 2 January-March            17604.
## 3 July-September           17536.
## 4 October-December         18927.
# Plot the averages as-is: `months` is a character column, so the y axis
# falls back to alphabetical order rather than any meaningful order.
ggplot(colonylost_by_period, aes(x = avg_colonylost, y = months)) +
  geom_point()

Ordered factor levels

# Same plot, but with the periods reordered by their average loss so the
# y axis follows the plotted values instead of the alphabet.
colonylost_by_period %>%
  ggplot(aes(
    x = avg_colonylost,
    y = fct_reorder(months, avg_colonylost)
  )) +
  geom_point() +
  # Drop the y-axis title; the period names on the axis speak for themselves.
  labs(x = "Mean colony lost each period", y = NULL)

Modify factor levels

# List the period labels present in the data before renaming any of them.
distinct(bee_colonies, months)
## # A tibble: 4 × 1
##   months          
##   <chr>           
## 1 January-March   
## 2 April-June      
## 3 July-September  
## 4 October-December
# Recode: rename a single level, leaving every other level untouched.
bee_colonies %>%
  # fct_recode() takes new_name = "old_name" pairs.
  mutate(
    period_rev = fct_recode(.f = months, Spring = "January-March")
  ) %>%
  select(months, period_rev) %>%
  # Keep only the renamed rows to confirm the recode took effect.
  filter(period_rev == "Spring")
## # A tibble: 329 × 2
##    months        period_rev
##    <chr>         <fct>     
##  1 January-March Spring    
##  2 January-March Spring    
##  3 January-March Spring    
##  4 January-March Spring    
##  5 January-March Spring    
##  6 January-March Spring    
##  7 January-March Spring    
##  8 January-March Spring    
##  9 January-March Spring    
## 10 January-March Spring    
## # ℹ 319 more rows
# Collapse multiple levels into one: both aggregate labels are merged
# into a single "Unknown state" level; all other states are kept as they are.
bee_colonies %>%
  mutate(
    state_col = fct_collapse(
      .f = state,
      "Unknown state" = c("Other States", "United States")
    )
  ) %>%
  select(state, state_col)
## # A tibble: 1,222 × 2
##    state       state_col  
##    <chr>       <fct>      
##  1 Alabama     Alabama    
##  2 Arizona     Arizona    
##  3 Arkansas    Arkansas   
##  4 California  California 
##  5 Colorado    Colorado   
##  6 Connecticut Connecticut
##  7 Florida     Florida    
##  8 Georgia     Georgia    
##  9 Hawaii      Hawaii     
## 10 Idaho       Idaho      
## # ℹ 1,212 more rows
# Lump small levels into other levels
# Start by counting the rows per state to see which levels are rare.
count(bee_colonies, state)
## # A tibble: 47 × 2
##    state           n
##    <chr>       <int>
##  1 Alabama        26
##  2 Arizona        26
##  3 Arkansas       26
##  4 California     26
##  5 Colorado       26
##  6 Connecticut    26
##  7 Florida        26
##  8 Georgia        26
##  9 Hawaii         26
## 10 Idaho          26
## # ℹ 37 more rows
# fct_lump() is superseded; called without `n` or `prop` it simply dispatches
# to fct_lump_lowfreq(), so that function is called directly here.
# NOTE: no level is actually lumped on this data. The result still has all 47
# states and no "Other" level, consistent with the states having equal row
# counts (26 each in the listing above). To force lumping, pick an explicit
# rule such as fct_lump_n() or fct_lump_prop(), optionally with weights.
bee_colonies %>%
  mutate(state_lump = fct_lump_lowfreq(state)) %>%
  distinct(state_lump)
## # A tibble: 47 × 1
##    state_lump 
##    <fct>      
##  1 Alabama    
##  2 Arizona    
##  3 Arkansas   
##  4 California 
##  5 Colorado   
##  6 Connecticut
##  7 Florida    
##  8 Georgia    
##  9 Hawaii     
## 10 Idaho      
## # ℹ 37 more rows

Chapter 16

No need to do anything here.