Import your data

# Load the workbook into `colony`; the path is relative to this document.
colony <- read_excel(path = "../00_data/myData.xlsx")

Chapter 15

Create a factor

# Reporting periods used in the data: each value is a calendar quarter.
months <- c(
  "January-March", "April-June", "July-September", "October-December"
)
months
## [1] "January-March"    "April-June"       "July-September"   "October-December"

# Levels must match the values exactly (listed here in chronological order):
# factor() silently converts any value that is not among `levels` to NA.
month_levels <- c(
  "January-March", "April-June", "July-September", "October-December"
)

colony_months <- factor(months, levels = month_levels)
colony_months
## [1] January-March    April-June       July-September   October-December
## Levels: January-March April-June July-September October-December

Modify factor order

# Fix the RNG seed so the same nine rows are drawn on every run.
set.seed(123)

# Draw nine random rows from `colony`, keeping only the columns used below.
colony_small <- select(
  sample_n(colony, 9),
  year, state, colony_lost, colony_added
)

colony_small
## # A tibble: 9 × 4
##    year state      colony_lost colony_added
##   <dbl> <chr>            <dbl> <chr>       
## 1  2017 Utah              2700 2900        
## 2  2017 Vermont            170 390         
## 3  2015 Texas            25000 13000       
## 4  2017 Hawaii             130 970         
## 5  2016 Florida          45000 36000       
## 6  2019 Wyoming           3300 100         
## 7  2021 Kansas            1400 2300        
## 8  2020 California       69000 61000       
## 9  2018 Florida          30000 53000

Before

# Per-state averages of colonies added and lost, plus the number of sampled
# rows per state.
# `colony_added` comes in as a character column, so it is converted to numeric
# first; mean() on character data returns NA with a warning for every group.
loss_summary <- colony_small %>%
  mutate(colony_added = as.numeric(colony_added)) %>%
  group_by(state) %>%
  summarise(
    colony_added = mean(colony_added, na.rm = TRUE),
    colony_lost = mean(colony_lost, na.rm = TRUE),
    n = n()
  )

# Baseline plot: `state` is mapped as-is, without reordering by colony_lost.
ggplot(loss_summary, aes(colony_lost, state)) +
  geom_point()

After

# Same scatter plot, with states ordered on the y-axis by their colony_lost value.
loss_summary %>%
  ggplot(aes(x = colony_lost, y = fct_reorder(state, colony_lost))) +
  geom_point()

Modify factor levels

Recode

# List the unique reporting periods present in the data.
distinct(colony, months)
## # A tibble: 4 × 1
##   months          
##   <chr>           
## 1 January-March   
## 2 April-June      
## 3 July-September  
## 4 October-December
# Relabel the "January-March" level as "Jan, Feb, Mar" in a new factor column,
# then show only the first-quarter rows next to the original label.
colony %>%
  mutate(
    Jan_group = fct_recode(months, "Jan, Feb, Mar" = "January-March")
  ) %>%
  filter(months == "January-March") %>%
  select(months, Jan_group)
## # A tibble: 329 × 2
##    months        Jan_group    
##    <chr>         <fct>        
##  1 January-March Jan, Feb, Mar
##  2 January-March Jan, Feb, Mar
##  3 January-March Jan, Feb, Mar
##  4 January-March Jan, Feb, Mar
##  5 January-March Jan, Feb, Mar
##  6 January-March Jan, Feb, Mar
##  7 January-March Jan, Feb, Mar
##  8 January-March Jan, Feb, Mar
##  9 January-March Jan, Feb, Mar
## 10 January-March Jan, Feb, Mar
## # … with 319 more rows

Collapse

  • fct_collapse
# Merge Texas and Florida into a single "Southern state" level; every other
# state keeps its own level.
colony_small %>%
  select(state) %>%
  mutate(
    state_col = fct_collapse(state, "Southern state" = c("Texas", "Florida"))
  )
## # A tibble: 9 × 2
##   state      state_col     
##   <chr>      <fct>         
## 1 Utah       Utah          
## 2 Vermont    Vermont       
## 3 Texas      Southern state
## 4 Hawaii     Hawaii        
## 5 Florida    Southern state
## 6 Wyoming    Wyoming       
## 7 Kansas     Kansas        
## 8 California California    
## 9 Florida    Southern state

Lump

# Number of sampled rows per state.
count(colony_small, state)
## # A tibble: 8 × 2
##   state          n
##   <chr>      <int>
## 1 California     1
## 2 Florida        2
## 3 Hawaii         1
## 4 Kansas         1
## 5 Texas          1
## 6 Utah           1
## 7 Vermont        1
## 8 Wyoming        1
# Keep only the most frequent state and lump every other state into "Other".
# The default fct_lump() left all eight states untouched on this sample, so the
# explicit fct_lump_n() variant is used to make the lumping visible.
colony_small %>%
  mutate(state_lump = fct_lump_n(state, n = 1)) %>%
  distinct(state_lump)
## # A tibble: 2 × 1
##   state_lump
##   <fct>     
## 1 Other     
## 2 Florida   

Chapter 16

No need to do anything here.