Import your data

# Load the workbook from the sibling data folder into `mydata`.
mydata <- read_excel(path = "../00_data/mydata.xlsx")

Chapter 15

Create a factor

# Show stateProvince converted to a factor. The result is only printed, not
# assigned, so `mydata` itself is left unchanged by this step.
mutate(mydata, stateProvince = as_factor(stateProvince))
## # A tibble: 136,621 × 11
##    occurrenceID eventID decimalLatitude decimalLongitude scientificName         
##           <dbl>   <dbl>           <dbl>            <dbl> <chr>                  
##  1        12832  525618           -28.5             153. Philoria loveridgei    
##  2        12833  526341           -33.7             151. Heleioporus australiac…
##  3        12834  526673           -28.7             153. Mixophyes iteratus     
##  4        12835  526673           -28.7             153. Mixophyes fasciolatus  
##  5        12836  526673           -28.7             153. Litoria latopalmata    
##  6        12837  527056           -30.4             153. Assa darlingtoni       
##  7        12838  527058           -30.4             153. Assa darlingtoni       
##  8        12839  528103           -30.4             153  Litoria nasuta         
##  9        12840  528103           -30.4             153  Mixophyes iteratus     
## 10        12841  528103           -30.4             153  Litoria gracilenta     
## # ℹ 136,611 more rows
## # ℹ 6 more variables: eventDate <dttm>, eventTime <dttm>, timezone <chr>,
## #   coordinateUncertaintyInMeters <dbl>, recordedBy <dbl>, stateProvince <fct>

Modify factor order

Make two bar charts here: one before ordering and another after.

# Bar chart of record counts per state, using the column as it is stored.
ggplot(data = mydata, mapping = aes(y = stateProvince)) +
  geom_bar() +
  labs(y = "State", title = "Before Ordering (Alphabetical)")

# Same bar chart, with the states reordered by how often they occur.
ggplot(data = mydata, mapping = aes(y = fct_infreq(stateProvince))) +
  geom_bar() +
  labs(y = "State", title = "After Ordering (By Frequency)")

Modify factor levels

Show examples of three functions:

  • fct_recode
# Keep only the state column, then add an abbreviated copy next to it.
# Levels not named in fct_recode() are left as they are.
mydata %>%
  select(stateProvince) %>%
  mutate(
    state_rev = fct_recode(
      stateProvince,
      "NSW" = "New South Wales",
      "QLD" = "Queensland"
    )
  )
## # A tibble: 136,621 × 2
##    stateProvince   state_rev
##    <chr>           <fct>    
##  1 New South Wales NSW      
##  2 New South Wales NSW      
##  3 New South Wales NSW      
##  4 New South Wales NSW      
##  5 New South Wales NSW      
##  6 New South Wales NSW      
##  7 New South Wales NSW      
##  8 New South Wales NSW      
##  9 New South Wales NSW      
## 10 New South Wales NSW      
## # ℹ 136,611 more rows
  • fct_collapse
# Keep only the state column, then add a copy in which three states are
# merged into a single "East Coast" level.
mydata %>%
  select(stateProvince) %>%
  mutate(
    state_col = fct_collapse(
      stateProvince,
      "East Coast" = c("New South Wales", "Queensland", "Victoria")
    )
  )
## # A tibble: 136,621 × 2
##    stateProvince   state_col 
##    <chr>           <fct>     
##  1 New South Wales East Coast
##  2 New South Wales East Coast
##  3 New South Wales East Coast
##  4 New South Wales East Coast
##  5 New South Wales East Coast
##  6 New South Wales East Coast
##  7 New South Wales East Coast
##  8 New South Wales East Coast
##  9 New South Wales East Coast
## 10 New South Wales East Coast
## # ℹ 136,611 more rows
  • fct_lump
# Count records per state after keeping the 3 most common states and
# lumping every other state into "Other".
count(mydata, state_lump = fct_lump(stateProvince, n = 3))
## # A tibble: 4 × 2
##   state_lump          n
##   <fct>           <int>
## 1 New South Wales 58749
## 2 Queensland      23334
## 3 Victoria        32383
## 4 Other           22155

Chapter 16

No need to do anything here.