Import your data

# Read the fishing CSV (path is relative to this document) into `fishing`.
fishing <- read_csv(file = "../00_data/fishing.csv")
## Rows: 65706 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): lake, species, comments, region
## dbl (3): year, grand_total, values
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Chapter 15

Create a factor

# Raw labels, listed in no particular order.
x1 <- c(
    "Canada", "Ohio", "Pennsylvania",
    "Michigan", "New York", "U.S Total"
)

# Level order to impose on the factor; every value of `x1` appears here,
# so no element becomes NA.
Lakes_levels <- c(
    "Michigan", "New York", "Ohio",
    "Pennsylvania", "U.S Total", "Canada"
)

# Factor whose levels follow `Lakes_levels` rather than alphabetical order.
y1 <- factor(x = x1, levels = Lakes_levels)

Modify factor order

Make two bar charts here: one before ordering and another after.

# Mean of `values` for each region, ignoring missing observations.
values_by_region <- fishing %>%
    group_by(region) %>%
    summarise(
        avg_values = mean(values, na.rm = TRUE)
    )

# Bar chart 1 (before ordering): `region` is a character column, so ggplot
# places the bars in alphabetical order.
values_by_region %>%
    ggplot(aes(x = avg_values, y = region)) +
    geom_col() +
    labs(y = NULL, x = "Average Fish Caught by Region")

# Bar chart 2 (after ordering): bars sorted by their average value.
# `labs()` has to be chained onto the plot with `+`; called as a separate
# statement it only prints a labels object and the plot keeps its default
# axis titles.
values_by_region %>%
    ggplot(
        aes(
            x = avg_values,
            y = fct_reorder(.f = region, .x = avg_values)
        )
    ) +
    geom_col() +
    labs(y = NULL, x = "Average Fish Caught by Region")

Modify factor levels

Show examples of three functions:

  • fct_recode
# fct_recode(): rename individual levels (new name = old name); levels that
# are not listed keep their original label. The old and new columns are
# selected together so the renaming can be compared row by row.
fishing %>%
    mutate(
        region_rev = fct_recode(
            region,
            "Michigan, MI"         = "Michigan (MI)",
            "New York, NY"         = "New York (NY)",
            "Green Bay, Michigan"  = "Green Bay (MI)",
            "Green Bay, Wisconsin" = "Green Bay (WI)"
        )
    ) %>%
    select(region, region_rev)
## # A tibble: 65,706 × 2
##    region            region_rev       
##    <chr>             <fct>            
##  1 Michigan (MI)     Michigan, MI     
##  2 New York (NY)     New York, NY     
##  3 Ohio (OH)         Ohio (OH)        
##  4 Pennsylvania (PA) Pennsylvania (PA)
##  5 U.S. Total        U.S. Total       
##  6 Canada (ONT)      Canada (ONT)     
##  7 Michigan (MI)     Michigan, MI     
##  8 New York (NY)     New York, NY     
##  9 Ohio (OH)         Ohio (OH)        
## 10 Pennsylvania (PA) Pennsylvania (PA)
## # ℹ 65,696 more rows
  • fct_collapse
# fct_collapse(): merge several existing levels into one new level. Here the
# two Green Bay regions are combined under a single "Green Bay" label.
fishing %>%
    mutate(
        region_col = fct_collapse(
            region,
            "Green Bay" = c("Green Bay (WI)", "Green Bay (MI)")
        )
    ) %>%
    select(region, region_col)
## # A tibble: 65,706 × 2
##    region            region_col       
##    <chr>             <fct>            
##  1 Michigan (MI)     Michigan (MI)    
##  2 New York (NY)     New York (NY)    
##  3 Ohio (OH)         Ohio (OH)        
##  4 Pennsylvania (PA) Pennsylvania (PA)
##  5 U.S. Total        U.S. Total       
##  6 Canada (ONT)      Canada (ONT)     
##  7 Michigan (MI)     Michigan (MI)    
##  8 New York (NY)     New York (NY)    
##  9 Ohio (OH)         Ohio (OH)        
## 10 Pennsylvania (PA) Pennsylvania (PA)
## # ℹ 65,696 more rows
  • fct_lump
# Row count per region before lumping: 24 distinct regions.
fishing %>% count(region)
## # A tibble: 24 × 2
##    region                n
##    <chr>             <int>
##  1 Canada (ONT)       7479
##  2 Georgian Bay (GB)  2445
##  3 Green Bay (MI)     2126
##  4 Green Bay (WI)     2126
##  5 Huron Proper (HP)  2445
##  6 Illinois (IL)      2126
##  7 Indiana (IN)       2126
##  8 MI State Total     2126
##  9 Mich. Proper (MI)  2126
## 10 Mich. Proper (WI)  2126
## # ℹ 14 more rows

# fct_lump(): pool infrequent levels into "Other".
# Called without `n` or `prop`, fct_lump() left all 24 regions untouched on
# this data, so an explicit threshold is given: any region holding less than
# 5% of the 65,706 rows (e.g. those with 2,126 or 2,445 rows, roughly 3-4%)
# is lumped. count() shows the resulting levels and their sizes.
fishing %>%
    mutate(region_lump = fct_lump(region, prop = 0.05)) %>%
    count(region_lump)

Chapter 16

No need to do anything here.