# Load the fishing data set; column types are guessed by readr.
fishing <- read_csv(file = "../00_data/fishing.csv")
## Rows: 65706 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): lake, species, comments, region
## dbl (3): year, grand_total, values
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Raw values, in the order they happen to appear.
x1 <- c(
  "Canada",
  "Ohio",
  "Pennsylvania",
  "Michigan",
  "New York",
  "U.S Total"
)

# Desired level order for the factor (not alphabetical: Canada goes last).
Lakes_levels <- c(
  "Michigan",
  "New York",
  "Ohio",
  "Pennsylvania",
  "U.S Total",
  "Canada"
)

# Build the factor with the explicit level order above.
y1 <- factor(x = x1, levels = Lakes_levels)
Make two bar charts here: one before ordering and another after.
# Average of `values` per region, ignoring missing observations.
values_by_region <- fishing %>%
  group_by(region) %>%
  summarise(avg_values = mean(values, na.rm = TRUE))
# Baseline plot: regions appear in their default order on the y axis.
ggplot(values_by_region, aes(x = avg_values, y = region)) +
  geom_point()
# Same plot, with regions reordered by their average value so the ranking is
# readable. Every layer must be joined with `+`: without the `+` after
# geom_point(), labs() runs as a separate statement, its labels object is
# printed to the console, and the axis titles are never applied to the plot.
values_by_region %>%
  ggplot(aes(x = avg_values, y = fct_reorder(.f = region, .x = avg_values))) +
  geom_point() +
  labs(y = NULL, x = "Average Fish Caught by Region")
Show examples of three functions:
# fct_recode(): rename individual levels, written as "new name" = "old name".
# Levels that are not listed keep their original label.
fishing %>%
  mutate(
    region_rev = fct_recode(
      region,
      "Michigan, MI" = "Michigan (MI)",
      "New York, NY" = "New York (NY)",
      "Green Bay, Michigan" = "Green Bay (MI)",
      "Green Bay, Wisconsin" = "Green Bay (WI)"
    )
  ) %>%
  select(region, region_rev)
## # A tibble: 65,706 × 2
## region region_rev
## <chr> <fct>
## 1 Michigan (MI) Michigan, MI
## 2 New York (NY) New York, NY
## 3 Ohio (OH) Ohio (OH)
## 4 Pennsylvania (PA) Pennsylvania (PA)
## 5 U.S. Total U.S. Total
## 6 Canada (ONT) Canada (ONT)
## 7 Michigan (MI) Michigan, MI
## 8 New York (NY) New York, NY
## 9 Ohio (OH) Ohio (OH)
## 10 Pennsylvania (PA) Pennsylvania (PA)
## # ℹ 65,696 more rows
# fct_collapse(): merge several existing levels into one new level. Here both
# Green Bay regions become a single "Green Bay" level.
fishing %>%
  mutate(
    region_col = fct_collapse(
      region,
      "Green Bay" = c("Green Bay (WI)", "Green Bay (MI)")
    )
  ) %>%
  select(region, region_col)
## # A tibble: 65,706 × 2
## region region_col
## <chr> <fct>
## 1 Michigan (MI) Michigan (MI)
## 2 New York (NY) New York (NY)
## 3 Ohio (OH) Ohio (OH)
## 4 Pennsylvania (PA) Pennsylvania (PA)
## 5 U.S. Total U.S. Total
## 6 Canada (ONT) Canada (ONT)
## 7 Michigan (MI) Michigan (MI)
## 8 New York (NY) New York (NY)
## 9 Ohio (OH) Ohio (OH)
## 10 Pennsylvania (PA) Pennsylvania (PA)
## # ℹ 65,696 more rows
# Row count per region, to see how frequent each level is before lumping.
count(fishing, region)
## # A tibble: 24 × 2
## region n
## <chr> <int>
## 1 Canada (ONT) 7479
## 2 Georgian Bay (GB) 2445
## 3 Green Bay (MI) 2126
## 4 Green Bay (WI) 2126
## 5 Huron Proper (HP) 2445
## 6 Illinois (IL) 2126
## 7 Indiana (IN) 2126
## 8 MI State Total 2126
## 9 Mich. Proper (MI) 2126
## 10 Mich. Proper (WI) 2126
## # ℹ 14 more rows
# fct_lump() is superseded in forcats; called without `n` or `prop` it simply
# delegates to fct_lump_lowfreq(), so call that function directly.
# fct_lump_lowfreq() lumps the least frequent levels into "Other" while keeping
# "Other" the smallest level.
# NOTE(review): the printed result still shows 24 distinct levels, the same as
# count(region), so nothing appears to be lumped for this data. Consider
# fct_lump_n() or fct_lump_prop() if the example should show visible lumping.
fishing %>%
  mutate(region_lump = fct_lump_lowfreq(region)) %>%
  distinct(region_lump)
## # A tibble: 24 × 1
## region_lump
## <fct>
## 1 Michigan (MI)
## 2 New York (NY)
## 3 Ohio (OH)
## 4 Pennsylvania (PA)
## 5 U.S. Total
## 6 Canada (ONT)
## 7 U.S. Total (NY)
## 8 U.S. Huron Proper (HP)
## 9 U.S. Saginaw Bay (SB)
## 10 U.S. Total (MI)
## # ℹ 14 more rows
```
No need to do anything here.