Import your data
fishing <- read_csv("../00_data/fishing.csv")
## Rows: 65706 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): lake, species, comments, region
## dbl (3): year, grand_total, values
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
fishing_small <- fishing %>%
select(year, lake, species, grand_total, values) %>%
filter(lake == "Erie", year %in% c(1990, 2000), species %in% c("Walleye", "Bullheads"))
Pivoting
wide to long form
Separating and Uniting
Unite Two column
fishingUnited <- fishing %>%
unite(col = "Observed/CaughtinLbs", grand_total, values, sep = "/")
Separate A Column
fishingUnited %>%
separate(col = "Observed/CaughtinLbs", into = c("grand_total", "values"), sep = "/")
## # A tibble: 65,706 × 7
## year lake species grand_total values comments region
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1991 Erie American Eel 1 0 <NA> Michigan (MI)
## 2 1991 Erie American Eel 1 0 <NA> New York (NY)
## 3 1991 Erie American Eel 1 0 <NA> Ohio (OH)
## 4 1991 Erie American Eel 1 0 <NA> Pennsylvania (PA)
## 5 1991 Erie American Eel 1 0 <NA> U.S. Total
## 6 1991 Erie American Eel 1 1 <NA> Canada (ONT)
## 7 1992 Erie American Eel 0 0 <NA> Michigan (MI)
## 8 1992 Erie American Eel 0 0 <NA> New York (NY)
## 9 1992 Erie American Eel 0 0 <NA> Ohio (OH)
## 10 1992 Erie American Eel 0 0 <NA> Pennsylvania (PA)
## # ℹ 65,696 more rows
Missing Values
fishing %>%
pivot_wider(names_from = year, values_from = comments)
## Warning: Values from `comments` are not uniquely identified; output will contain
## list-cols.
## • Use `values_fn = list` to suppress this warning.
## • Use `values_fn = {summary_fun}` to summarise duplicates.
## • Use the following dplyr code to identify duplicates.
## {data} |>
## dplyr::summarise(n = dplyr::n(), .by = c(lake, species, grand_total, region,
## values, year)) |>
## dplyr::filter(n > 1L)
## # A tibble: 37,552 × 154
## lake species grand_total region values `1991` `1992` `1993` `1994` `1995`
## <chr> <chr> <dbl> <chr> <dbl> <list> <list> <list> <list> <list>
## 1 Erie American … 1 Michi… 0 <chr> <NULL> <NULL> <NULL> <NULL>
## 2 Erie American … 1 New Y… 0 <chr> <NULL> <NULL> <NULL> <NULL>
## 3 Erie American … 1 Ohio … 0 <chr> <NULL> <NULL> <NULL> <NULL>
## 4 Erie American … 1 Penns… 0 <chr> <NULL> <NULL> <NULL> <NULL>
## 5 Erie American … 1 U.S. … 0 <chr> <NULL> <NULL> <NULL> <NULL>
## 6 Erie American … 1 Canad… 1 <chr> <NULL> <NULL> <NULL> <NULL>
## 7 Erie American … 0 Michi… 0 <NULL> <chr> <chr> <chr> <chr>
## 8 Erie American … 0 New Y… 0 <NULL> <chr> <chr> <chr> <chr>
## 9 Erie American … 0 Ohio … 0 <NULL> <chr> <chr> <chr> <chr>
## 10 Erie American … 0 Penns… 0 <NULL> <chr> <chr> <chr> <chr>
## # ℹ 37,542 more rows
## # ℹ 144 more variables: `1996` <list>, `1997` <list>, `1998` <list>,
## # `1999` <list>, `1885` <list>, `1886` <list>, `1887` <list>, `1888` <list>,
## # `1889` <list>, `1890` <list>, `1891` <list>, `1892` <list>, `1893` <list>,
## # `1894` <list>, `1895` <list>, `1896` <list>, `1897` <list>, `1898` <list>,
## # `1899` <list>, `1900` <list>, `1901` <list>, `1902` <list>, `1903` <list>,
## # `1904` <list>, `1905` <list>, `1906` <list>, `1907` <list>, …