Import data
# excel file
Mydata <- read_excel("../00_data/Mydata.xlsx")
Mydata
## # A tibble: 65,706 × 8
## ...1 year lake species grand_total comments region values
## <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 1991 Erie American Eel 1 NA Michigan (MI) 0
## 2 2 1991 Erie American Eel 1 NA New York (NY) 0
## 3 3 1991 Erie American Eel 1 NA Ohio (OH) 0
## 4 4 1991 Erie American Eel 1 NA Pennsylvania (PA) 0
## 5 5 1991 Erie American Eel 1 NA U.S. Total 0
## 6 6 1991 Erie American Eel 1 NA Canada (ONT) 1
## 7 7 1992 Erie American Eel 0 NA Michigan (MI) 0
## 8 8 1992 Erie American Eel 0 NA New York (NY) 0
## 9 9 1992 Erie American Eel 0 NA Ohio (OH) 0
## 10 10 1992 Erie American Eel 0 NA Pennsylvania (PA) 0
## # ℹ 65,696 more rows
Apply the following dplyr verbs to your data
Filter rows
filter(Mydata, lake == "Erie", region == "Canada (ONT)")
## # A tibble: 2,439 × 8
## ...1 year lake species grand_total comments region values
## <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 6 1991 Erie American Eel 1 NA Canada (ONT) 1
## 2 12 1992 Erie American Eel 0 NA Canada (ONT) 0
## 3 18 1993 Erie American Eel 0 NA Canada (ONT) 0
## 4 24 1994 Erie American Eel 0 NA Canada (ONT) 0
## 5 30 1995 Erie American Eel 0 NA Canada (ONT) 0
## 6 36 1996 Erie American Eel 0 NA Canada (ONT) 0
## 7 42 1997 Erie American Eel 0 NA Canada (ONT) 0
## 8 48 1998 Erie American Eel 0 NA Canada (ONT) 0
## 9 54 1999 Erie American Eel 0 NA Canada (ONT) 0
## 10 60 1885 Erie Blue Pike NA NA Canada (ONT) NA
## # ℹ 2,429 more rows
Arrange rows
arrange(Mydata, (grand_total), desc(year))
## # A tibble: 65,706 × 8
## ...1 year lake species grand_total comments region values
## <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 799 2015 Erie Bowfin 0 NA Michigan (MI) 0
## 2 800 2015 Erie Bowfin 0 NA New York (NY) NA
## 3 801 2015 Erie Bowfin 0 NA Ohio (OH) NA
## 4 802 2015 Erie Bowfin 0 NA Pennsylvania (P… NA
## 5 803 2015 Erie Bowfin 0 NA U.S. Total 0
## 6 804 2015 Erie Bowfin 0 NA Canada (ONT) NA
## 7 9463 2015 Erie Pacific Salmon 0 NA Michigan (MI) NA
## 8 9464 2015 Erie Pacific Salmon 0 NA New York (NY) NA
## 9 9465 2015 Erie Pacific Salmon 0 NA Ohio (OH) NA
## 10 9466 2015 Erie Pacific Salmon 0 NA Pennsylvania (P… 0
## # ℹ 65,696 more rows
Select columns
select(Mydata, year:grand_total, region)
## # A tibble: 65,706 × 5
## year lake species grand_total region
## <dbl> <chr> <chr> <chr> <chr>
## 1 1991 Erie American Eel 1 Michigan (MI)
## 2 1991 Erie American Eel 1 New York (NY)
## 3 1991 Erie American Eel 1 Ohio (OH)
## 4 1991 Erie American Eel 1 Pennsylvania (PA)
## 5 1991 Erie American Eel 1 U.S. Total
## 6 1991 Erie American Eel 1 Canada (ONT)
## 7 1992 Erie American Eel 0 Michigan (MI)
## 8 1992 Erie American Eel 0 New York (NY)
## 9 1992 Erie American Eel 0 Ohio (OH)
## 10 1992 Erie American Eel 0 Pennsylvania (PA)
## # ℹ 65,696 more rows
Add columns
select(Mydata, lake, grand_total) %>%
mutate(grand_total_cumsum = cumsum(grand_total))
## # A tibble: 65,706 × 3
## lake grand_total grand_total_cumsum
## <chr> <chr> <dbl>
## 1 Erie 1 1
## 2 Erie 1 2
## 3 Erie 1 3
## 4 Erie 1 4
## 5 Erie 1 5
## 6 Erie 1 6
## 7 Erie 0 6
## 8 Erie 0 6
## 9 Erie 0 6
## 10 Erie 0 6
## # ℹ 65,696 more rows
Summarize by groups
Mydata %>%
#group by species
group_by(species) %>%
#count how many fish per species
summarise(count = n(), .groups = "drop") %>%
#arrange in descending order
arrange(desc(count))
## # A tibble: 51 × 2
## species count
## <chr> <int>
## 1 Lake Whitefish 4355
## 2 Walleye 4180
## 3 Northern Pike 3966
## 4 Suckers 3959
## 5 Lake Sturgeon 3656
## 6 Yellow Perch 3544
## 7 Lake Trout 3515
## 8 Cisco 3245
## 9 Carp 3167
## 10 Freshwater Drum 2645
## # ℹ 41 more rows