Import data

# excel file
Mydata <- read_excel("../00_data/Mydata.xlsx")
Mydata
## # A tibble: 65,706 × 8
##     ...1  year lake  species      grand_total comments region            values
##    <dbl> <dbl> <chr> <chr>        <chr>       <chr>    <chr>             <chr> 
##  1     1  1991 Erie  American Eel 1           NA       Michigan (MI)     0     
##  2     2  1991 Erie  American Eel 1           NA       New York (NY)     0     
##  3     3  1991 Erie  American Eel 1           NA       Ohio (OH)         0     
##  4     4  1991 Erie  American Eel 1           NA       Pennsylvania (PA) 0     
##  5     5  1991 Erie  American Eel 1           NA       U.S. Total        0     
##  6     6  1991 Erie  American Eel 1           NA       Canada (ONT)      1     
##  7     7  1992 Erie  American Eel 0           NA       Michigan (MI)     0     
##  8     8  1992 Erie  American Eel 0           NA       New York (NY)     0     
##  9     9  1992 Erie  American Eel 0           NA       Ohio (OH)         0     
## 10    10  1992 Erie  American Eel 0           NA       Pennsylvania (PA) 0     
## # ℹ 65,696 more rows

Apply the following dplyr verbs to your data

Filter rows

filter(Mydata, lake == "Erie", region == "Canada (ONT)")
## # A tibble: 2,439 × 8
##     ...1  year lake  species      grand_total comments region       values
##    <dbl> <dbl> <chr> <chr>        <chr>       <chr>    <chr>        <chr> 
##  1     6  1991 Erie  American Eel 1           NA       Canada (ONT) 1     
##  2    12  1992 Erie  American Eel 0           NA       Canada (ONT) 0     
##  3    18  1993 Erie  American Eel 0           NA       Canada (ONT) 0     
##  4    24  1994 Erie  American Eel 0           NA       Canada (ONT) 0     
##  5    30  1995 Erie  American Eel 0           NA       Canada (ONT) 0     
##  6    36  1996 Erie  American Eel 0           NA       Canada (ONT) 0     
##  7    42  1997 Erie  American Eel 0           NA       Canada (ONT) 0     
##  8    48  1998 Erie  American Eel 0           NA       Canada (ONT) 0     
##  9    54  1999 Erie  American Eel 0           NA       Canada (ONT) 0     
## 10    60  1885 Erie  Blue Pike    NA          NA       Canada (ONT) NA    
## # ℹ 2,429 more rows

Arrange rows

arrange(Mydata, (grand_total), desc(year))
## # A tibble: 65,706 × 8
##     ...1  year lake  species        grand_total comments region           values
##    <dbl> <dbl> <chr> <chr>          <chr>       <chr>    <chr>            <chr> 
##  1   799  2015 Erie  Bowfin         0           NA       Michigan (MI)    0     
##  2   800  2015 Erie  Bowfin         0           NA       New York (NY)    NA    
##  3   801  2015 Erie  Bowfin         0           NA       Ohio (OH)        NA    
##  4   802  2015 Erie  Bowfin         0           NA       Pennsylvania (P… NA    
##  5   803  2015 Erie  Bowfin         0           NA       U.S. Total       0     
##  6   804  2015 Erie  Bowfin         0           NA       Canada (ONT)     NA    
##  7  9463  2015 Erie  Pacific Salmon 0           NA       Michigan (MI)    NA    
##  8  9464  2015 Erie  Pacific Salmon 0           NA       New York (NY)    NA    
##  9  9465  2015 Erie  Pacific Salmon 0           NA       Ohio (OH)        NA    
## 10  9466  2015 Erie  Pacific Salmon 0           NA       Pennsylvania (P… 0     
## # ℹ 65,696 more rows

Select columns

select(Mydata, year:grand_total, region)
## # A tibble: 65,706 × 5
##     year lake  species      grand_total region           
##    <dbl> <chr> <chr>        <chr>       <chr>            
##  1  1991 Erie  American Eel 1           Michigan (MI)    
##  2  1991 Erie  American Eel 1           New York (NY)    
##  3  1991 Erie  American Eel 1           Ohio (OH)        
##  4  1991 Erie  American Eel 1           Pennsylvania (PA)
##  5  1991 Erie  American Eel 1           U.S. Total       
##  6  1991 Erie  American Eel 1           Canada (ONT)     
##  7  1992 Erie  American Eel 0           Michigan (MI)    
##  8  1992 Erie  American Eel 0           New York (NY)    
##  9  1992 Erie  American Eel 0           Ohio (OH)        
## 10  1992 Erie  American Eel 0           Pennsylvania (PA)
## # ℹ 65,696 more rows

Add columns

select(Mydata, lake, grand_total) %>%
    
    mutate(grand_total_cumsum = cumsum(grand_total))
## # A tibble: 65,706 × 3
##    lake  grand_total grand_total_cumsum
##    <chr> <chr>                    <dbl>
##  1 Erie  1                            1
##  2 Erie  1                            2
##  3 Erie  1                            3
##  4 Erie  1                            4
##  5 Erie  1                            5
##  6 Erie  1                            6
##  7 Erie  0                            6
##  8 Erie  0                            6
##  9 Erie  0                            6
## 10 Erie  0                            6
## # ℹ 65,696 more rows

Summarize by groups

Mydata %>%
    
    #group by species
  group_by(species) %>%   
    
    #count how many fish per species
  summarise(count = n(), .groups = "drop") %>%  
    
    #arrange in descending order
  arrange(desc(count))
## # A tibble: 51 × 2
##    species         count
##    <chr>           <int>
##  1 Lake Whitefish   4355
##  2 Walleye          4180
##  3 Northern Pike    3966
##  4 Suckers          3959
##  5 Lake Sturgeon    3656
##  6 Yellow Perch     3544
##  7 Lake Trout       3515
##  8 Cisco            3245
##  9 Carp             3167
## 10 Freshwater Drum  2645
## # ℹ 41 more rows