Import data
# excel file
birds <- read_excel("../00_data/data/myDataBirds.xlsx")
birds
## # A tibble: 18,706 × 6
## year species species_latin how_many_counted total_hours
## <dbl> <chr> <chr> <dbl> <chr>
## 1 1921 American Bittern Botaurus len… 0 8
## 2 1921 American Black Duck Anas rubripes 0 8
## 3 1921 American Coot Fulica ameri… 0 8
## 4 1921 American Crow Corvus brach… 0 8
## 5 1921 American Goldfinch Spinus trist… 0 8
## 6 1921 American Kestrel Falco sparve… 0 8
## 7 1921 American Pipit Anthus rubes… 0 8
## 8 1921 American Robin Turdus migra… 1 8
## 9 1921 American Three-toed Woodpec… Picoides dor… 0 8
## 10 1921 American Tree Sparrow Spizelloides… 12 8
## # ℹ 18,696 more rows
## # ℹ 1 more variable: how_many_counted_by_hour <chr>
Apply the following dplyr verbs to your data
Filter rows
filter(birds, species == "American Crow")
## # A tibble: 94 × 6
## year species species_latin how_many_counted total_hours
## <dbl> <chr> <chr> <dbl> <chr>
## 1 1921 American Crow Corvus brachyrhynchos 0 8
## 2 1922 American Crow Corvus brachyrhynchos 0 NA
## 3 1924 American Crow Corvus brachyrhynchos 0 NA
## 4 1925 American Crow Corvus brachyrhynchos 0 NA
## 5 1926 American Crow Corvus brachyrhynchos 0 NA
## 6 1928 American Crow Corvus brachyrhynchos 0 NA
## 7 1930 American Crow Corvus brachyrhynchos 1 NA
## 8 1931 American Crow Corvus brachyrhynchos 1 NA
## 9 1932 American Crow Corvus brachyrhynchos 3 NA
## 10 1933 American Crow Corvus brachyrhynchos 5 NA
## # ℹ 84 more rows
## # ℹ 1 more variable: how_many_counted_by_hour <chr>
Arrange rows
arrange(birds, year)
## # A tibble: 18,706 × 6
## year species species_latin how_many_counted total_hours
## <dbl> <chr> <chr> <dbl> <chr>
## 1 1921 American Bittern Botaurus len… 0 8
## 2 1921 American Black Duck Anas rubripes 0 8
## 3 1921 American Coot Fulica ameri… 0 8
## 4 1921 American Crow Corvus brach… 0 8
## 5 1921 American Goldfinch Spinus trist… 0 8
## 6 1921 American Kestrel Falco sparve… 0 8
## 7 1921 American Pipit Anthus rubes… 0 8
## 8 1921 American Robin Turdus migra… 1 8
## 9 1921 American Three-toed Woodpec… Picoides dor… 0 8
## 10 1921 American Tree Sparrow Spizelloides… 12 8
## # ℹ 18,696 more rows
## # ℹ 1 more variable: how_many_counted_by_hour <chr>
Select columns
select(birds, year, how_many_counted)
## # A tibble: 18,706 × 2
## year how_many_counted
## <dbl> <dbl>
## 1 1921 0
## 2 1921 0
## 3 1921 0
## 4 1921 0
## 5 1921 0
## 6 1921 0
## 7 1921 0
## 8 1921 1
## 9 1921 0
## 10 1921 12
## # ℹ 18,696 more rows
Add columns
filter(birds, species == "American Crow") %>%
select(year, how_many_counted) %>%
mutate(how_many_counted_cummean = cummean(how_many_counted))
## # A tibble: 94 × 3
## year how_many_counted how_many_counted_cummean
## <dbl> <dbl> <dbl>
## 1 1921 0 0
## 2 1922 0 0
## 3 1924 0 0
## 4 1925 0 0
## 5 1926 0 0
## 6 1928 0 0
## 7 1930 1 0.143
## 8 1931 1 0.25
## 9 1932 3 0.556
## 10 1933 5 1
## # ℹ 84 more rows
Summarize by groups
birds %>%
group_by(species) %>%
summarize(avg_count = mean(how_many_counted, na.rm = TRUE))
## # A tibble: 199 × 2
## species avg_count
## <chr> <dbl>
## 1 American Bittern 0.0213
## 2 American Black Duck 261.
## 3 American Coot 25.8
## 4 American Crow 1373.
## 5 American Goldfinch 297.
## 6 American Kestrel 16.2
## 7 American Pipit 0.128
## 8 American Robin 172.
## 9 American Three-toed Woodpecker 0.0213
## 10 American Tree Sparrow 538.
## # ℹ 189 more rows