Import data

# excel file
data <- read_excel("../00_data/NationoalParkSpecies1.xlsx")
data
## # A tibble: 1,709 × 28
##    ParkCode ParkName         CategoryName Order Family TaxonRecordStatus SciName
##    <chr>    <chr>            <chr>        <chr> <chr>  <chr>             <chr>  
##  1 ACAD     Acadia National… Mammal       Arti… Cervi… Active            Alces …
##  2 ACAD     Acadia National… Mammal       Arti… Cervi… Active            Odocoi…
##  3 ACAD     Acadia National… Mammal       Carn… Canid… Active            Canis …
##  4 ACAD     Acadia National… Mammal       Carn… Canid… Active            Canis …
##  5 ACAD     Acadia National… Mammal       Carn… Canid… Active            Vulpes…
##  6 ACAD     Acadia National… Mammal       Carn… Felid… Active            Lynx c…
##  7 ACAD     Acadia National… Mammal       Carn… Felid… Active            Lynx r…
##  8 ACAD     Acadia National… Mammal       Carn… Mephi… Active            Mephit…
##  9 ACAD     Acadia National… Mammal       Carn… Muste… Active            Lutra …
## 10 ACAD     Acadia National… Mammal       Carn… Muste… Active            Martes…
## # ℹ 1,699 more rows
## # ℹ 21 more variables: CommonNames <chr>, Synonyms <lgl>, ParkAccepted <lgl>,
## #   Sensitive <lgl>, RecordStatus <chr>, Occurrence <chr>,
## #   OccurrenceTags <chr>, Nativeness <chr>, NativenessTags <chr>,
## #   Abundance <chr>, NPSTags <chr>, ParkTags <chr>, References <dbl>,
## #   Observations <dbl>, Vouchers <dbl>, ExternalLinks <chr>, TEStatus <chr>,
## #   StateStatus <chr>, OzoneSensitiveStatus <chr>, GRank <chr>, SRank <chr>

Apply the following dplyr verbs to your data

Filter rows

filter(data, CategoryName == "Mammal", Occurrence == "Present")
## # A tibble: 38 × 28
##    ParkCode ParkName         CategoryName Order Family TaxonRecordStatus SciName
##    <chr>    <chr>            <chr>        <chr> <chr>  <chr>             <chr>  
##  1 ACAD     Acadia National… Mammal       Arti… Cervi… Active            Alces …
##  2 ACAD     Acadia National… Mammal       Arti… Cervi… Active            Odocoi…
##  3 ACAD     Acadia National… Mammal       Carn… Canid… Active            Canis …
##  4 ACAD     Acadia National… Mammal       Carn… Canid… Active            Vulpes…
##  5 ACAD     Acadia National… Mammal       Carn… Felid… Active            Lynx r…
##  6 ACAD     Acadia National… Mammal       Carn… Muste… Active            Lutra …
##  7 ACAD     Acadia National… Mammal       Carn… Muste… Active            Martes…
##  8 ACAD     Acadia National… Mammal       Carn… Muste… Active            Mustel…
##  9 ACAD     Acadia National… Mammal       Carn… Muste… Active            Mustel…
## 10 ACAD     Acadia National… Mammal       Carn… Muste… Active            Mustel…
## # ℹ 28 more rows
## # ℹ 21 more variables: CommonNames <chr>, Synonyms <lgl>, ParkAccepted <lgl>,
## #   Sensitive <lgl>, RecordStatus <chr>, Occurrence <chr>,
## #   OccurrenceTags <chr>, Nativeness <chr>, NativenessTags <chr>,
## #   Abundance <chr>, NPSTags <chr>, ParkTags <chr>, References <dbl>,
## #   Observations <dbl>, Vouchers <dbl>, ExternalLinks <chr>, TEStatus <chr>,
## #   StateStatus <chr>, OzoneSensitiveStatus <chr>, GRank <chr>, SRank <chr>

Arrange rows

arrange(data, desc(CommonNames), desc(Observations))
## # A tibble: 1,709 × 28
##    ParkCode ParkName         CategoryName Order Family TaxonRecordStatus SciName
##    <chr>    <chr>            <chr>        <chr> <chr>  <chr>             <chr>  
##  1 ACAD     Acadia National… Vascular Pl… Poly… Dryop… Active            Dryopt…
##  2 ACAD     Acadia National… Vascular Pl… Aste… Aster… Active            Symphy…
##  3 ACAD     Acadia National… Vascular Pl… NA    NA     Active            Salix …
##  4 ACAD     Acadia National… Vascular Pl… Malp… Salic… Active            Salix …
##  5 ACAD     Acadia National… Vascular Pl… Malp… Salic… Active            Salix …
##  6 ACAD     Acadia National… Vascular Pl… Malp… Salic… Active            Salix …
##  7 ACAD     Acadia National… Vascular Pl… Aste… Aster… Active            Symphy…
##  8 ACAD     Acadia National… Vascular Pl… Poal… Cyper… Active            Carex …
##  9 ACAD     Acadia National… Vascular Pl… Poal… Cyper… Active            Scirpu…
## 10 ACAD     Acadia National… Vascular Pl… Poal… Junca… Active            Juncus…
## # ℹ 1,699 more rows
## # ℹ 21 more variables: CommonNames <chr>, Synonyms <lgl>, ParkAccepted <lgl>,
## #   Sensitive <lgl>, RecordStatus <chr>, Occurrence <chr>,
## #   OccurrenceTags <chr>, Nativeness <chr>, NativenessTags <chr>,
## #   Abundance <chr>, NPSTags <chr>, ParkTags <chr>, References <dbl>,
## #   Observations <dbl>, Vouchers <dbl>, ExternalLinks <chr>, TEStatus <chr>,
## #   StateStatus <chr>, OzoneSensitiveStatus <chr>, GRank <chr>, SRank <chr>

Select columns

select(data, CommonNames, CategoryName, Occurrence, Observations) 
## # A tibble: 1,709 × 4
##    CommonNames                              CategoryName Occurrence Observations
##    <chr>                                    <chr>        <chr>             <dbl>
##  1 Moose                                    Mammal       Present               1
##  2 Northern White-tailed Deer, Virginia De… Mammal       Present               0
##  3 Coyote, Eastern Coyote                   Mammal       Present               1
##  4 Eastern Timber Wolf, Gray Wolf, Timber … Mammal       Unconfirm…            0
##  5 Black Fox, Cross Fox, Eastern Red Fox, … Mammal       Present               0
##  6 Canada Lynx                              Mammal       Unconfirm…            0
##  7 Bay Lynx, Bobcat, Red Lynx, Wild Cat     Mammal       Present               0
##  8 Eastern Skunk, Polecat, Striped Skunk    Mammal       Not In Pa…            0
##  9 Otter, River Otter                       Mammal       Present               0
## 10 Blackcat, Fisher, Pekan                  Mammal       Present               0
## # ℹ 1,699 more rows

Add columns

mutate(data,
    gain = SciName, CategoryName)
## # A tibble: 1,709 × 29
##    ParkCode ParkName         CategoryName Order Family TaxonRecordStatus SciName
##    <chr>    <chr>            <chr>        <chr> <chr>  <chr>             <chr>  
##  1 ACAD     Acadia National… Mammal       Arti… Cervi… Active            Alces …
##  2 ACAD     Acadia National… Mammal       Arti… Cervi… Active            Odocoi…
##  3 ACAD     Acadia National… Mammal       Carn… Canid… Active            Canis …
##  4 ACAD     Acadia National… Mammal       Carn… Canid… Active            Canis …
##  5 ACAD     Acadia National… Mammal       Carn… Canid… Active            Vulpes…
##  6 ACAD     Acadia National… Mammal       Carn… Felid… Active            Lynx c…
##  7 ACAD     Acadia National… Mammal       Carn… Felid… Active            Lynx r…
##  8 ACAD     Acadia National… Mammal       Carn… Mephi… Active            Mephit…
##  9 ACAD     Acadia National… Mammal       Carn… Muste… Active            Lutra …
## 10 ACAD     Acadia National… Mammal       Carn… Muste… Active            Martes…
## # ℹ 1,699 more rows
## # ℹ 22 more variables: CommonNames <chr>, Synonyms <lgl>, ParkAccepted <lgl>,
## #   Sensitive <lgl>, RecordStatus <chr>, Occurrence <chr>,
## #   OccurrenceTags <chr>, Nativeness <chr>, NativenessTags <chr>,
## #   Abundance <chr>, NPSTags <chr>, ParkTags <chr>, References <dbl>,
## #   Observations <dbl>, Vouchers <dbl>, ExternalLinks <chr>, TEStatus <chr>,
## #   StateStatus <chr>, OzoneSensitiveStatus <chr>, GRank <chr>, SRank <chr>, …

Summarize by groups

data %>%
    
     # Group by CategoryName
    group_by(CategoryName) %>%
    
    # Calculate average References
    summarise(References = mean(References, na.rm = TRUE)) %>%
    
    # Sort it
    arrange(References)
## # A tibble: 6 × 2
##   CategoryName   References
##   <chr>               <dbl>
## 1 Fish                 2.79
## 2 Vascular Plant       3.79
## 3 Reptile              6.36
## 4 Amphibian            8.4 
## 5 Mammal               8.73
## 6 Bird                11.8