# Read the species spreadsheet and convert CategoryName to a factor in a
# single pipeline, then print the tibble to inspect the result.
data <- read_excel("../00_data/NationoalParkSpecies1.xlsx") %>%
  mutate(CategoryName = as.factor(CategoryName))
data
## # A tibble: 1,709 × 28
## ParkCode ParkName CategoryName Order Family TaxonRecordStatus SciName
## <chr> <chr> <fct> <chr> <chr> <chr> <chr>
## 1 ACAD Acadia National… Mammal Arti… Cervi… Active Alces …
## 2 ACAD Acadia National… Mammal Arti… Cervi… Active Odocoi…
## 3 ACAD Acadia National… Mammal Carn… Canid… Active Canis …
## 4 ACAD Acadia National… Mammal Carn… Canid… Active Canis …
## 5 ACAD Acadia National… Mammal Carn… Canid… Active Vulpes…
## 6 ACAD Acadia National… Mammal Carn… Felid… Active Lynx c…
## 7 ACAD Acadia National… Mammal Carn… Felid… Active Lynx r…
## 8 ACAD Acadia National… Mammal Carn… Mephi… Active Mephit…
## 9 ACAD Acadia National… Mammal Carn… Muste… Active Lutra …
## 10 ACAD Acadia National… Mammal Carn… Muste… Active Martes…
## # ℹ 1,699 more rows
## # ℹ 21 more variables: CommonNames <chr>, Synonyms <lgl>, ParkAccepted <lgl>,
## # Sensitive <lgl>, RecordStatus <chr>, Occurrence <chr>,
## # OccurrenceTags <chr>, Nativeness <chr>, NativenessTags <chr>,
## # Abundance <chr>, NPSTags <chr>, ParkTags <chr>, References <dbl>,
## # Observations <dbl>, Vouchers <dbl>, ExternalLinks <chr>, TEStatus <chr>,
## # StateStatus <chr>, OzoneSensitiveStatus <chr>, GRank <chr>, SRank <chr>
Make two bar charts here: one before ordering and another after.
# Before ordering: one horizontal bar per CategoryName level, with the levels
# left in whatever order the factor currently has.
data %>%
  ggplot(aes(x = CategoryName)) +
  geom_bar() +
  coord_flip() +
  labs(title = "Species Count by Category (Unordered)")
# After ordering: fct_infreq() reorders the CategoryName levels by how often
# each one occurs, so the bars are drawn in frequency order.
data <- mutate(data, CategoryName = fct_infreq(CategoryName))
data %>%
  ggplot(aes(x = CategoryName)) +
  geom_bar() +
  coord_flip() +
  labs(title = "Species Count by Category (Ordered by Frequency)")
Show examples of three functions:
#### fct_recode
# Rename two CategoryName levels to their plural form (new name = old name);
# every other level is left untouched. Print the tibble to check the result.
data <- mutate(
  data,
  CategoryName = fct_recode(CategoryName, Birds = "Bird", Mammals = "Mammal")
)
data
## # A tibble: 1,709 × 28
## ParkCode ParkName CategoryName Order Family TaxonRecordStatus SciName
## <chr> <chr> <fct> <chr> <chr> <chr> <chr>
## 1 ACAD Acadia National… Mammals Arti… Cervi… Active Alces …
## 2 ACAD Acadia National… Mammals Arti… Cervi… Active Odocoi…
## 3 ACAD Acadia National… Mammals Carn… Canid… Active Canis …
## 4 ACAD Acadia National… Mammals Carn… Canid… Active Canis …
## 5 ACAD Acadia National… Mammals Carn… Canid… Active Vulpes…
## 6 ACAD Acadia National… Mammals Carn… Felid… Active Lynx c…
## 7 ACAD Acadia National… Mammals Carn… Felid… Active Lynx r…
## 8 ACAD Acadia National… Mammals Carn… Mephi… Active Mephit…
## 9 ACAD Acadia National… Mammals Carn… Muste… Active Lutra …
## 10 ACAD Acadia National… Mammals Carn… Muste… Active Martes…
## # ℹ 1,699 more rows
## # ℹ 21 more variables: CommonNames <chr>, Synonyms <lgl>, ParkAccepted <lgl>,
## # Sensitive <lgl>, RecordStatus <chr>, Occurrence <chr>,
## # OccurrenceTags <chr>, Nativeness <chr>, NativenessTags <chr>,
## # Abundance <chr>, NPSTags <chr>, ParkTags <chr>, References <dbl>,
## # Observations <dbl>, Vouchers <dbl>, ExternalLinks <chr>, TEStatus <chr>,
## # StateStatus <chr>, OzoneSensitiveStatus <chr>, GRank <chr>, SRank <chr>
#### fct_collapse
# Collapse the CategoryName levels into broader groups, stored in a new
# CategoryGroup column. Levels not named in either group are kept as they are.
#
# The level names must match the factor as it exists at this point: the
# fct_recode() step above renamed "Mammal"/"Bird" to "Mammals"/"Birds", so the
# singular names no longer exist and previously triggered an
# "Unknown levels in `f`" warning (leaving mammals and birds uncollapsed).
#
# intersect() keeps only the names that are actual levels of CategoryName, so
# groups whose members are absent from this data set (e.g. the invertebrate
# categories) do not raise the unknown-levels warning.
data <- data %>%
  mutate(
    CategoryGroup = fct_collapse(
      CategoryName,
      Vertebrates = intersect(
        c("Mammals", "Birds", "Amphibian", "Reptile", "Fish"),
        levels(CategoryName)
      ),
      Invertebrates = intersect(
        c("Insect", "Crustacean", "Arachnid"),
        levels(CategoryName)
      )
    )
  )
data
## # A tibble: 1,709 × 29
## ParkCode ParkName CategoryName Order Family TaxonRecordStatus SciName
## <chr> <chr> <fct> <chr> <chr> <chr> <chr>
## 1 ACAD Acadia National… Mammals Arti… Cervi… Active Alces …
## 2 ACAD Acadia National… Mammals Arti… Cervi… Active Odocoi…
## 3 ACAD Acadia National… Mammals Carn… Canid… Active Canis …
## 4 ACAD Acadia National… Mammals Carn… Canid… Active Canis …
## 5 ACAD Acadia National… Mammals Carn… Canid… Active Vulpes…
## 6 ACAD Acadia National… Mammals Carn… Felid… Active Lynx c…
## 7 ACAD Acadia National… Mammals Carn… Felid… Active Lynx r…
## 8 ACAD Acadia National… Mammals Carn… Mephi… Active Mephit…
## 9 ACAD Acadia National… Mammals Carn… Muste… Active Lutra …
## 10 ACAD Acadia National… Mammals Carn… Muste… Active Martes…
## # ℹ 1,699 more rows
## # ℹ 22 more variables: CommonNames <chr>, Synonyms <lgl>, ParkAccepted <lgl>,
## # Sensitive <lgl>, RecordStatus <chr>, Occurrence <chr>,
## # OccurrenceTags <chr>, Nativeness <chr>, NativenessTags <chr>,
## # Abundance <chr>, NPSTags <chr>, ParkTags <chr>, References <dbl>,
## # Observations <dbl>, Vouchers <dbl>, ExternalLinks <chr>, TEStatus <chr>,
## # StateStatus <chr>, OzoneSensitiveStatus <chr>, GRank <chr>, SRank <chr>, …
#### fct_lump
# Keep the 5 most frequent Family values and lump every other family into a
# single "Other" level, then plot the counts as horizontal bars.
#
# fct_lump_n() is the explicit, currently recommended form of the superseded
# fct_lump(f, n = ...); for a given `n` it produces the same result.
# Note: this overwrites the original Family column with the lumped factor.
data <- data %>%
  mutate(Family = fct_lump_n(Family, n = 5))
data %>%
  ggplot(aes(x = Family)) +
  geom_bar() +
  coord_flip() +
  labs(title = "Top 5 Most Common Families, All Others Lumped")
No need to do anything here.