# Attach the packages this script relies on: the tidyverse supplies `%>%`,
# the dplyr verbs and ggplot2; readxl supplies read_excel().
library(tidyverse)
library(readxl)

# Show the working directory, because the data path below is relative to it.
getwd()
## [1] "C:/Users/james/OneDrive/Desktop/PSU_DAT3000_IntroToDA/04_module7"

# Import the park species workbook and print the resulting tibble.
# NOTE(review): the file name spelling ("Nationoal") is kept as-is;
# presumably it matches the file on disk — confirm before renaming.
data <- read_excel("../00_data/NationoalParkSpecies1.xlsx")
data
## # A tibble: 1,709 × 28
## ParkCode ParkName CategoryName Order Family TaxonRecordStatus SciName
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 ACAD Acadia National… Mammal Arti… Cervi… Active Alces …
## 2 ACAD Acadia National… Mammal Arti… Cervi… Active Odocoi…
## 3 ACAD Acadia National… Mammal Carn… Canid… Active Canis …
## 4 ACAD Acadia National… Mammal Carn… Canid… Active Canis …
## 5 ACAD Acadia National… Mammal Carn… Canid… Active Vulpes…
## 6 ACAD Acadia National… Mammal Carn… Felid… Active Lynx c…
## 7 ACAD Acadia National… Mammal Carn… Felid… Active Lynx r…
## 8 ACAD Acadia National… Mammal Carn… Mephi… Active Mephit…
## 9 ACAD Acadia National… Mammal Carn… Muste… Active Lutra …
## 10 ACAD Acadia National… Mammal Carn… Muste… Active Martes…
## # ℹ 1,699 more rows
## # ℹ 21 more variables: CommonNames <chr>, Synonyms <lgl>, ParkAccepted <lgl>,
## # Sensitive <lgl>, RecordStatus <chr>, Occurrence <chr>,
## # OccurrenceTags <chr>, Nativeness <chr>, NativenessTags <chr>,
## # Abundance <chr>, NPSTags <chr>, ParkTags <chr>, References <dbl>,
## # Observations <dbl>, Vouchers <dbl>, ExternalLinks <chr>, TEStatus <chr>,
## # StateStatus <chr>, OzoneSensitiveStatus <chr>, GRank <chr>, SRank <chr>
# Bar chart: number of records in each species category.
ggplot(data, aes(x = CategoryName)) +
  geom_bar()
# Bar chart: how many records share each value of References.
ggplot(data, mapping = aes(x = References)) +
  geom_bar()
# Dot plot of abundance classes, keeping only records that have more than
# two references.
ggplot(filter(data, References > 2), aes(x = Abundance)) +
  geom_dotplot()
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.
# Bar chart of record counts per common name, with Order mapped to `color`.
# NOTE(review): in geom_bar() `color` affects the bar outline; `fill` may
# have been intended — confirm.
ggplot(data, aes(x = CommonNames, color = Order)) +
  geom_bar()
# Bar chart of record counts per taxonomic family, restricted to records
# with more than one observation.
ggplot(filter(data, Observations > 1), aes(x = Family)) +
  geom_bar()
# Distribution of eruption durations in the built-in `faithful` data set.
# `eruptions` is a continuous measurement, so it is binned with a histogram;
# a bar chart would draw one thin bar per distinct value.
faithful %>%
  ggplot(aes(x = eruptions)) +
  geom_histogram(binwidth = 0.25)
# Observation counts plotted as points within each species category.
ggplot(data, aes(x = CategoryName, y = Observations)) +
  geom_point()
# Common names plotted against taxonomic order, zoomed on the y axis without
# dropping any rows.
# NOTE(review): CommonNames is a character column, so ylim = c(0, 50) applies
# to positions on a discrete axis rather than numeric values — confirm intent.
ggplot(data, aes(x = Order, y = CommonNames)) +
  geom_point() +
  coord_cartesian(ylim = c(0, 50))
# Scatter plot of abundance class against observation count.
# No value-masking step is applied here: Abundance is a character column, so
# numeric thresholds are not meaningful for it, and the plot uses the columns
# exactly as they were imported.
# NOTE(review): if unusual values should be replaced with NA before plotting,
# apply the rule to a numeric column (e.g. Observations) and plot that column.
data %>%
  ggplot(aes(x = Observations, y = Abundance)) +
  geom_point()
# Box plots comparing the distribution of References across Occurrence
# values.
ggplot(data, aes(x = References, y = Occurrence)) +
  geom_boxplot()
# Heat map: tile fill shows how many records fall in each
# Occurrence / Nativeness combination.
ggplot(
  count(data, Occurrence, Nativeness),
  aes(x = Nativeness, y = Occurrence, fill = n)
) +
  geom_tile()
### Two continuous variables
# Heat map: tile fill shows how many records share each
# Observations / Occurrence combination.
ggplot(
  count(data, Observations, Occurrence),
  aes(x = Observations, y = Occurrence, fill = n)
) +
  geom_tile()
## Patterns and models
# Keep only rows where both counts are present and strictly positive, so that
# taking logarithms of them in the model is well defined.
data_clean <- filter(
  data,
  !is.na(References),
  !is.na(Observations),
  References > 0,
  Observations > 0
)
# Fit a log-log linear model of References on Observations.
mod <- lm(log(References) ~ log(Observations), data = data_clean)

# Attach the model residuals to the cleaned data, then exponentiate them to
# undo the log transform applied to the response.
data4 <- mutate(
  modelr::add_residuals(data_clean, mod),
  resid = exp(resid)
)
# Back-transformed residuals plotted against the number of references.
ggplot(data4, aes(References, resid)) +
  geom_point()
# Back-transformed residuals across the range of observation counts.
# Observations is numeric, so the boxes need an explicit grouping; without
# one, ggplot2 warns about a continuous x aesthetic. cut_interval() splits the
# range into 10 equal-width bins, giving one box per bin — adjust `n` as
# needed.
data4 %>%
  ggplot(aes(Observations, resid)) +
  geom_boxplot(aes(group = cut_interval(Observations, n = 10)))