## Import data

# Read the Excel workbook into `mydata`; the path is relative to the
# current working directory.
mydata <- read_excel(path = "../00_data/mydata.xlsx")

Introduction

Frogs are cool!

Questions

What kinds of frogs are found in Australia, and in which state or province are they found?

Variation

Visualizing distributions

# Bar chart: number of rows in `mydata` for each `stateProvince` value.
ggplot(data = mydata, mapping = aes(x = stateProvince)) +
    geom_bar()

# Histogram of `coordinateUncertaintyInMeters` over all rows, bin width 500.
ggplot(data = mydata, mapping = aes(x = coordinateUncertaintyInMeters)) +
    geom_histogram(binwidth = 500)

# Histogram restricted to rows whose uncertainty is below 1000, using
# narrower bins (width 100) to show more detail in that range.
ggplot(
    data = filter(mydata, coordinateUncertaintyInMeters < 1000),
    mapping = aes(x = coordinateUncertaintyInMeters)
) +
    geom_histogram(binwidth = 100)

# Frequency polygons (bin width 100) for rows with uncertainty below 1000,
# one coloured line per `stateProvince` so the distributions can be overlaid.
ggplot(
    data = filter(mydata, coordinateUncertaintyInMeters < 1000),
    mapping = aes(x = coordinateUncertaintyInMeters, color = stateProvince)
) +
    geom_freqpoly(binwidth = 100)

Typical values

# Fine-grained histogram (bin width 10) of rows with uncertainty below 500,
# used to look for the most common values.
ggplot(
    data = filter(mydata, coordinateUncertaintyInMeters < 500),
    mapping = aes(x = coordinateUncertaintyInMeters)
) +
    geom_histogram(binwidth = 10)

Unusual values

# Histogram of coordinate uncertainty over its full range, to expose
# unusual values. `bins = 30` is the value ggplot2 falls back to when no
# binning is given; stating it explicitly keeps the plot unchanged and stops
# `stat_bin()` from emitting its "Pick better value `binwidth`" message.
mydata %>%
    ggplot(aes(coordinateUncertaintyInMeters)) +
    geom_histogram(bins = 30)

# Same histogram with the y-axis zoomed to counts between 0 and 50 so that
# sparsely populated bins become visible. `coord_cartesian()` is used for
# the zoom, so the bar heights are computed from all rows before clipping.
# `bins = 30` makes ggplot2's fallback explicit, which keeps the plot
# unchanged and silences the `stat_bin()` "Pick better value" message.
mydata %>%
    ggplot(aes(coordinateUncertaintyInMeters)) +
    geom_histogram(bins = 30) +
    coord_cartesian(ylim = c(0, 50))

Missing Values

# Replace uncertainty values above 5000 with NA (instead of dropping the
# rows) and plot the cleaned values against latitude.
# `na.rm = TRUE` tells geom_point() that dropping the NA rows is intended,
# so it no longer raises the "Removed ... rows containing missing values"
# warning; the plotted points are unchanged.
mydata %>%
    mutate(
        uncertainty_clean = ifelse(
            coordinateUncertaintyInMeters > 5000,
            NA,
            coordinateUncertaintyInMeters
        )
    ) %>%
    ggplot(aes(x = uncertainty_clean, y = decimalLatitude)) +
    geom_point(na.rm = TRUE)

Covariation

A categorical and continuous variable

# Box plot of coordinate uncertainty (rows below 2000 only) for each
# `stateProvince`; the axes are flipped so the state names are on the
# vertical axis.
ggplot(
    data = filter(mydata, coordinateUncertaintyInMeters < 2000),
    mapping = aes(x = stateProvince, y = coordinateUncertaintyInMeters)
) +
    geom_boxplot() +
    coord_flip()

Two categorical variables

# Names of the species with the highest row counts: count rows per
# `scientificName`, keep the 10 largest counts, and extract the names as a
# vector.
top_species <- pull(
    slice_max(count(mydata, scientificName), order_by = n, n = 10),
    scientificName
)

# Heat map of row counts for every state/species combination, limited to
# the species listed in `top_species`. The x-axis labels are rotated 45
# degrees.
ggplot(
    data = count(
        filter(mydata, scientificName %in% top_species),
        stateProvince,
        scientificName
    ),
    mapping = aes(x = stateProvince, y = scientificName, fill = n)
) +
    geom_tile() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

Two continuous variables

# Hexagonal-bin plot of longitude against latitude; each hexagon is filled
# according to how many rows fall inside it.
ggplot(
    data = mydata,
    mapping = aes(x = decimalLongitude, y = decimalLatitude)
) +
    geom_hex()

# Box plots of latitude for successive 100-wide bins of coordinate
# uncertainty (rows below 1000 only). The bins come from cut_width() and
# are passed through the `group` aesthetic.
# Both x and y are continuous, so ggplot2 cannot infer the box orientation
# and would warn before defaulting to "x"; `orientation = "x"` states that
# choice explicitly, giving the same plot without the warning.
mydata %>%
    filter(coordinateUncertaintyInMeters < 1000) %>%
    ggplot(aes(x = coordinateUncertaintyInMeters, y = decimalLatitude)) +
    geom_boxplot(
        aes(group = cut_width(coordinateUncertaintyInMeters, 100)),
        orientation = "x"
    )

Patterns and models

library(modelr)

# Linear model of latitude on log(uncertainty + 1); the +1 keeps the log
# finite when the uncertainty is 0.
mod <- lm(
    formula = decimalLatitude ~ log(coordinateUncertaintyInMeters + 1),
    data = mydata
)

# Copy of `mydata` with the model residuals appended (column `resid`).
mydata_res <- add_residuals(data = mydata, model = mod)

# Residuals of `mod` against coordinate uncertainty, limited to the range
# 0-2000 on the x-axis. xlim() discards points outside that range, and
# points with missing values cannot be drawn either; `na.rm = TRUE` marks
# that removal as intentional so geom_point() does not warn about it. The
# plotted points are unchanged.
mydata_res %>%
    ggplot(aes(coordinateUncertaintyInMeters, resid)) +
    geom_point(na.rm = TRUE) +
    xlim(0, 2000)

# Box plot of the model residuals for each `stateProvince`, with the axes
# flipped so the state names are on the vertical axis.
ggplot(data = mydata_res, mapping = aes(x = stateProvince, y = resid)) +
    geom_boxplot() +
    coord_flip()