Import data
# Load the CSV into a tibble; the unnamed first column is auto-renamed `...1`.
Mydata <- read_csv(file = "../00_data//Mydata.csv")
## New names:
## Rows: 65706 Columns: 8
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): lake, species, comments, region dbl (4): ...1, year, grand_total, values
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
print(Mydata)
## # A tibble: 65,706 × 8
## ...1 year lake species grand_total comments region values
## <dbl> <dbl> <chr> <chr> <dbl> <chr> <chr> <dbl>
## 1 1 1991 Erie American Eel 1 <NA> Michigan (MI) 0
## 2 2 1991 Erie American Eel 1 <NA> New York (NY) 0
## 3 3 1991 Erie American Eel 1 <NA> Ohio (OH) 0
## 4 4 1991 Erie American Eel 1 <NA> Pennsylvania (PA) 0
## 5 5 1991 Erie American Eel 1 <NA> U.S. Total 0
## 6 6 1991 Erie American Eel 1 <NA> Canada (ONT) 1
## 7 7 1992 Erie American Eel 0 <NA> Michigan (MI) 0
## 8 8 1992 Erie American Eel 0 <NA> New York (NY) 0
## 9 9 1992 Erie American Eel 0 <NA> Ohio (OH) 0
## 10 10 1992 Erie American Eel 0 <NA> Pennsylvania (PA) 0
## # ℹ 65,696 more rows
Introduction
Questions
Variation
# Bar chart: one bar per lake, bar height = number of rows for that lake.
Mydata %>%
  ggplot(aes(x = lake)) +
  geom_bar()

Visualizing distributions
# Histogram of `year`; no binwidth is given, so ggplot2 falls back to 30 bins.
Mydata %>%
  ggplot(aes(x = year)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Frequency polygons of `year`, drawn as one coloured line per lake.
ggplot(Mydata) +
  geom_freqpoly(aes(x = year, colour = lake))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Typical values
# Histogram of `year` with narrow (0.5-wide) bins to show the typical values.
Mydata %>%
  ggplot(aes(x = year)) +
  geom_histogram(binwidth = 0.5)

Unusual values
# Same histogram, zoomed to counts 0-200 so rarely occurring years stand out.
# coord_cartesian() only changes the visible window; no rows are dropped.
ggplot(data = Mydata, mapping = aes(x = year)) +
  geom_histogram(binwidth = 0.5) +
  coord_cartesian(ylim = c(0, 200))

Missing values
# Replace years outside 1950-2000 with NA rather than dropping those rows.
# `year` itself is overwritten because it is the column mapped to the x axis
# below; writing the result to a separate column would leave the plot unchanged.
Mydata %>%
  mutate(year = ifelse(year < 1950 | year > 2000, NA, year)) %>%
  # Plot: points with an NA year (or NA grand_total) are removed with a warning.
  ggplot(aes(x = year, y = grand_total)) +
  geom_point()
## Warning: Removed 31767 rows containing missing values or values outside the scale range
## (`geom_point()`).

Covariation
A categorical and continuous variable
# Box plot of `grand_total` for each lake (categorical x, continuous y).
ggplot(data = Mydata, mapping = aes(x = lake, y = grand_total)) +
  geom_boxplot()
## Warning: Removed 31767 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Two categorical variables
# Heat map: count rows per (species, lake) pair and map the count `n` to fill.
ggplot(
  data = count(Mydata, species, lake),
  mapping = aes(x = species, y = lake, fill = n)
) +
  geom_tile()

Two continuous variables
# Scatter plot of grand_total against year; very low alpha reduces overplotting.
Mydata %>%
  ggplot(aes(x = year, y = grand_total)) +
  geom_point(alpha = 1 / 100)
## Warning: Removed 31767 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Hexagonal 2-D binning of year vs grand_total as an alternative to raw points.
Mydata %>%
  ggplot(aes(x = year, y = grand_total)) +
  geom_hex()
## Warning: Removed 31767 rows containing non-finite outside the scale range
## (`stat_binhex()`).

Patterns and models
# Plain scatter plot of grand_total against year to look for patterns.
Mydata %>%
  ggplot(aes(x = year, y = grand_total)) +
  geom_point()
## Warning: Removed 31767 rows containing missing values or values outside the scale range
## (`geom_point()`).
