Import Data
# Read the raw CSV from the project's data folder; column types are guessed by
# the reader and reported in the message that follows.
data <- read_csv(file = "data/myData.csv")
## Rows: 1024 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): name, generation, type, abilities
## dbl (9): id, height, weight, hp, attack, defense, special-attack, special-de...
## lgl (3): is_baby, is_legendary, is_mythical
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Introduction
Questions
Variation
Visualizing distributions
# Bar chart counting rows at each distinct height value; each bar is split
# (stacked) by generation through the fill aesthetic.
ggplot(data, aes(x = height, fill = generation)) +
  geom_bar()

Typical values
# Histogram of attack, used to see which values are typical.
# The previous binwidth of .01 produced bins far narrower than the spacing of
# the data, so the bars were effectively invisible. A width of 5 gives readable
# bars.
# NOTE(review): assumes attack is on an integer-like scale of roughly 0-200,
# like the sibling defense column that is compared against 5 and 20 elsewhere
# in this document; adjust binwidth if the real range differs.
data %>%
  ggplot(mapping = aes(x = attack)) +
  geom_histogram(binwidth = 5)

Unusual values
# Histogram of speed using the default binning (30 bins), as a first look for
# unusual values in the tails.
ggplot(data, aes(x = speed)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

Missing Values
# Replace out-of-range defense values with NA and plot what remains against
# attack. The masked values are written back into `defense` itself so the
# scatterplot actually reflects them; previously they went into an unused
# column `y` and the plot showed the untouched data.
# NOTE(review): the 5 / 20 cut-offs are kept from the original code; confirm
# they match the real range of defense, otherwise most points become NA.
data %>%
  mutate(defense = ifelse(defense < 5 | defense > 20, NA, defense)) %>%
  ggplot(aes(x = attack, y = defense)) +
  geom_point()

Covariation
A categorical and continuous variable
# Boxplots of speed for each generation; the axes are flipped so the
# generation labels run down the side.
ggplot(data, aes(x = generation, y = speed)) +
  geom_boxplot() +
  coord_flip()

Two categorical variables
# Tally rows for every generation/type pair, then draw the tallies as a tile
# grid whose fill colour encodes the count `n`.
ggplot(
  count(data, generation, type),
  mapping = aes(x = generation, y = type, fill = n)
) +
  geom_tile()

Two continuous variables
library(hexbin)
# Hexagonal-bin plot of special-attack against special-defense. The column
# names contain a hyphen, so they must stay wrapped in backticks.
ggplot(data, aes(x = `special-attack`, y = `special-defense`)) +
  geom_hex()

Patterns and models
library(modelr)
# Fit a linear model of log(hp) on log(speed).
mod <- lm(formula = log(hp) ~ log(speed), data = data)

# Attach the model residuals as `resid`, then exponentiate them to undo the
# log transform applied to the response.
data4 <- mutate(modelr::add_residuals(data, mod), resid = exp(resid))

# Scatterplot of hp against the back-transformed residuals.
ggplot(data4, aes(x = hp, y = resid)) +
  geom_point()
