Import Data

# Read the CSV at data/myData.csv into `data`, letting readr guess column types.
data <- read_csv(file = "data/myData.csv")
## Rows: 1024 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): name, generation, type, abilities
## dbl (9): id, height, weight, hp, attack, defense, special-attack, special-de...
## lgl (3): is_baby, is_legendary, is_mythical
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Introduction

Questions

Variation

Visualizing distributions

# Bar chart counting rows at each height value; bars are filled by generation.
ggplot(data, aes(x = height, fill = generation)) +
  geom_bar()

Typical values

# Histogram of attack, used to see which values are most common.
# `binwidth` is expressed in units of attack. A width of 0.01 only suits a
# variable measured on a very fine scale and otherwise yields thousands of
# hairline bins, so a coarser width is used here.
# NOTE(review): attack is presumably an integer-valued stat; confirm with
# range(data$attack) and adjust the width if needed.
data %>%
  ggplot(mapping = aes(x = attack)) +
  geom_histogram(binwidth = 5)

Unusual values

# Histogram of speed using ggplot2's default binning (no binwidth supplied).
ggplot(data, aes(x = speed)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

Missing values

# Replace out-of-range defense values with NA (rows are kept) and plot attack
# against the cleaned defense column.
data %>%
  # Alternative: drop those rows instead of keeping them as NA, e.g.
  # filter(defense >= 5, defense <= 20) %>%

  # Overwrite `defense` itself so the plot below actually uses the cleaned
  # values; writing them to a separate column leaves the plotted data unchanged.
  # NOTE(review): the 5 / 20 cut-offs are kept as written - confirm they match
  # the real range of defense, otherwise most points will become NA.
  mutate(defense = ifelse(defense < 5 | defense > 20, NA, defense)) %>%
  ggplot(aes(x = attack, y = defense)) +
  geom_point()

Covariation

A categorical and continuous variable

# Box plot of speed for each generation; coord_flip() swaps the axes so the
# generations are listed along the vertical axis.
ggplot(data, aes(generation, speed)) +
  geom_boxplot() +
  coord_flip()

Two categorical variables

# Heat map of the number of rows (`n`, produced by count()) in each
# generation / type combination.
ggplot(
  data = count(data, generation, type),
  mapping = aes(x = generation, y = type, fill = n)
) +
  geom_tile()

Two continuous variables

# Hexagonal-bin plot of special-attack against special-defense.
library(hexbin) # geom_hex() relies on the hexbin package
ggplot(data, aes(x = `special-attack`, y = `special-defense`)) +
  geom_hex()

Patterns and models

library(modelr)
# Linear model of hp on speed, with both variables on the log scale.
mod <- lm(formula = log(hp) ~ log(speed), data = data)

# Append the model residuals as `resid`, then exponentiate them because the
# model was fitted to log(hp).
data4 <- mutate(modelr::add_residuals(data, mod), resid = exp(resid))

# Scatter plot of hp against the exponentiated residuals stored in data4.
ggplot(data4, aes(x = hp, y = resid)) +
  geom_point()