Import Data

# Read the Excel workbook into the `pokemon` data frame used by every chunk below.
pokemon <- read_excel(path = "data/myData.xlsx")

Introduction

Questions

Variation

Visualizing distributions

# Bar chart: number of rows per `egg_group_1` value.
# The x-axis labels are rotated 45 degrees so they stay readable.
pokemon %>%
    ggplot(aes(x = egg_group_1)) +
    geom_bar() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Tabulate how many rows fall into each `egg_group_1` value.
count(pokemon, egg_group_1)
## # A tibble: 15 × 2
##    egg_group_1       n
##    <chr>         <int>
##  1 bug              80
##  2 ditto             1
##  3 dragon           14
##  4 fairy            38
##  5 flying           59
##  6 ground          218
##  7 humanshape       40
##  8 indeterminate    64
##  9 mineral          67
## 10 monster          94
## 11 no-eggs         118
## 12 plant            35
## 13 water1           84
## 14 water2           21
## 15 water3           16
# Histogram of `speed` using the default binning.
pokemon %>%
    ggplot(aes(x = speed)) +
    geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

# Frequency polygons of `speed`, one coloured line per `type_1` value.
ggplot(data = pokemon) +
    geom_freqpoly(mapping = aes(x = speed, colour = type_1))
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

Typical values

# Histogram of `speed`, restricted to rows where speed exceeds 100.
ggplot(data = filter(pokemon, speed > 100), mapping = aes(x = speed)) +
    geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

Unusual values

# Histogram of `defense`. coord_cartesian() zooms the y axis to 0-130
# without dropping any data, so low-count bins are easier to see.
ggplot(data = pokemon, mapping = aes(x = defense)) +
    geom_histogram() +
    coord_cartesian(ylim = c(0, 130))
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

Missing Values

# Keep `defense` only when it lies in [50, 100]; every other value becomes NA.
# Then plot speed against the recoded defense (rows with NA are dropped by
# geom_point() with a warning).
ggplot(
    data = mutate(
        pokemon,
        defense = ifelse(defense >= 50 & defense <= 100, defense, NA)
    ),
    mapping = aes(x = speed, y = defense)
) +
    geom_point()
## Warning: Removed 340 rows containing missing values or values outside the scale range
## (`geom_point()`).

A categorical and continuous variable

# Boxplots of `attack`, one box per `type_1` value, with rotated x labels.
ggplot(data = pokemon, mapping = aes(x = type_1, y = attack)) +
    geom_boxplot() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

Two categorical variables

# Count rows for every (egg_group_1, type_1) combination and draw the counts
# as a heat map: one tile per combination, fill colour = count `n`.
count(pokemon, egg_group_1, type_1) %>%
    ggplot(mapping = aes(x = egg_group_1, y = type_1, fill = n)) +
    geom_tile() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

Two continuous variables

library(hexbin)
# Hexagonal 2D binning of attack (x) against speed (y).
ggplot(data = pokemon, mapping = aes(x = attack, y = speed)) +
    geom_hex()

# Boxplots of `speed` across 10-point-wide `attack` bins, for rows with
# speed > 100 only.
pokemon %>%
    filter(speed > 100) %>%
    mutate(
        attack_bin = cut(
            attack,
            # Round the top break UP to the next multiple of 10. With
            # seq(0, max(attack), by = 10) the last break stops below the
            # maximum whenever max(attack) is not a multiple of 10, and the
            # highest attack values are silently turned into NA by cut().
            breaks = seq(0, ceiling(max(attack, na.rm = TRUE) / 10) * 10, by = 10),
            # Close the first interval on the left so attack == 0 is binned too.
            include.lowest = TRUE
        )
    ) %>%
    ggplot(aes(x = attack_bin, y = speed)) +
    geom_boxplot(aes(group = attack_bin)) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

Patterns and models

# Fit a linear model of log(speed) on log(defense).
mod <- lm(log(speed) ~ log(defense), data = pokemon)

# Attach the model residuals as column `resid`, then exponentiate them to
# undo the log transform applied to the response.
pokemon2 <- mutate(
    modelr::add_residuals(pokemon, mod),
    resid = exp(resid)
)

# Scatter plot of the exponentiated residuals against `defense`.
ggplot(data = pokemon2, mapping = aes(x = defense, y = resid)) +
    geom_point()

# Boxplots of the exponentiated residuals, one box per `type_2` value,
# with rotated x labels.
ggplot(data = pokemon2, mapping = aes(x = type_2, y = resid)) +
    geom_boxplot() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))