Import Data
# Load the spreadsheet that every chunk below refers to as `pokemon`.
pokemon <- read_excel(path = "data/myData.xlsx")
Introduction
Questions
Variation
Visualizing distributions
# Bar chart: number of rows per primary egg group, with tilted x labels so
# the group names do not overlap.
pokemon %>%
  ggplot(aes(x = egg_group_1)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Tabulate the same counts that the bar chart above draws.
count(pokemon, egg_group_1)
## # A tibble: 15 × 2
## egg_group_1 n
## <chr> <int>
## 1 bug 80
## 2 ditto 1
## 3 dragon 14
## 4 fairy 38
## 5 flying 59
## 6 ground 218
## 7 humanshape 40
## 8 indeterminate 64
## 9 mineral 67
## 10 monster 94
## 11 no-eggs 118
## 12 plant 35
## 13 water1 84
## 14 water2 21
## 15 water3 16
# Histogram of speed using the default binning.
pokemon %>%
  ggplot(aes(x = speed)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

# Overlaid frequency polygons of speed, one coloured line per type_1 value.
pokemon %>%
  ggplot() +
  geom_freqpoly(mapping = aes(x = speed, colour = type_1))
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

Typical values
# Histogram of speed restricted to rows with speed above 100.
ggplot(
  data = filter(pokemon, speed > 100),
  mapping = aes(x = speed)
) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

Unusual values
# Histogram of defense; coord_cartesian() limits the visible y range to
# 0-130 without dropping any data from the bins.
ggplot(data = pokemon, mapping = aes(x = defense)) +
  geom_histogram() +
  coord_cartesian(ylim = c(0, 130))
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

Missing Values
# Keep defense only when it lies in [50, 100]; everything else becomes NA,
# so those rows are dropped (with a warning) when the points are drawn.
pokemon %>%
  mutate(
    defense = ifelse(defense >= 50 & defense <= 100, defense, NA)
  ) %>%
  ggplot(mapping = aes(x = speed, y = defense)) +
  geom_point()
## Warning: Removed 340 rows containing missing values or values outside the scale range
## (`geom_point()`).

A categorical and continuous variable
# Box plot of attack for each type_1 value, with tilted x labels.
ggplot(data = pokemon, mapping = aes(x = type_1, y = attack)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Two categorical variables
# Tile plot: one tile per observed (egg_group_1, type_1) pair, filled by
# the number of rows in that pair.
ggplot(
  data = count(pokemon, egg_group_1, type_1),
  mapping = aes(x = egg_group_1, y = type_1, fill = n)
) +
  geom_tile() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Two continuous variables
library(hexbin)
# Hexagonal 2-D binning of attack against speed.
ggplot(data = pokemon, mapping = aes(x = attack, y = speed)) +
  geom_hex()

# Box plots of speed for rows with speed above 100, grouped into attack
# bins of width 10.
pokemon %>%
  filter(speed > 100) %>%
  mutate(
    attack_bin = cut(
      attack,
      # Round the top break up to the next multiple of 10. With
      # seq(0, max(attack), by = 10) the last break stops short whenever
      # max(attack) is not a multiple of 10, and cut() then turns the
      # largest attack values into NA. na.rm = TRUE keeps a missing attack
      # value from making the break sequence itself NA.
      breaks = seq(0, ceiling(max(attack, na.rm = TRUE) / 10) * 10, by = 10),
      # Close the first interval on the left so attack == 0 is binned
      # instead of becoming NA (the first label becomes "[0,10]").
      include.lowest = TRUE
    )
  ) %>%
  ggplot(aes(x = attack_bin, y = speed)) +
  geom_boxplot(aes(group = attack_bin)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Patterns and models
# Fit a log-log linear model of speed on defense.
mod <- lm(log(speed) ~ log(defense), data = pokemon)

# Attach the model residuals and exponentiate them to undo the log
# transform applied to the response.
pokemon2 <- mutate(
  modelr::add_residuals(pokemon, mod),
  resid = exp(resid)
)

# Back-transformed residuals plotted against defense.
ggplot(data = pokemon2, mapping = aes(x = defense, y = resid)) +
  geom_point()

# Box plot of the back-transformed residuals for each type_2 value.
ggplot(data = pokemon2, mapping = aes(x = type_2, y = resid)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
