Import data
# Load the raw data set from the project's shared data folder; the path is
# relative to the directory this document is rendered from.
data <- read.csv(file = "../00_data/myData.csv")
Introduction
Questions
Variation
Visualizing distributions
# Bar chart: number of rows for each value of `type`.
ggplot(data, aes(x = type)) +
  geom_bar()

Typical values
# Histogram of Spotify popularity, used to see which values are most typical.
# The previous binwidth of 0.01 suits a fractional-scale variable; on a whole
# number score it splits the axis into thousands of hairline bins that are
# almost all empty. One bin per unit keeps full resolution and stays legible.
# NOTE(review): assumes spotify_popularity is Spotify's 0-100 integer score --
# confirm against the data before relying on this binwidth.
ggplot(data = data, mapping = aes(x = spotify_popularity)) +
  geom_histogram(binwidth = 1)
## Warning: Removed 37 rows containing non-finite outside the scale range
## (`stat_bin()`).

Unusual values
# Histogram of the 2003 rank with half-unit bins, to look for unusual values.
ggplot(data, aes(x = rank_2003)) +
  geom_histogram(binwidth = 0.5)
## Warning: Removed 191 rows containing non-finite outside the scale range
## (`stat_bin()`).

Missing values
# Add column `y`: a copy of rank_2003 that is blanked out (NA) for rows where
# rank_2012 is below 3 or rank_2020 is above 20. Rows where the condition
# itself evaluates to NA also end up as NA, because ifelse() propagates it.
data <- mutate(
  data,
  y = ifelse(rank_2012 < 3 | rank_2020 > 20, NA, rank_2003)
)
Covariation
A categorical and continuous variable
# Box plot: distribution of the 2003 rank within each genre.
ggplot(data, mapping = aes(x = genre, y = rank_2003)) +
  geom_boxplot()
## Warning: Removed 191 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Two categorical variables
# Heat map: count the rows for every genre / release-year combination and
# shade each tile by that count (`n` is the column created by count()).
data %>%
  count(genre, release_year) %>%
  ggplot() +
  geom_tile(mapping = aes(x = genre, y = release_year, fill = n))

Two continuous variables
library(hexbin)
# Hexagonal 2-D binning needs a continuous variable on both axes. The original
# mapped `type` to x, which the bar chart and box plot in this file treat as a
# categorical variable, so x is now the numeric `spotify_popularity`.
# NOTE(review): any other continuous column would also work here -- confirm
# which pairing is of interest.
# NOTE(review): re-render after this change; the number of rows dropped for
# missing values may differ from the previously captured warning.
data %>%
  ggplot(aes(x = spotify_popularity, y = weeks_on_billboard)) +
  geom_hex()
## Warning: Removed 119 rows containing non-finite outside the scale range
## (`stat_binhex()`).

Patterns and models
# Horizontal box plot: weeks on the Billboard chart, one box per `type`.
ggplot(data, aes(x = weeks_on_billboard, y = type)) +
  geom_boxplot()
## Warning: Removed 119 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
