Import data

# Read the CSV at ../00_data/myData.csv (relative to the working directory)
# into the data frame `data`, which every later chunk uses.
data <- read.csv(file = "../00_data/myData.csv")

Introduction

Questions

Variation

Visualizing distributions

# Bar chart: one bar per value of `type`, bar height = number of rows.
ggplot(data, aes(x = type)) +
  geom_bar()

Typical values

# Histogram of spotify_popularity.
# The bin width is 1 instead of 0.01: presumably the score is a whole number
# on a 0-100 scale (TODO confirm against the data), in which case 0.01-wide
# bins split the axis into thousands of mostly empty slivers and hide the
# shape of the distribution.
ggplot(data, aes(x = spotify_popularity)) +
  geom_histogram(binwidth = 1)
## Warning: Removed 37 rows containing non-finite outside the scale range
## (`stat_bin()`).

Unusual values

# Histogram of rank_2003 using half-unit bins.
ggplot(data = data, mapping = aes(x = rank_2003)) +
  geom_histogram(binwidth = 0.5)
## Warning: Removed 191 rows containing non-finite outside the scale range
## (`stat_bin()`).

Missing values

# Add column `y`: a copy of rank_2003 that is set to NA wherever
# rank_2012 < 3 or rank_2020 > 20. Rows where that condition itself evaluates
# to NA also end up as NA, because ifelse() propagates a missing test.
data <- mutate(
  data,
  y = ifelse(
    test = rank_2012 < 3 | rank_2020 > 20,
    yes = NA,
    no = rank_2003
  )
)

Covariation

A categorical and continuous variable

# Box plot of rank_2003 for each genre.
ggplot(data = data, mapping = aes(x = genre, y = rank_2003)) +
  geom_boxplot()
## Warning: Removed 191 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Two categorical variables

# Tile plot of how many rows fall into each genre / release_year combination.
# count() adds the tally as column `n`, which drives the fill colour.
ggplot(
  data = count(data, genre, release_year),
  mapping = aes(x = genre, y = release_year, fill = n)
) +
  geom_tile()

Two continuous variables

library(hexbin)
# Hexagonal binning of two continuous variables.
# The x axis previously used `type`, which the other chunks in this report
# treat as a categorical variable (bar chart, box plot grouping), so hex bins
# over it are not meaningful. It is replaced by spotify_popularity.
# NOTE(review): confirm spotify_popularity is the intended second variable;
# re-knit so the row-removal warning printed under this chunk is refreshed.
data %>%
  ggplot(aes(x = spotify_popularity, y = weeks_on_billboard)) +
  geom_hex()
## Warning: Removed 119 rows containing non-finite outside the scale range
## (`stat_binhex()`).

Patterns and models

# Box plot with weeks_on_billboard on the x axis and `type` on the y axis.
ggplot(data = data, mapping = aes(x = weeks_on_billboard, y = type)) +
  geom_boxplot()
## Warning: Removed 119 rows containing non-finite outside the scale range
## (`stat_boxplot()`).