# Global knitr chunk options: show the code, center every figure at a size of
# 8 (width) by 6 (height), and keep messages and warnings out of the output
knitr::opts_chunk$set(
  echo = TRUE,
  fig.align = "center",
  fig.height = 6,
  fig.width = 8,
  # Spelled out as FALSE: the shorthand F is an ordinary variable that can be
  # reassigned, while FALSE is a reserved word
  message = FALSE,
  warning = FALSE
)
# Loading the needed packages: ggfittext will help with adding long text to a graph
pacman::p_load(tidyverse, ggfittext)
# Importing the TV episode data from the csv file into a data frame
tv_shows <- read.csv(file = "TV Show Episodes.csv")
For this R script, we’ll be creating a tile plot (sometimes called a heat map) for episodes of different TV shows.
My ~~girlfriend~~ fiance and I have been watching Mad Men recently, so we’ll
start by using it as our example.
Let’s create a data set named `episodes` that just has the episodes from Mad Men.
# Keeping only the rows of tv_shows that belong to Mad Men
episodes <- filter(tv_shows, show == "Mad Men")
Next, create a tile plot using `geom_tile()` that has:

- x = season number (`season`)
- y = episode number (`episode_num`)
- fill = episode rating (`ep_rating`)

You can add the rating of the episode by adding
`geom_text(mapping = aes(label = ep_rating))`.
# Tile plot of the rating of every episode in the episodes data set:
# one column of tiles per season, one row per episode number
ggplot(
  data = episodes,
  # Using factor to show all season and episode numbers
  mapping = aes(
    x = factor(season),
    y = factor(episode_num),
    fill = ep_rating
  )
) +
  # A white outline around each tile keeps neighboring tiles visually separate
  geom_tile(
    color = "white",
    linewidth = 2
  ) +
  # Changing the labels and adding a title
  labs(
    x = "Season",
    y = "Episode",
    fill = "IMDB \nRating",
    # The show name is taken from the data instead of being typed by hand
    title = paste("IMDB Rating for Episodes of", unique(episodes$show))
  ) +
  # Changing the default theme
  theme_test() +
  # Centering the title and removing the graph (panel) border
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.border = element_blank()
  ) +
  # Changing the color gradient scale and setting the limits to 0 - 10
  scale_fill_viridis_c(limits = c(0, 10)) +
  # Removing the padded space on all sides
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(
    expand = c(0, 0),
    # Reversing the order so the first episode is at the top of the graph
    limits = rev
  ) +
  # Adding the text in the episode column to each tile
  # NOTE(review): episode is presumably the episode title - confirm in the data
  geom_fit_text(
    mapping = aes(label = episode),
    color = "black",
    fontface = "bold",
    # reflow wraps long text over several lines so it fits inside its tile
    reflow = TRUE,
    # contrast switches the text color on tiles where black is hard to read
    contrast = TRUE
  )
Let’s create a tile plot for the average episode rating per season for 5 different shows: Mad Men, Breaking Bad, Dexter, The Sopranos, and Game of Thrones.
Start by calculating the average IMDB rating for each season. For example, Breaking Bad season 1 had an average of 8.7.
# A vector of the shows we want to compare
shows <- c(
  "Mad Men",
  "Breaking Bad",
  "Dexter",
  "The Sopranos",
  "Game of Thrones"
)

# Creating a data set named dramas from the tv_shows data set:
# one row per show and season, holding the average episode rating
dramas <-
  tv_shows |>
  # Keeping only the episodes of the shows above
  filter(show %in% shows) |>
  # Calculate the average season rating for each tv show
  group_by(show, season) |>
  summarize(
    season_rating = mean(ep_rating),
    # Dropping the grouping here returns an ungrouped result directly, so no
    # separate ungroup() step (and no regrouping message) is needed
    .groups = "drop"
  )

# Printing the result to check the averages
dramas
## # A tibble: 34 × 3
## show season season_rating
## <chr> <int> <dbl>
## 1 Breaking Bad 1 8.7
## 2 Breaking Bad 2 8.79
## 3 Breaking Bad 3 8.74
## 4 Breaking Bad 4 8.96
## 5 Breaking Bad 5 9.38
## 6 Dexter 1 8.74
## 7 Dexter 2 8.78
## 8 Dexter 3 8.44
## 9 Dexter 4 8.89
## 10 Dexter 5 8.57
## # ℹ 24 more rows
Use the data set created above to create a tile plot with:

- x = season
- y = TV show
- fill = average season rating

and include the average season rating in each tile, rounded to 1 decimal place.
# Tile plot of the average season rating for each show in the dramas data set:
# one row of tiles per show, one column per season
ggplot(
  data = dramas,
  # Using factor to show all season numbers
  mapping = aes(
    x = factor(season),
    y = show,
    fill = season_rating
  )
) +
  # A white outline around each tile keeps neighboring tiles visually separate
  geom_tile(
    color = "white",
    linewidth = 1
  ) +
  # Changing the labels and adding a title
  # (y = NULL removes the axis title: the show names speak for themselves)
  labs(
    x = "Season",
    y = NULL,
    fill = "IMDB \nRating",
    title = "Average IMDB Rating for each season"
  ) +
  # Changing the default theme
  theme_test() +
  # Centering the title and removing the graph (panel) border
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.border = element_blank()
  ) +
  # Changing the color gradient scale and setting the limits to 0 - 10
  scale_fill_viridis_c(limits = c(0, 10)) +
  # Removing the padded space on all sides
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  # Adding the average season rating to each tile using just geom_text()
  geom_text(
    # sprintf() keeps exactly 1 decimal place, so a rounded rating of 8 is
    # shown as "8.0" instead of "8"
    mapping = aes(
      label = sprintf("%.1f", round(season_rating, digits = 1))
    ),
    color = "black",
    fontface = "bold"
  )