# Global chunk options: echo the code, hide warnings and messages, and
# center every figure. TRUE/FALSE are spelled out because T and F are
# ordinary variables that can be reassigned.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  fig.align = "center"
)
## Load the required package: tidyverse
library(tidyverse)
## Reading in the Dr Who data from github
drwho <-
  read.csv("https://raw.githubusercontent.com/Shammalamala/DS-2870-Data-Sets/main/drwho.csv") |>
  # Keeping only the unique episodes (some episodes are listed twice);
  # .keep_all = TRUE retains the columns not named in the call
  distinct(title, season, episode, day, month, year, viewers, .keep_all = TRUE) |>
  mutate(
    # Replacing the first space in the actor's name with a line break, then
    # ordering the factor levels by dr_number
    dr_actor = str_replace(dr_actor, " ", "\n") |> fct_reorder(dr_number),
    # Relabeling "episode" as "regular" in the type column
    type = str_replace(type, "episode", "regular")
  ) |>
  # Removing the row with NA for the doctor
  filter(!is.na(dr_actor))
The drwho data set has the information on 175 episodes of the revival of the television show “Dr. Who”. There are 14 variables (columns), but this homework assignment will focus on the following columns:
Create a set of boxplots comparing the rating of the episode by the actor.
It should match the completed graph in Brightspace attached to the assignment.
The color of the box should be “#003b6f” and the color of the lines and points of the box plots should be “orange2”
# Creating a plot of episode rating by actor
ggplot(
  data = drwho,
  mapping = aes(
    x = rating,
    y = dr_actor
  )
) +
  # Adding the boxplots: blue boxes with orange lines and outlier points
  geom_boxplot(
    fill = "#003b6f",
    color = "orange2"
  ) +
  # Changing the theme
  theme_bw() +
  # Changing the labels and adding a title; y = NULL drops the y-axis title
  labs(
    y = NULL,
    x = "Episode Rating",
    title = "Dr. Who Episode Rating by Actor",
    caption = "Data: datardis package in R"
  ) +
  # Centering the title (no trailing comma, so theme() never receives an
  # empty argument)
  theme(
    plot.title = element_text(hjust = 0.5, size = 14)
  )
Next, you’ll be creating a scatterplot to compare viewers, rating, actor, and type (regular vs special)
Create a scatterplot with an added trend line that has the following aesthetics mapped to the corresponding columns:
- x = viewers*1000000
- y = rating
- color = actor
- shape = episode type

Set the size of the points to 1.5. Save the graph as gg_drwho_2a (make sure it still appears below the code chunk).
# Scatterplot of rating against viewers (rescaled by 1,000,000) with one
# overall trend line
gg_drwho_2a <-
  ggplot(
    data = drwho,
    mapping = aes(
      x = viewers * 1000000,
      y = rating
    )
  ) +
  # Adding a single trend line: color and shape are mapped only in
  # geom_point(), so the smoother is fit to all episodes at once
  geom_smooth(
    se = FALSE,
    formula = y ~ x,
    color = "black",
    method = "loess"
  ) +
  # Adding the points, colored by actor and shaped by episode type
  geom_point(
    mapping = aes(
      color = dr_actor,
      shape = type
    ),
    size = 1.5
  )

# Displaying the graph
gg_drwho_2a
Using the gg_drwho_2a graph, make and save the changes so the plot appears like what is in Brightspace.
The title of the plot should be bolded, centered, and colored with the same color as the box plots in question 1.
Save it as gg_drwho_2b and make sure it is displayed beneath the code chunk
# Part 2b: relabel and restyle the 2a scatterplot
gg_drwho_2b <-
  gg_drwho_2a +
  # Base theme first, so the theme() tweaks below are layered on top of it
  theme_bw() +
  # Legend under the panel; bold, centered title in the boxplot blue
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold", color = "#003b6f", hjust = 0.5)
  ) +
  # Title, axis titles, and legend titles
  labs(
    x = "UK Viewers",
    y = "Episode Rating",
    color = "Actor",
    shape = "Episode Type",
    title = "Dr. Who Revival Episodes"
  ) +
  # With the legend at the bottom, put each legend title above its keys
  # and center it
  guides(
    color = guide_legend(title.hjust = 0.5, title.position = "top"),
    shape = guide_legend(title.hjust = 0.5, title.position = "top")
  )

# Displaying the graph
gg_drwho_2b
Use gg_drwho_2b and the correct scale functions, along with their respective arguments, to make the changes seen in Brightspace.
The hexcodes for the colors used are: “#1a2530”, “#a1856a”, “#b44a48”, “#e29d42”, “#2bafc8”
Disclaimer: The actors' names aren't coded as "First Last" but as "First\nLast" so that the first and last names appear on two separate lines, e.g., David Tennant is coded as "David\nTennant".
The number for the shapes used are 19 and 8, respectively
Save the results as gg_drwho_2c and display it beneath the code chunk
# Part 2c: custom axis, color, and shape scales on top of the 2b plot
gg_drwho_2c <-
  gg_drwho_2b +
  # x-axis: breaks at 4, 8, and 12 million, labeled with commas instead of
  # scientific notation
  scale_x_continuous(
    breaks = c(4e6, 8e6, 12e6),
    labels = scales::label_comma()
  ) +
  # One hex color per actor (unnamed, so they are assigned positionally)
  scale_color_manual(
    values = c("#1a2530", "#a1856a", "#b44a48", "#e29d42", "#2bafc8")
  ) +
  # Point shapes 19 and 8, with capitalized legend labels for episode type
  scale_shape_manual(
    labels = c("Regular", "Special"),
    values = c(19, 8)
  )

# Displaying the graph
gg_drwho_2c
Using gg_drwho_2c, create a separate graph for each actor.
Hide the guide for color, but not shape, using the guides() function (like in part 2b).
Move the legend into the blank space in the bottom right corner of the plot. To move the legend inside the graph, give the correct argument a pair of (x, y) coordinates, each ranging from 0 to 1: x runs left to right and y runs bottom to top.
See how the graph should appear in Brightspace
# Part 2d: one panel per actor, drawn from the 2c plot
gg_drwho_2c +
  facet_wrap(vars(dr_actor)) +
  # Placing the legend inside the plot at relative coordinates
  # (x = 0.85, y = 0.25), i.e. toward the bottom right
  # NOTE(review): newer ggplot2 releases (3.5.0+) appear to deprecate a
  # numeric legend.position in favor of legend.position.inside; confirm the
  # installed version before migrating
  theme(legend.position = c(0.85, 0.25)) +
  # Dropping the color legend but keeping the shape legend
  guides(color = "none")