Simpsons Episodes

Harold Nelson

2025-10-28

Setup

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

Get the Episodes Data

Read from GitHub

# Download the TidyTuesday (2025-02-04) Simpsons episodes CSV into a tibble.
simpsons_episodes <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-02-04/simpsons_episodes.csv"
)
## Rows: 151 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): image_url, production_code, title, video_url
## dbl  (9): id, imdb_rating, imdb_votes, number_in_season, number_in_series, o...
## date (1): original_air_date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Check the Structure

# Print a compact overview of every column: name, type, and first values.
utils::str(object = simpsons_episodes)
## spc_tbl_ [151 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ id                    : num [1:151] 450 452 455 457 462 464 466 469 472 473 ...
##  $ image_url             : chr [1:151] "http://static-media.fxx.com/img/FX_Networks_-_FXX/180/63/Thursdays_with_Abie.jpg" "http://static-media.fxx.com/img/FX_Networks_-_FXX/603/859/Simpsons_21_11.jpg" "http://static-media.fxx.com/img/FX_Networks_-_FXX/200/83/Postcards_from_the_Wedge.jpg" "http://static-media.fxx.com/img/FX_Networks_-_FXX/607/763/The_Greatest_Story_Ever_D_Ohed.jpg" ...
##  $ imdb_rating           : num [1:151] 6.8 7.1 7.1 5.7 6.9 6.6 6.8 7.2 6.9 7 ...
##  $ imdb_votes            : num [1:151] 481 532 480 675 491 507 506 482 564 528 ...
##  $ number_in_season      : num [1:151] 9 11 14 16 21 23 2 5 8 9 ...
##  $ number_in_series      : num [1:151] 450 452 455 457 462 464 466 469 472 473 ...
##  $ original_air_date     : Date[1:151], format: "2010-01-03" "2010-01-31" ...
##  $ original_air_year     : num [1:151] 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 ...
##  $ production_code       : chr [1:151] "MABF02" "MABF03" "MABF04" "MABF10" ...
##  $ season                : num [1:151] 21 21 21 21 21 21 22 22 22 22 ...
##  $ title                 : chr [1:151] "Thursdays with Abie" "Million Dollar Maybe" "Postcards from the Wedge" "The Greatest Story Ever D'ohed" ...
##  $ us_viewers_in_millions: num [1:151] 8.65 5.11 5.23 5.69 5.66 5.74 8.59 8.97 9.54 7.18 ...
##  $ video_url             : chr [1:151] "http://www.simpsonsworld.com/video/250369603964" "http://www.simpsonsworld.com/video/279804995696" "http://www.simpsonsworld.com/video/250390595937" "http://www.simpsonsworld.com/video/279809091713" ...
##  $ views                 : num [1:151] 36227 40854 41357 44070 40904 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   id = col_double(),
##   ..   image_url = col_character(),
##   ..   imdb_rating = col_double(),
##   ..   imdb_votes = col_double(),
##   ..   number_in_season = col_double(),
##   ..   number_in_series = col_double(),
##   ..   original_air_date = col_date(format = ""),
##   ..   original_air_year = col_double(),
##   ..   production_code = col_character(),
##   ..   season = col_double(),
##   ..   title = col_character(),
##   ..   us_viewers_in_millions = col_double(),
##   ..   video_url = col_character(),
##   ..   views = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

IMDb Rating by Year

Draw a boxplot of the IMDb rating for each original air year.

Solution

# Boxplot of IMDb rating, one box per original air year.
# Episodes without a finite rating are dropped explicitly; otherwise ggplot2
# discards them on its own and emits a "Removed rows containing non-finite"
# warning, which hides the missing data from the reader of the code.
simpsons_episodes %>%
  dplyr::filter(is.finite(imdb_rating)) %>%
  ggplot(aes(x = factor(original_air_year), y = imdb_rating)) +
  geom_boxplot() +
  # Replace the default axis titles (the raw aes() expressions) with
  # human-readable labels.
  labs(x = "Original air year", y = "IMDb rating")