My Project

For this week’s Google Time, I read about plotly graphs in R and decided to make one myself. I started by recreating the example diamonds plot from plotly.com. Then I browsed the website informationisbeautiful.net and saw their visualization of movie successes in 2023. I downloaded the dataset they used and named it “Blockbuster”. However, I didn’t like the way some of the variables were named, so I made a copy of the sheet and adjusted the data there (loaded below as “Blockbuster2”) instead of cleaning it in R. With some trial and error, I was able to make a graph similar to the diamond plot example, but without the individual data points, because I prefer the look of the trends alone. Plus, if you hover over the plotly graph, you’ll see those data points, so the data is still accessible without the plot being too busy.

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:mosaic':
## 
##     do
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Reproducible random subset: fix the seed, then draw 1000 row indices.
set.seed(100)
d <- diamonds[sample.int(nrow(diamonds), 1000), ]

# Price against carat: large points plus a smoothed trend coloured and filled
# by cut, with one panel per cut. `text` is not a ggplot2 aesthetic (ggplot2
# warns and ignores it); it is presumably supplied for the plotly hover label.
p <- ggplot(d, aes(carat, price)) +
  geom_point(aes(text = paste("Clarity:", clarity)), size = 4) +
  geom_smooth(aes(colour = cut, fill = cut)) +
  facet_wrap(vars(cut))
## Warning: Ignoring unknown aesthetics: text
# Render the ggplot object `p` as an interactive plotly figure.
ggplotly(p)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
library(readr)
# Read the downloaded 2023 film dataset. The read_csv report that follows shows
# 97 rows and 37 columns, 19 of which had empty headers and were auto-named
# ...17 through ...35.
Blockbuster <- read_csv("Data Visualization/The Hollywood In$ider - all data - 2023 for VizSweet.csv")
## New names:
## Rows: 97 Columns: 37
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (7): Film, budget recovered, force label, Primary Genre, Genres, exclud... dbl
## (11): Year, Domestic gross ($m), Worldwide Gross ($m), Rotten Tomatoes ... lgl
## (19): ...17, ...18, ...19, ...20, ...21, ...22, ...23, ...24, ...25, ......
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...22`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## • `` -> `...26`
## • `` -> `...27`
## • `` -> `...28`
## • `` -> `...29`
## • `` -> `...30`
## • `` -> `...31`
## • `` -> `...32`
## • `` -> `...33`
## • `` -> `...34`
## • `` -> `...35`
# Print the first six rows of the imported data.
head(Blockbuster)
## # A tibble: 6 × 37
##   Film                       Year `Domestic gross ($m)` `Worldwide Gross ($m)`
##   <chr>                     <dbl>                 <dbl>                  <dbl>
## 1 Avatar 2                   2023                  684                   2320 
## 2 Barbie                     2023                  635.                  1438.
## 3 Super Mario Bros           2023                  574                   1358.
## 4 Across the Spider-Verse    2023                  381.                   687.
## 5 Guardians of the Galaxy 3  2023                  359.                   845.
## 6 Oppenheimer                2023                  324.                   938.
## # ℹ 33 more variables: `Rotten Tomatoes  critics` <dbl>,
## #   `Rotten Tomatoes Audience` <dbl>, `Budget ($m)` <dbl>,
## #   `budget recovered` <chr>, `times budget recovered` <dbl>,
## #   `force label` <chr>, `critics vs audience divergence` <dbl>,
## #   `Primary Genre` <chr>, Genres <chr>, `IMDB Rating` <dbl>,
## #   `exclude label` <chr>, `RT vs IMDB disparity` <dbl>, ...17 <lgl>,
## #   ...18 <lgl>, ...19 <lgl>, ...20 <lgl>, ...21 <lgl>, ...22 <lgl>, …
library(readr)
# Read the edited copy of the dataset, in which some columns carry names
# without spaces (e.g. RottenTomatoesCritics, RottenTomatoesAudience,
# PrimaryGenre). The 19 empty-header columns are still present in this copy.
Blockbuster2 <- read_csv("Data Visualization/Blockbuster2 - 2023 for VizSweet.csv")
## New names:
## Rows: 97 Columns: 37
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (7): Film, budget recovered, force label, PrimaryGenre, Genres, exclude... dbl
## (11): Year, Domestic gross ($m), Worldwide Gross ($m), RottenTomatoesCri... lgl
## (19): ...17, ...18, ...19, ...20, ...21, ...22, ...23, ...24, ...25, ......
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...22`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## • `` -> `...26`
## • `` -> `...27`
## • `` -> `...28`
## • `` -> `...29`
## • `` -> `...30`
## • `` -> `...31`
## • `` -> `...32`
## • `` -> `...33`
## • `` -> `...34`
## • `` -> `...35`
# Print the first six rows of the edited data to show the new column names.
head(Blockbuster2)
## # A tibble: 6 × 37
##   Film   Year `Domestic gross ($m)` `Worldwide Gross ($m)` RottenTomatoesCritics
##   <chr> <dbl>                 <dbl>                  <dbl>                 <dbl>
## 1 Avat…  2023                  684                   2320                     76
## 2 Barb…  2023                  635.                  1438.                    88
## 3 Supe…  2023                  574                   1358.                    59
## 4 Acro…  2023                  381.                   687.                    95
## 5 Guar…  2023                  359.                   845.                    81
## 6 Oppe…  2023                  324.                   938.                    93
## # ℹ 32 more variables: RottenTomatoesAudience <dbl>, `Budget ($m)` <dbl>,
## #   `budget recovered` <chr>, `times budget recovered` <dbl>,
## #   `force label` <chr>, `critics vs audience divergence` <dbl>,
## #   PrimaryGenre <chr>, Genres <chr>, `IMDB Rating` <dbl>,
## #   `exclude label` <chr>, `RT vs IMDB disparity` <dbl>, ...17 <lgl>,
## #   ...18 <lgl>, ...19 <lgl>, ...20 <lgl>, ...21 <lgl>, ...22 <lgl>,
## #   ...23 <lgl>, ...24 <lgl>, ...25 <lgl>, ...26 <lgl>, ...27 <lgl>, …
library(plotly)

# Audience score against critics score as smoothed trends only (no point
# layer), coloured and filled by primary genre, one panel per genre.
# NOTE(review): the default loess smoother emits "span too small" and
# singularity warnings on this data, presumably because some genres have very
# few films -- confirm, and consider an explicit smoothing method.
BlockbusterModel <- ggplot(
  Blockbuster2,
  aes(RottenTomatoesCritics, RottenTomatoesAudience)
) +
  geom_smooth(aes(colour = PrimaryGenre, fill = PrimaryGenre)) +
  facet_wrap(vars(PrimaryGenre))

# Render the trend plot as an interactive plotly figure.
ggplotly(BlockbusterModel)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 34.84
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 30.16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.6656
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : span too small. fewer
## data values than degrees of freedom.
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 34.84
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 30.16
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 4.6656