For this week’s Google Time, I read about plotly graphs in R and decided to make one myself. I started by recreating the diamonds example plot from plotly.com. Then I browsed informationisbeautiful.net and saw their visualization of movie successes in 2023. I downloaded the dataset they used and named it “Blockbuster”. However, I didn’t like the way some of the variables were named, so I made a copy of the sheet (“Blockbuster2”) and renamed them there instead of cleaning the data in R. With some trial and error, I was able to make a graph similar to the diamonds example, but without the individual data points, because I prefer the look of the trends alone. Plus, if you hover over the plotly graph, you’ll see those data points; the data is still accessible without the plot being too busy.
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:mosaic':
##
## do
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Reproduce the plotly.com diamonds example: take a fixed random sample of
# 1000 rows, then plot price against carat with one panel per cut.
set.seed(100)  # fixed seed so the same rows are drawn on every run
d <- diamonds[sample(nrow(diamonds), size = 1000), ]

# Build the plot one layer at a time.
p <- ggplot(d, aes(x = carat, y = price))
# `text` is not a ggplot2 aesthetic (hence the warning below); it is supplied
# here so the clarity label is available to the interactive plotly tooltip.
p <- p + geom_point(aes(text = paste("Clarity:", clarity)), size = 4)
## Warning: Ignoring unknown aesthetics: text
p <- p + geom_smooth(aes(colour = cut, fill = cut))
p <- p + facet_wrap(~ cut)

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
library(readr)
# Load the original 2023 film spreadsheet export. The path is relative, so it
# is resolved against the current working directory.
# NOTE(review): the import log reports 19 unnamed logical columns
# (...17 to ...35) - presumably empty spreadsheet columns; confirm and
# consider dropping them.
Blockbuster <- read_csv(
  file = "Data Visualization/The Hollywood In$ider - all data - 2023 for VizSweet.csv"
)
## New names:
## Rows: 97 Columns: 37
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (7): Film, budget recovered, force label, Primary Genre, Genres, exclud... dbl
## (11): Year, Domestic gross ($m), Worldwide Gross ($m), Rotten Tomatoes ... lgl
## (19): ...17, ...18, ...19, ...20, ...21, ...22, ...23, ...24, ...25, ......
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...22`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## • `` -> `...26`
## • `` -> `...27`
## • `` -> `...28`
## • `` -> `...29`
## • `` -> `...30`
## • `` -> `...31`
## • `` -> `...32`
## • `` -> `...33`
## • `` -> `...34`
## • `` -> `...35`
# Preview the first six rows of the original import.
head(Blockbuster, n = 6)
## # A tibble: 6 × 37
## Film Year `Domestic gross ($m)` `Worldwide Gross ($m)`
## <chr> <dbl> <dbl> <dbl>
## 1 Avatar 2 2023 684 2320
## 2 Barbie 2023 635. 1438.
## 3 Super Mario Bros 2023 574 1358.
## 4 Across the Spider-Verse 2023 381. 687.
## 5 Guardians of the Galaxy 3 2023 359. 845.
## 6 Oppenheimer 2023 324. 938.
## # ℹ 33 more variables: `Rotten Tomatoes critics` <dbl>,
## # `Rotten Tomatoes Audience` <dbl>, `Budget ($m)` <dbl>,
## # `budget recovered` <chr>, `times budget recovered` <dbl>,
## # `force label` <chr>, `critics vs audience divergence` <dbl>,
## # `Primary Genre` <chr>, Genres <chr>, `IMDB Rating` <dbl>,
## # `exclude label` <chr>, `RT vs IMDB disparity` <dbl>, ...17 <lgl>,
## # ...18 <lgl>, ...19 <lgl>, ...20 <lgl>, ...21 <lgl>, ...22 <lgl>, …
library(readr)
# Load the hand-edited copy of the sheet, in which some columns were renamed
# without spaces (e.g. RottenTomatoesCritics, PrimaryGenre). The path is
# relative, so it is resolved against the current working directory.
Blockbuster2 <- read_csv(
  file = "Data Visualization/Blockbuster2 - 2023 for VizSweet.csv"
)
## New names:
## Rows: 97 Columns: 37
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (7): Film, budget recovered, force label, PrimaryGenre, Genres, exclude... dbl
## (11): Year, Domestic gross ($m), Worldwide Gross ($m), RottenTomatoesCri... lgl
## (19): ...17, ...18, ...19, ...20, ...21, ...22, ...23, ...24, ...25, ......
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...22`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## • `` -> `...26`
## • `` -> `...27`
## • `` -> `...28`
## • `` -> `...29`
## • `` -> `...30`
## • `` -> `...31`
## • `` -> `...32`
## • `` -> `...33`
## • `` -> `...34`
## • `` -> `...35`
# Preview the first six rows of the renamed copy.
head(Blockbuster2, n = 6)
## # A tibble: 6 × 37
## Film Year `Domestic gross ($m)` `Worldwide Gross ($m)` RottenTomatoesCritics
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Avat… 2023 684 2320 76
## 2 Barb… 2023 635. 1438. 88
## 3 Supe… 2023 574 1358. 59
## 4 Acro… 2023 381. 687. 95
## 5 Guar… 2023 359. 845. 81
## 6 Oppe… 2023 324. 938. 93
## # ℹ 32 more variables: RottenTomatoesAudience <dbl>, `Budget ($m)` <dbl>,
## # `budget recovered` <chr>, `times budget recovered` <dbl>,
## # `force label` <chr>, `critics vs audience divergence` <dbl>,
## # PrimaryGenre <chr>, Genres <chr>, `IMDB Rating` <dbl>,
## # `exclude label` <chr>, `RT vs IMDB disparity` <dbl>, ...17 <lgl>,
## # ...18 <lgl>, ...19 <lgl>, ...20 <lgl>, ...21 <lgl>, ...22 <lgl>,
## # ...23 <lgl>, ...24 <lgl>, ...25 <lgl>, ...26 <lgl>, ...27 <lgl>, …
library(plotly)
# Trend of audience score against critic score, one panel per primary genre.
# Only the smoothed trend is drawn; no point layer is added.
BlockbusterModel <- ggplot(
  Blockbuster2,
  aes(x = RottenTomatoesCritics, y = RottenTomatoesAudience)
)
# NOTE(review): no smoothing method is specified, and the rendered output
# below reports loess together with "span too small" warnings - presumably
# some genres have too few films for a loess fit. Confirm before relying on
# those panels.
BlockbusterModel <- BlockbusterModel +
  geom_smooth(aes(colour = PrimaryGenre, fill = PrimaryGenre))
BlockbusterModel <- BlockbusterModel + facet_wrap(~ PrimaryGenre)

# Convert the static ggplot into an interactive plotly widget.
ggplotly(BlockbusterModel)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 34.84
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 30.16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.6656
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : span too small. fewer
## data values than degrees of freedom.
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 34.84
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 30.16
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 4.6656