The data set consists of 39 movies from Marvel and DC, including information about each movie's IMDB rating, Metascore, running time in minutes, release year, budget, and box-office gross (from which profit is derived later).
# Load the movie data set. `stringsAsFactors = TRUE` is set explicitly because
# the default changed to FALSE in R 4.0.0; without it the text columns are read
# as character and the later `levels(db$Original.Title)` call returns NULL.
db <- read.csv("db.csv", stringsAsFactors = TRUE)
# Check the structure of the data (column types and a preview of the values)
str(db)
## 'data.frame': 39 obs. of 11 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Original.Title : Factor w/ 39 levels "Ant-Man","Ant-Man and the Wasp",..: 19 34 20 35 11 31 21 36 12 17 ...
## $ Company : Factor w/ 2 levels "DC","Marvel": 2 2 2 2 2 2 2 2 2 2 ...
## $ Rate : num 7.9 6.7 7 7 6.9 8 7.2 6.9 7.7 8 ...
## $ Metascore : int 79 61 57 57 66 69 62 54 70 76 ...
## $ Minutes : num 126 112 124 115 124 143 130 112 136 121 ...
## $ Release : int 2008 2008 2010 2011 2011 2012 2013 2013 2014 2014 ...
## $ Budget : num 140000000 150000000 200000000 150000000 140000000 220000000 200000000 170000000 170000000 170000000 ...
## $ Opening.Weekend.USA: int 98618668 55414050 128122480 65723338 65058524 207438708 174144585 85737841 95023721 94320883 ...
## $ Gross.USA : int 318604126 134806913 312433331 181030624 176654505 623357910 409013994 206362140 259766572 333176600 ...
## $ Gross.Worldwide : num 585366247 263427551 623933331 449326618 370569774 ...
# Count the missing (NA) values in every column of `db`
vapply(db, function(column) sum(is.na(column)), numeric(1))
## X Original.Title Company Rate
## 0 0 0 0
## Metascore Minutes Release Budget
## 0 0 0 0
## Opening.Weekend.USA Gross.USA Gross.Worldwide
## 0 0 0
None of the variables contains missing values.
# Preview the first rows of the raw data
head(db)
library(dplyr)
# Remove column `X` (a running integer 1, 2, 3, ... in the str() listing),
# then re-check the structure of the remaining columns
db <- select(db, -X)
str(db)
## 'data.frame': 39 obs. of 10 variables:
## $ Original.Title : Factor w/ 39 levels "Ant-Man","Ant-Man and the Wasp",..: 19 34 20 35 11 31 21 36 12 17 ...
## $ Company : Factor w/ 2 levels "DC","Marvel": 2 2 2 2 2 2 2 2 2 2 ...
## $ Rate : num 7.9 6.7 7 7 6.9 8 7.2 6.9 7.7 8 ...
## $ Metascore : int 79 61 57 57 66 69 62 54 70 76 ...
## $ Minutes : num 126 112 124 115 124 143 130 112 136 121 ...
## $ Release : int 2008 2008 2010 2011 2011 2012 2013 2013 2014 2014 ...
## $ Budget : num 140000000 150000000 200000000 150000000 140000000 220000000 200000000 170000000 170000000 170000000 ...
## $ Opening.Weekend.USA: int 98618668 55414050 128122480 65723338 65058524 207438708 174144585 85737841 95023721 94320883 ...
## $ Gross.USA : int 318604126 134806913 312433331 181030624 176654505 623357910 409013994 206362140 259766572 333176600 ...
## $ Gross.Worldwide : num 585366247 263427551 623933331 449326618 370569774 ...
# List the distinct movie titles stored as the levels of `Original.Title`
levels(db[["Original.Title"]])
## [1] "Ant-Man" "Ant-Man and the Wasp"
## [3] "Aquaman" "Avengers: Age of Ultron "
## [5] "Avengers: Endgame" "Avengers: Infinity War"
## [7] "Batman Begins" "Batman v Superman: Dawn of Justice"
## [9] "Black Panther" "Captain America: Civil War"
## [11] "Captain America: The First Avenger" "Captain America: The Winter Soldier"
## [13] "Captain Marve" "Catwoman"
## [15] "Doctor Strange" "Green Lantern"
## [17] "Guardians of the Galaxy" "Guardians of the Galaxy Vol. 2"
## [19] "Iron Man" "Iron Man 2"
## [21] "Iron Man Three" "Joker"
## [23] "Jonah Hex " "Justice League"
## [25] "Man of Steel" "Shazam!"
## [27] "Spider-Man: Far from Home" "Spider-Man: Homecoming"
## [29] "Suicide Squad " "Superman Returns"
## [31] "The Avengers" "The Dark Knight"
## [33] "The Dark Knight Rises" "The Incredible Hulk"
## [35] "Thor" "Thor: The Dark World"
## [37] "Thor:Ragnarok" "Watchmen"
## [39] "Wonder Woman"
library(tidyverse)
library(glue)
# Total worldwide gross per company (one row per `Company`)
db1 <- db %>%
  group_by(Company) %>%
  summarise(totalinc = sum(Gross.Worldwide)) %>%
  ungroup()
# Hover label shown by the interactive chart
db1$text <- glue("Number of Total Income = {db1$totalinc}")
library(ggplot2)
library(GGally)
library(ggthemes)
library(ggpubr)
library(plotly)
# Horizontal bar chart of total worldwide gross, one bar per company.
# The `text` aesthetic is not drawn by ggplot2; it carries the hover label
# that ggplotly() picks up through `tooltip = "text"`.
plot1 <- ggplot(db1, aes(x = Company, y = totalinc, text = text)) +
  theme() +
  geom_col(aes(fill = Company), position = "dodge") +
  coord_flip() +
  labs(
    title = "Total Income of Marvel vs DC",
    x = "Company",
    y = "Total Income Gross (USD)",
    caption = "Made by Meinari Claudia"
  )
# Convert the static plot into an interactive plotly widget
ggplotly(plot1, tooltip = "text")
# Top 10 movies ranked by IMDB rating
# Sort by rating (highest first), keep the first ten rows, and attach the
# hover label used by the interactive chart.
db2 <- head(arrange(db, desc(Rate)), 10) %>%
  mutate(text = glue(
"Rating = {Rate},
Total Income = {Gross.Worldwide}"
  ))
# Horizontal bars ordered by rating and coloured by company
plot2 <- ggplot(
  db2,
  aes(x = reorder(Original.Title, Rate), y = Rate, text = text)
) +
  geom_col(aes(fill = Company), show.legend = TRUE) +
  labs(
    title = "Top 10 Movies of Marvel and DC based on IMDB rating",
    subtitle = "Marvel VS DC",
    y = NULL,
    x = "Movie Title"
  ) +
  coord_flip()
# Interactive version; the tooltip shows only the prepared `text` label
ggplotly(plot2, tooltip = "text")
# Top 10 movies ranked by profit
# Profit is worldwide gross minus budget. It is labelled "Profit" (not
# "Income") so it is not confused with the worldwide gross that the other
# charts in this file report as "Total Income".
db3 <- db %>%
  mutate(profit = Gross.Worldwide - Budget) %>%
  arrange(desc(profit)) %>%
  head(10) %>%
  mutate(text = glue(
"Rating = {Rate},
Profit = {profit}"
  ))
# Horizontal bars ordered by profit and coloured by company
plot3 <- ggplot(
  data = db3,
  aes(x = reorder(Original.Title, profit), y = profit, text = text)
) +
  geom_col(aes(fill = Company), show.legend = TRUE) +
  labs(
    title = "Top 10 Movies of Marvel and DC based on Profit",
    subtitle = "Marvel VS DC",
    y = NULL,
    x = "Movie Title"
  ) +
  coord_flip()
# Interactive version; the tooltip shows only the prepared `text` label
ggplotly(plot3, tooltip = "text")