1 Introduction

The data consist of 39 Marvel and DC movies, including information about IMDb rating, Metascore, runtime in minutes, release year, budget, and box-office gross (from which profit is computed later).

2 Load Data

# Read string columns as factors explicitly. Since R 4.0 the default is
# stringsAsFactors = FALSE, which would turn Original.Title and Company into
# character vectors and make the later levels(db$Original.Title) return NULL.
db <- read.csv("db.csv", stringsAsFactors = TRUE)

3 Exploratory Data Analysis

# Check the structure of the data (column names, types, first values)
str(db)
## 'data.frame':    39 obs. of  11 variables:
##  $ X                  : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Original.Title     : Factor w/ 39 levels "Ant-Man","Ant-Man and the Wasp",..: 19 34 20 35 11 31 21 36 12 17 ...
##  $ Company            : Factor w/ 2 levels "DC","Marvel": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Rate               : num  7.9 6.7 7 7 6.9 8 7.2 6.9 7.7 8 ...
##  $ Metascore          : int  79 61 57 57 66 69 62 54 70 76 ...
##  $ Minutes            : num  126 112 124 115 124 143 130 112 136 121 ...
##  $ Release            : int  2008 2008 2010 2011 2011 2012 2013 2013 2014 2014 ...
##  $ Budget             : num  140000000 150000000 200000000 150000000 140000000 220000000 200000000 170000000 170000000 170000000 ...
##  $ Opening.Weekend.USA: int  98618668 55414050 128122480 65723338 65058524 207438708 174144585 85737841 95023721 94320883 ...
##  $ Gross.USA          : int  318604126 134806913 312433331 181030624 176654505 623357910 409013994 206362140 259766572 333176600 ...
##  $ Gross.Worldwide    : num  585366247 263427551 623933331 449326618 370569774 ...
# Count the missing values (NA) in each column
colSums(is.na(db))
##                   X      Original.Title             Company                Rate 
##                   0                   0                   0                   0 
##           Metascore             Minutes             Release              Budget 
##                   0                   0                   0                   0 
## Opening.Weekend.USA           Gross.USA     Gross.Worldwide 
##                   0                   0                   0

There are no missing values in any variable.

# Preview the first rows of the raw data
head(db)
library(dplyr)

# X holds 1, 2, 3, ... and appears to be just a row index, so remove it
db <- select(db, -X)

# Re-check the structure to confirm the column is gone
str(db)
## 'data.frame':    39 obs. of  10 variables:
##  $ Original.Title     : Factor w/ 39 levels "Ant-Man","Ant-Man and the Wasp",..: 19 34 20 35 11 31 21 36 12 17 ...
##  $ Company            : Factor w/ 2 levels "DC","Marvel": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Rate               : num  7.9 6.7 7 7 6.9 8 7.2 6.9 7.7 8 ...
##  $ Metascore          : int  79 61 57 57 66 69 62 54 70 76 ...
##  $ Minutes            : num  126 112 124 115 124 143 130 112 136 121 ...
##  $ Release            : int  2008 2008 2010 2011 2011 2012 2013 2013 2014 2014 ...
##  $ Budget             : num  140000000 150000000 200000000 150000000 140000000 220000000 200000000 170000000 170000000 170000000 ...
##  $ Opening.Weekend.USA: int  98618668 55414050 128122480 65723338 65058524 207438708 174144585 85737841 95023721 94320883 ...
##  $ Gross.USA          : int  318604126 134806913 312433331 181030624 176654505 623357910 409013994 206362140 259766572 333176600 ...
##  $ Gross.Worldwide    : num  585366247 263427551 623933331 449326618 370569774 ...
# List every distinct movie title (the factor levels of Original.Title)
levels(db[["Original.Title"]])
##  [1] "Ant-Man"                             "Ant-Man and the Wasp"               
##  [3] "Aquaman"                             "Avengers: Age of Ultron "           
##  [5] "Avengers: Endgame"                   "Avengers: Infinity War"             
##  [7] "Batman Begins"                       "Batman v Superman: Dawn of Justice" 
##  [9] "Black Panther"                       "Captain America: Civil War"         
## [11] "Captain America: The First Avenger"  "Captain America: The Winter Soldier"
## [13] "Captain Marve"                       "Catwoman"                           
## [15] "Doctor Strange"                      "Green Lantern"                      
## [17] "Guardians of the Galaxy"             "Guardians of the Galaxy Vol. 2"     
## [19] "Iron Man"                            "Iron Man 2"                         
## [21] "Iron Man Three"                      "Joker"                              
## [23] "Jonah Hex "                          "Justice League"                     
## [25] "Man of Steel"                        "Shazam!"                            
## [27] "Spider-Man: Far from Home"           "Spider-Man: Homecoming"             
## [29] "Suicide Squad "                      "Superman Returns"                   
## [31] "The Avengers"                        "The Dark Knight"                    
## [33] "The Dark Knight Rises"               "The Incredible Hulk"                
## [35] "Thor"                                "Thor: The Dark World"               
## [37] "Thor:Ragnarok"                       "Watchmen"                           
## [39] "Wonder Woman"
library(tidyverse)
library(glue)

# Sum the worldwide gross per company and attach the hover label that the
# interactive bar chart displays for each bar.
db1 <- mutate(
  ungroup(
    summarise(group_by(db, Company), totalinc = sum(Gross.Worldwide))
  ),
  text = glue("Number of Total Income = {totalinc}")
)
library(ggplot2)
library(GGally)
library(ggthemes)
library(ggpubr)
library(plotly)

# Horizontal bar chart of the summed worldwide gross for each company.
# The `text` aesthetic is not drawn by ggplot2; it only feeds the plotly
# tooltip requested below.
plot1 <- ggplot(db1, aes(x = Company, y = totalinc, text = text)) +
  theme() + # no arguments, so the default theme is left as is
  geom_col(aes(fill = Company), position = "dodge") +
  coord_flip() +
  labs(
    title = "Total Income of Marvel vs DC",
    x = "Company",
    y = "Total Income Gross (USD)",
    caption = "Made by Meinari Claudia"
  )

# Convert to an interactive widget that shows only the prepared label on hover
ggplotly(plot1, tooltip = "text")
# Keep the ten movies with the highest IMDB rating and build the hover label
# (rating plus worldwide gross) for each of them.
db2 <- mutate(
  head(arrange(db, desc(Rate)), 10),
  text = glue(
    "Rating = {Rate},
    Total Income = {Gross.Worldwide}"
  )
)


# Horizontal bar chart of the ten highest-rated movies, ordered by rating and
# coloured by company. The `text` aesthetic supplies the plotly tooltip.
plot2 <- ggplot(
  data = db2,
  aes(x = reorder(Original.Title, Rate), y = Rate, text = text)
) +
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  geom_col(aes(fill = Company), show.legend = TRUE) +
  labs(
    title = "Top 10 Movies of Marvel and DC based on IMDB rating",
    subtitle = "Marvel VS DC",
    y = NULL,
    x = "Movie Title"
  ) +
  coord_flip()

# Interactive version showing only the prepared label on hover
ggplotly(plot2, tooltip = "text")
# Rank movies by profit, defined here as worldwide gross minus budget, and
# keep the ten most profitable ones.
db3 <- db %>%
  mutate(profit = Gross.Worldwide - Budget) %>%
  arrange(desc(profit)) %>%
  head(10) %>%
  # Label the value as profit: "Total Income" is used elsewhere in this
  # analysis for worldwide gross, which is not what is shown here.
  mutate(text = glue(
    "Rating = {Rate},
    Profit = {profit}"
  ))


# Horizontal bar chart of the ten most profitable movies, ordered by profit
# and coloured by company. The `text` aesthetic supplies the plotly tooltip.
plot3 <- ggplot(
  data = db3,
  aes(x = reorder(Original.Title, profit), y = profit, text = text)
) +
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  geom_col(aes(fill = Company), show.legend = TRUE) +
  labs(
    title = "Top 10 Movies of Marvel and DC based on Profit",
    subtitle = "Marvel VS DC",
    y = NULL,
    x = "Movie Title"
  ) +
  coord_flip()

# Interactive version showing only the prepared label on hover
ggplotly(plot3, tooltip = "text")