Load the required libraries
library(dplyr)
library(ggplot2)
a Create 4 vectors for Film, Studio, TopGross, OpenQuarter
# Raw columns for the 15 films. The four vectors are parallel: position i in
# each one describes the same film. TopGross is listed in non-increasing order.
Film <- c(
  "The Last Jedi", "Beauty and the Beast", "Wonder Woman", "Jumanji",
  "Guardians of the Galaxy", "Spider-Man", "It", "Thor:Ragnarok",
  "Despicable Me 3", "Justice League", "Logan", "The Fate of the Furious",
  "Coco", "Dunkirk", "Get Out"
)
Studio <- c(
  "Disney", "Disney", "Warner", "Sony", "Disney",
  "Sony", "Warner", "Disney", "Universal", "Warner",
  "Fox", "Universal", "Disney", "Warner", "Universal"
)
# Gross per film (the unit is not stated anywhere in this file).
TopGross <- c(
  620, 504, 413, 405, 390,
  334, 328, 315, 265, 229,
  226, 226, 210, 188, 176
)
# Presumably the calendar quarter (1-4) in which each film opened.
OpenQuarter <- c(4, 1, 2, 4, 2, 3, 3, 4, 2, 4, 1, 2, 4, 3, 1)
b Create dataframe TopMovies
# Assemble the four parallel vectors into one data frame, one row per film.
# Text columns are kept as character here (stringsAsFactors = FALSE).
TopMovies <- data.frame(
  Film = Film,
  Studio = Studio,
  TopGross = TopGross,
  OpenQuarter = OpenQuarter,
  stringsAsFactors = FALSE
)
print(TopMovies)
## Film Studio TopGross OpenQuarter
## 1 The Last Jedi Disney 620 4
## 2 Beauty and the Beast Disney 504 1
## 3 Wonder Woman Warner 413 2
## 4 Jumanji Sony 405 4
## 5 Guardians of the Galaxy Disney 390 2
## 6 Spider-Man Sony 334 3
## 7 It Warner 328 3
## 8 Thor:Ragnarok Disney 315 4
## 9 Despicable Me 3 Universal 265 2
## 10 Justice League Warner 229 4
## 11 Logan Fox 226 1
## 12 The Fate of the Furious Universal 226 2
## 13 Coco Disney 210 4
## 14 Dunkirk Warner 188 3
## 15 Get Out Universal 176 1
c Encode Studio as Factor
# Inspect the data frame before recoding: container class, dimensions, and
# per-column types.
class(TopMovies)
## [1] "data.frame"
dim(TopMovies)
## [1] 15 4
str(TopMovies)
## 'data.frame': 15 obs. of 4 variables:
## $ Film : chr "The Last Jedi" "Beauty and the Beast" "Wonder Woman" "Jumanji" ...
## $ Studio : chr "Disney" "Disney" "Warner" "Sony" ...
## $ TopGross : num 620 504 413 405 390 334 328 315 265 229 ...
## $ OpenQuarter: num 4 1 2 4 2 3 3 4 2 4 ...
# Recode the studio names from character to factor, then re-check the
# structure to confirm the column type changed.
TopMovies[["Studio"]] <- as.factor(TopMovies[["Studio"]])
str(TopMovies)
## 'data.frame': 15 obs. of 4 variables:
## $ Film : chr "The Last Jedi" "Beauty and the Beast" "Wonder Woman" "Jumanji" ...
## $ Studio : Factor w/ 5 levels "Disney","Fox",..: 1 1 5 3 1 3 5 1 4 5 ...
## $ TopGross : num 620 504 413 405 390 334 328 315 265 229 ...
## $ OpenQuarter: num 4 1 2 4 2 3 3 4 2 4 ...
d Encode 1,2,3,4 in OpenQuarter as factors
# Recode the numeric quarter values as a factor; the distinct values become
# the levels. Re-check the structure afterwards.
TopMovies[["OpenQuarter"]] <- as.factor(TopMovies[["OpenQuarter"]])
str(TopMovies)
## 'data.frame': 15 obs. of 4 variables:
## $ Film : chr "The Last Jedi" "Beauty and the Beast" "Wonder Woman" "Jumanji" ...
## $ Studio : Factor w/ 5 levels "Disney","Fox",..: 1 1 5 3 1 3 5 1 4 5 ...
## $ TopGross : num 620 504 413 405 390 334 328 315 265 229 ...
## $ OpenQuarter: Factor w/ 4 levels "1","2","3","4": 4 1 2 4 2 3 3 4 2 4 ...
e How many Disney movies in top 15 grossing
# Count the Disney rows across all 15 films. n() is left unnamed, so the
# one-column result is labelled "n()".
TopMovies %>%
  filter(Studio == "Disney") %>%
  summarise(n())
## n()
## 1 5
f How many Disney movies in top 10 grossing
# Keep the 10 rows with the largest TopGross first, then count how many of
# those belong to Disney. The order matters: ranking happens before filtering.
TopMovies %>%
  top_n(n = 10, wt = TopGross) %>%
  filter(Studio == "Disney") %>%
  summarise(n())
## n()
## 1 4
g How much Disney earned from top 15 grossing movies
# Total TopGross over all Disney rows. colSums() on the single remaining
# column returns a named numeric vector ("TopGross").
TopMovies %>%
  filter(Studio == "Disney") %>%
  select(TopGross) %>%
  colSums()
## TopGross
## 2039
h How much Disney earned from top 10 grossing movies
# Total TopGross for the Disney rows among the 10 highest-grossing films.
# Ranking is applied to the full table before the studio filter.
TopMovies %>%
  top_n(n = 10, wt = TopGross) %>%
  filter(Studio == "Disney") %>%
  select(TopGross) %>%
  colSums()
## TopGross
## 1829
i How much Disney earned from top 5 grossing movies
# Total TopGross for the Disney rows among the 5 highest-grossing films.
# Ranking is applied to the full table before the studio filter.
TopMovies %>%
  top_n(n = 5, wt = TopGross) %>%
  filter(Studio == "Disney") %>%
  select(TopGross) %>%
  colSums()
## TopGross
## 1514
j How many Warner movies in top 15 grossing
# Count the Warner rows across all 15 films. n() is left unnamed, so the
# one-column result is labelled "n()".
TopMovies %>%
  filter(Studio == "Warner") %>%
  summarise(n())
## n()
## 1 4
k How many Warner movies in top 10 grossing
# Keep the 10 rows with the largest TopGross first, then count how many of
# those belong to Warner. The order matters: ranking happens before filtering.
TopMovies %>%
  top_n(n = 10, wt = TopGross) %>%
  filter(Studio == "Warner") %>%
  summarise(n())
## n()
## 1 3
l How much Warner earned from top 15 grossing movies
# Total TopGross over all Warner rows. colSums() on the single remaining
# column returns a named numeric vector ("TopGross").
TopMovies %>%
  filter(Studio == "Warner") %>%
  select(TopGross) %>%
  colSums()
## TopGross
## 1158
m How much Warner earned from top 10 grossing movies
# Total TopGross for the Warner rows among the 10 highest-grossing films.
# Ranking is applied to the full table before the studio filter.
TopMovies %>%
  top_n(n = 10, wt = TopGross) %>%
  filter(Studio == "Warner") %>%
  select(TopGross) %>%
  colSums()
## TopGross
## 970
n How much Warner earned from top 5 grossing movies
# Total TopGross for the Warner rows among the 5 highest-grossing films.
# Ranking is applied to the full table before the studio filter.
TopMovies %>%
  top_n(n = 5, wt = TopGross) %>%
  filter(Studio == "Warner") %>%
  select(TopGross) %>%
  colSums()
## TopGross
## 413
o Plot a graph illustrating the number of movies by each studio in the top 15 grossing
# Number of films per studio across all 15 rows, one summary row per studio.
x <- TopMovies %>%
  group_by(Studio) %>%
  summarise(NoMovie = n())
x
## # A tibble: 5 x 2
## Studio NoMovie
## <fct> <int>
## 1 Disney 5
## 2 Fox 1
## 3 Sony 2
## 4 Universal 3
## 5 Warner 4
# Bar labels are taken from the summary itself rather than typed by hand, so
# each bar is always labelled with the studio its count belongs to.
barplot(
  x$NoMovie,
  main = "Number of Movies in Top 15 Grossing",
  xlab = "Studio",
  names.arg = as.character(x$Studio)
)

p Plot a graph illustrating the number of movies by each studio in the top 10 grossing
# Number of films per studio among the 10 highest-grossing rows. Studios with
# no film in that subset do not appear in the summary.
y <- TopMovies %>%
  top_n(10, TopGross) %>%
  group_by(Studio) %>%
  summarise(Total = n())
y
## # A tibble: 4 x 2
## Studio Total
## <fct> <int>
## 1 Disney 4
## 2 Sony 2
## 3 Universal 1
## 4 Warner 3
# Bar labels are taken from the summary itself rather than typed by hand, so
# they stay correct whichever studios survive the top-10 cut.
barplot(
  y$Total,
  main = "Number of Movies in Top 10 Grossing",
  xlab = "Studio",
  names.arg = as.character(y$Studio)
)

q Plot a graph illustrating the total revenue each studio has earned from movies in the top 15 grossing
# Total TopGross per studio across all 15 rows, one summary row per studio.
z <- TopMovies %>%
  group_by(Studio) %>%
  summarise(totalrev = sum(TopGross))
z
## # A tibble: 5 x 2
## Studio totalrev
## <fct> <dbl>
## 1 Disney 2039
## 2 Fox 226
## 3 Sony 739
## 4 Universal 667
## 5 Warner 1158
# Bar labels come from the summary itself. The previous hand-typed labels
# included " Universal" with a stray leading space.
barplot(
  z$totalrev,
  main = "Total Revenue by Studio in Top 15 Grossing",
  xlab = "Studio",
  names.arg = as.character(z$Studio)
)

r Plot a graph illustrating the total revenue each studio has earned from movies in the top 10 grossing
# Total TopGross per studio among the 10 highest-grossing rows. Studios with
# no film in that subset do not appear in the summary.
a <- TopMovies %>%
  top_n(10, TopGross) %>%
  group_by(Studio) %>%
  summarise(totalrev = sum(TopGross))
a
## # A tibble: 4 x 2
## Studio totalrev
## <fct> <dbl>
## 1 Disney 1829
## 2 Sony 739
## 3 Universal 265
## 4 Warner 970
# Bar labels come from the summary itself. The previous hand-typed labels
# included " Universal" with a stray leading space.
barplot(
  a$totalrev,
  main = "Total Revenue by Studio in Top 10 Grossing",
  xlab = "Studio",
  names.arg = as.character(a$Studio)
)

s Plot a graph illustrating the total revenue each studio has earned from movies in the top 15 grossing (ggplot)
# Sum TopGross per studio over all 15 rows and draw one bar per studio.
# geom_col() draws bar heights straight from the y values.
TopMovies %>%
  group_by(Studio) %>%
  summarise(totalrev = sum(TopGross)) %>%
  ggplot(aes(x = Studio, y = totalrev)) +
  geom_col() +
  ggtitle("Total Revenue by Studio in Top 15 Grossing")

t Plot a graph illustrating the total revenue each studio has earned from movies in the top 10 grossing (ggplot)
# Restrict to the 10 highest-grossing rows, sum TopGross per studio, and draw
# one bar per studio. geom_col() draws bar heights straight from the y values.
TopMovies %>%
  top_n(n = 10, wt = TopGross) %>%
  group_by(Studio) %>%
  summarise(totalrev = sum(TopGross)) %>%
  ggplot(aes(x = Studio, y = totalrev)) +
  geom_col() +
  ggtitle("Total Revenue by Studio in Top 10 Grossing")
