Movies Data

Including Plots

You can also embed plots, for example:

# Print a compact summary of `data`: its class, dimensions, and the type and
# first few values of every column.
str(data)

## 'data.frame':    54 obs. of  11 variables:
##  $ X                  : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ Movie.Name         : chr  "The Shawshank Redemption" "The Godfather" "Ramayana: The Legend of Prince Rama" "The Chaos Class" ...
##  $ Year.of.Release    : int  1994 1972 1993 1975 2008 1993 2003 1957 1974 2016 ...
##  $ Run.Time.in.minutes: int  142 175 135 87 152 195 201 96 202 147 ...
##  $ Movie.Rating       : num  9.3 9.2 9.2 9.2 9 9 9 9 9 9 ...
##  $ Votes              : int  2804443 1954174 12995 42231 2786129 1409869 1919908 834248 1327069 29102 ...
##  $ MetaScore          : int  82 100 NA NA 84 95 94 97 90 NA ...
##  $ Gross              : int  28340000 134970000 NA NA 534860000 96900000 377850000 4360000 57300000 NA ...
##  $ Genre              : chr  "['Drama']" "['Crime', ' Drama']" "['Animation', ' Action', ' Adventure']" "['Comedy', ' Drama']" ...
##  $ Certification      : chr  "R" "R" "PG" "" ...
##  $ Director           : chr  "['Frank Darabont']" "['Francis Ford Coppola']" "['Ram Mohan', 'Y?\xafg?? Sak??', 'Koichi Saski']" "['Ertem Egilmez']" ...

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.2.3

  # No aesthetics or layers are supplied, so this renders an empty plot that
  # carries only the title.
  ggplot(data = data) +
    labs(title = "Movies data")

# Votes against MetaScore. Without a geom layer ggplot2 draws only empty axes,
# so points are added explicitly; na.rm = TRUE silently drops films whose
# MetaScore is missing.
ggplot(data, aes(x = Votes, y = MetaScore)) +
  geom_point(na.rm = TRUE) +
  labs(title = "Movies Data")

# Movie rating against Gross; films with a missing Gross are dropped silently.
ggplot(data, aes(x = Movie.Rating, y = Gross)) +
  geom_point(na.rm = TRUE) +
  labs(title = "Movies data")

# Base-graphics overview of the whole data frame (a matrix of pairwise
# scatter plots, one panel per pair of columns).
plot(data)

# Scatter plot with Gross on the x-axis and the release year on the y-axis.
ggplot(data = data, mapping = aes(x = Gross, y = Year.of.Release)) +
  labs(
    title = "Gross vs Year of Release",
    x = "Gross",
    y = "Year of Release"
  ) +
  geom_point()

## Warning: Removed 31 rows containing missing values (`geom_point()`).

# Rating against run time, with the point size scaled by the number of votes.
ggplot(
  data,
  aes(x = Movie.Rating, y = Run.Time.in.minutes, size = Votes)
) +
  geom_point() +
  labs(
    title = "Movie Rating vs Run Time in Minutes",
    x = "Movie Rating",
    y = "Run Time in Minutes"
  )

# MetaScore against run time, with colour and point shape both keyed to the
# film's certification. The title and y-axis label name the variable that is
# actually mapped to y (Run.Time.in.minutes), not Gross.
# NOTE(review): Certification has 7 distinct values, one more than the default
# shape palette supports, so ggplot2 warns; consider dropping the shape mapping
# or supplying shapes manually.
ggplot(
  data,
  aes(
    x = MetaScore,
    y = Run.Time.in.minutes,
    colour = factor(Certification),
    shape = factor(Certification)
  )
) +
  geom_point() +
  labs(
    title = "MetaScore vs Run Time in Minutes",
    x = "MetaScore",
    y = "Run Time in Minutes"
  )

## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.

## Warning: Removed 29 rows containing missing values (`geom_point()`).

# Store the vote counts as a factor in a new `Rank` column.
data[["Rank"]] <- factor(data[["Votes"]])

# One point per film: vote count (treated as a discrete axis) against the
# release year.
ggplot(
  data = data,
  mapping = aes(x = factor(Votes), y = Year.of.Release)
) +
  geom_point()

# Histogram of release years in 5-year bins.
ggplot(data = data, mapping = aes(x = Year.of.Release)) +
  geom_histogram(
    binwidth = 5,
    fill = "lightblue",
    colour = "black"
  ) +
  labs(
    title = "Year of Release",
    x = "Year of Release",
    y = "Count"
  )

# Number of films per certification, one filled bar per category.
# geom_bar() counts rows by default, so no explicit stat is needed.
ggplot(data, aes(x = as.factor(Certification), fill = Certification)) +
  geom_bar()

# Pie chart showing how often each distinct rating value occurs.
Movie.Rating <- table(data$Movie.Rating)

# Share of each rating as a whole-number percentage of all films.
share <- round(Movie.Rating / sum(Movie.Rating) * 100)

# Slice labels of the form "<rating> <share>%", built in a single paste0()
# call instead of paste(..., sep = "").
data.labels <- paste0(names(Movie.Rating), " ", share, "%")

pie(
  Movie.Rating,
  labels = data.labels,
  clockwise = TRUE,
  col = rainbow(length(data.labels)),
  main = "Frequency of Movie Rating"
)

# Jittered votes per certification, coloured by Gross, with one facet column
# for each certification value.
ggplot(
  data = data,
  mapping = aes(x = as.factor(Certification), y = Votes, colour = Gross)
) +
  facet_grid(cols = vars(Certification)) +
  geom_jitter()

# Box plot of MetaScore for every distinct Genre value, one fill colour per
# genre, using the minimal theme.
# NOTE(review): the Set3 palette provides at most 12 colours and the rendered
# output warns that Genre needs more; consider a palette without that limit.
bx <- ggplot(data = data, mapping = aes(x = Genre, y = MetaScore, fill = Genre)) +
  geom_boxplot() +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal() +
  labs(title = "Distribution of MetaScore by Genre", x = "Genre", y = "MetaScore")

# Render the stored plot object.
print(bx)

## Warning: Removed 29 rows containing non-finite values (`stat_boxplot()`).

## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set3 is 12
## Returning the palette you asked for with that many colors

# Preview the first rows of `data`, which now also carries the `Rank` column.
head(data)

##   X                          Movie.Name Year.of.Release Run.Time.in.minutes
## 1 0            The Shawshank Redemption            1994                 142
## 2 1                       The Godfather            1972                 175
## 3 2 Ramayana: The Legend of Prince Rama            1993                 135
## 4 3                     The Chaos Class            1975                  87
## 5 4                     The Dark Knight            2008                 152
## 6 5                    Schindler's List            1993                 195
##   Movie.Rating   Votes MetaScore     Gross
## 1          9.3 2804443        82  28340000
## 2          9.2 1954174       100 134970000
## 3          9.2   12995        NA        NA
## 4          9.2   42231        NA        NA
## 5          9.0 2786129        84 534860000
## 6          9.0 1409869        95  96900000
##                                    Genre Certification
## 1                              ['Drama']             R
## 2                    ['Crime', ' Drama']             R
## 3 ['Animation', ' Action', ' Adventure']            PG
## 4                   ['Comedy', ' Drama']              
## 5         ['Action', ' Crime', ' Drama']         PG-13
## 6    ['Biography', ' Drama', ' History']             R
##                                           Director    Rank
## 1                               ['Frank Darabont'] 2804443
## 2                         ['Francis Ford Coppola'] 1954174
## 3 ['Ram Mohan', 'Y?\xafg?? Sak??', 'Koichi Saski']   12995
## 4                                ['Ertem Egilmez']   42231
## 5                            ['Christopher Nolan'] 2786129
## 6                             ['Steven Spielberg'] 1409869

Note that adding the echo = FALSE parameter to a code chunk prevents printing of the R code that generated the plot. The chunks in this report do not set it, so their code is shown alongside the output.

Movies Data

CHAITHANYA

2023-10-17

R Markdown

Including Plots