R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Load the movie dataset from its CSV export and print per-column summary
# statistics.
data <- read.csv(file = "C:/Users/java/Desktop/MUKESH1.csv")
summary(object = data)
##        X          Movie.Name        Year.of.Release Run.Time.in.minutes
##  Min.   : 0.00   Length:54          Min.   :1957    Min.   : 80.0      
##  1st Qu.:13.25   Class :character   1st Qu.:1978    1st Qu.:105.2      
##  Median :26.50   Mode  :character   Median :1994    Median :142.0      
##  Mean   :26.50                      Mean   :1996    Mean   :140.1      
##  3rd Qu.:39.75                      3rd Qu.:2018    3rd Qu.:164.0      
##  Max.   :53.00                      Max.   :2023    Max.   :247.0      
##                                                                        
##   Movie.Rating       Votes           MetaScore         Gross          
##  Min.   :8.600   Min.   :  10134   Min.   : 65.0   Min.   :  4360000  
##  1st Qu.:8.700   1st Qu.:  16303   1st Qu.: 82.0   1st Qu.: 77100000  
##  Median :8.800   Median :  88319   Median : 87.0   Median :134970000  
##  Mean   :8.815   Mean   : 774740   Mean   : 85.6   Mean   :184588261  
##  3rd Qu.:8.900   3rd Qu.:1485877   3rd Qu.: 92.0   3rd Qu.:304060000  
##  Max.   :9.300   Max.   :2804443   Max.   :100.0   Max.   :534860000  
##                                    NA's   :29      NA's   :31         
##     Genre           Certification        Director        
##  Length:54          Length:54          Length:54         
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
## 

Including Plots

You can also embed plots, for example:

# Show the compact structure of the data frame: column names, types and the
# first few values of each column.
str(object = data)
## 'data.frame':    54 obs. of  11 variables:
##  $ X                  : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ Movie.Name         : chr  "The Shawshank Redemption" "The Godfather" "Ramayana: The Legend of Prince Rama" "The Chaos Class" ...
##  $ Year.of.Release    : int  1994 1972 1993 1975 2008 1993 2003 1957 1974 2016 ...
##  $ Run.Time.in.minutes: int  142 175 135 87 152 195 201 96 202 147 ...
##  $ Movie.Rating       : num  9.3 9.2 9.2 9.2 9 9 9 9 9 9 ...
##  $ Votes              : int  2804443 1954174 12995 42231 2786129 1409869 1919908 834248 1327069 29102 ...
##  $ MetaScore          : int  82 100 NA NA 84 95 94 97 90 NA ...
##  $ Gross              : int  28340000 134970000 NA NA 534860000 96900000 377850000 4360000 57300000 NA ...
##  $ Genre              : chr  "['Drama']" "['Crime', ' Drama']" "['Animation', ' Action', ' Adventure']" "['Comedy', ' Drama']" ...
##  $ Certification      : chr  "R" "R" "PG" "" ...
##  $ Director           : chr  "['Frank Darabont']" "['Francis Ford Coppola']" "['Ram Mohan', 'Y?\xafg?? Sak??', 'Koichi Saski']" "['Ertem Egilmez']" ...
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
  # Empty canvas: no aesthetics or layers are added, only a plot title.
  ggplot(data = data) +
    labs(title = "Movies data")

# Scatter plot of MetaScore against Votes.
# A geom layer is required for anything to be drawn; without one the panel is
# blank. na.rm = TRUE drops rows with a missing MetaScore silently.
ggplot(data, aes(x = Votes, y = MetaScore)) +
  geom_point(na.rm = TRUE) +
  labs(title = "Movies Data")

# Scatter plot of Gross against Movie.Rating.
# A geom layer is required for anything to be drawn; without one the panel is
# blank. na.rm = TRUE drops rows with a missing Gross silently.
ggplot(data, aes(x = Movie.Rating, y = Gross)) +
  geom_point(na.rm = TRUE) +
  labs(title = "Movies data")

# Scatterplot matrix of the numeric columns only. Plotting the whole data frame
# would also convert the character columns (Movie.Name, Genre, Certification,
# Director) to arbitrary integer codes and draw panels for them.
plot(data[vapply(data, is.numeric, logical(1))])

# Scatter plot with Gross on the x-axis and Year.of.Release on the y-axis.
ggplot(data = data, mapping = aes(y = Year.of.Release, x = Gross)) +
  geom_point() +
  ggtitle("Gross vs Year of Release") +
  xlab("Gross") +
  ylab("Year of Release")
## Warning: Removed 31 rows containing missing values (`geom_point()`).

# Bubble chart: run time against rating, with point size mapped to Votes.
ggplot(
  data = data,
  mapping = aes(x = Movie.Rating, y = Run.Time.in.minutes, size = Votes)
) +
  geom_point() +
  labs(
    title = "Movie Rating vs Run Time in Minutes",
    x = "Movie Rating",
    y = "Run Time in Minutes"
  )

# MetaScore against run time, with colour and shape both keyed to the
# certification. The title and y-axis label name the variable that is actually
# mapped to y (Run.Time.in.minutes), not Gross.
ggplot(
  data,
  aes(
    x = MetaScore,
    y = Run.Time.in.minutes,
    col = factor(Certification),
    shape = factor(Certification)
  )
) +
  geom_point() +
  labs(
    title = "MetaScore vs Run Time in Minutes",
    x = "MetaScore",
    y = "Run Time in Minutes"
  )
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 29 rows containing missing values (`geom_point()`).

# Store the vote counts as a factor in a new Rank column, then plot release
# year against each distinct vote count treated as a category.
data[["Rank"]] <- factor(data[["Votes"]])
ggplot(data = data, mapping = aes(y = Year.of.Release, x = factor(Votes))) +
  geom_point()

# Histogram of release years using 5-year-wide bins.
ggplot(data = data, mapping = aes(x = Year.of.Release)) +
  geom_histogram(binwidth = 5, colour = "black", fill = "lightblue") +
  labs(
    title = "Year of Release",
    x = "Year of Release",
    y = "Count"
  )

# Bar chart counting rows per certification, filled by certification.
ggplot(
  data = data,
  mapping = aes(x = as.factor(Certification), fill = Certification)
) +
  geom_bar(stat = "count")

# Pie chart of how often each rating value occurs. Each slice is labelled
# "<rating> <rounded percentage>%".
Movie.Rating <- table(data[["Movie.Rating"]])
share <- round((Movie.Rating / sum(Movie.Rating)) * 100)
data.labels <- paste0(names(Movie.Rating), " ", share, "%")
pie(
  Movie.Rating,
  labels = data.labels,
  clockwise = TRUE,
  col = rainbow(length(data.labels)),
  main = "Frequency of Movie Rating"
)

# Jittered vote counts per certification, coloured by Gross, with one facet
# column per certification.
ggplot(
  data = data,
  mapping = aes(x = as.factor(Certification), y = Votes, colour = Gross)
) +
  geom_jitter() +
  facet_grid(. ~ Certification)

# Box plot of MetaScore for each Genre value, one fill colour per Genre.
bx <- ggplot(data, aes(x = Genre, y = MetaScore, fill = Genre)) +
  geom_boxplot() +
  labs(
    title = "Distribution of MetaScore by Genre",
    x = "Genre",
    y = "MetaScore"
  ) +
  theme_minimal() +
  # The ColorBrewer "Set3" palette supplies at most 12 colours, which is fewer
  # than the number of Genre values, so build a qualitative palette sized to
  # the data instead of leaving the surplus genres without a fill colour.
  scale_fill_manual(
    values = hcl.colors(length(unique(data$Genre)), palette = "Set 3")
  )

# Render the stored plot object.
print(bx)
## Warning: Removed 29 rows containing non-finite values (`stat_boxplot()`).
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set3 is 12
## Returning the palette you asked for with that many colors

# Preview the first six rows of the data frame.
head(x = data, n = 6L)
##   X                          Movie.Name Year.of.Release Run.Time.in.minutes
## 1 0            The Shawshank Redemption            1994                 142
## 2 1                       The Godfather            1972                 175
## 3 2 Ramayana: The Legend of Prince Rama            1993                 135
## 4 3                     The Chaos Class            1975                  87
## 5 4                     The Dark Knight            2008                 152
## 6 5                    Schindler's List            1993                 195
##   Movie.Rating   Votes MetaScore     Gross
## 1          9.3 2804443        82  28340000
## 2          9.2 1954174       100 134970000
## 3          9.2   12995        NA        NA
## 4          9.2   42231        NA        NA
## 5          9.0 2786129        84 534860000
## 6          9.0 1409869        95  96900000
##                                    Genre Certification
## 1                              ['Drama']             R
## 2                    ['Crime', ' Drama']             R
## 3 ['Animation', ' Action', ' Adventure']            PG
## 4                   ['Comedy', ' Drama']              
## 5         ['Action', ' Crime', ' Drama']         PG-13
## 6    ['Biography', ' Drama', ' History']             R
##                                           Director    Rank
## 1                               ['Frank Darabont'] 2804443
## 2                         ['Francis Ford Coppola'] 1954174
## 3 ['Ram Mohan', 'Y?\xafg?? Sak??', 'Koichi Saski']   12995
## 4                                ['Ertem Egilmez']   42231
## 5                            ['Christopher Nolan'] 2786129
## 6                             ['Steven Spielberg'] 1409869

Note that adding the echo = FALSE parameter to a code chunk prevents printing of the R code that generated the output.