R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Load the movie data set from a CSV file into the data frame `data`.
# NOTE(review): the path is absolute and machine-specific; this statement only
# succeeds on a machine where that exact file exists.
data <- read.csv(
  file = "C:/Users/java/Desktop/movie.csv"
)

# Print a per-column summary of the loaded data frame.
summary(data)
##     Movie           Year.of.Release    Run.time      Movie.Rating  
##  Length:49          Min.   :1957    Min.   : 80.0   Min.   :8.700  
##  Class :character   1st Qu.:1978    1st Qu.: 96.0   1st Qu.:8.700  
##  Mode  :character   Median :1994    Median :142.0   Median :8.800  
##                     Mean   :1996    Mean   :139.9   Mean   :8.837  
##                     3rd Qu.:2018    3rd Qu.:164.0   3rd Qu.:8.900  
##                     Max.   :2023    Max.   :247.0   Max.   :9.300  
##                                                                    
##      votes           MetaScore         Gross              Genre          
##  Min.   :  10134   Min.   : 67.0   Min.   :  4360000   Length:49         
##  1st Qu.:  16000   1st Qu.: 82.0   1st Qu.: 52070000   Class :character  
##  Median :  36923   Median : 86.5   Median :134970000   Mode  :character  
##  Mean   : 720032   Mean   : 86.0   Mean   :182914737                     
##  3rd Qu.:1409869   3rd Qu.: 92.5   3rd Qu.:304060000                     
##  Max.   :2804443   Max.   :100.0   Max.   :534860000                     
##                    NA's   :29      NA's   :30                            
##  Certification        Director            Stars          
##  Length:49          Length:49          Length:49         
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
## 

Including Plots

You can also embed plots, for example:

Note that adding the echo = FALSE parameter to a code chunk prevents the R code that generated the plot from being printed.

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.1
# Data layer: bind the data frame to a plot and set only its title.
# No aesthetics or geoms are added at this step.
ggplot(data) +
  labs(title = "movies Data Plot")

# Print the structure of the data frame: one line per column with its type and
# first values.
str(object = data)
## 'data.frame':    49 obs. of  11 variables:
##  $ Movie          : chr  "The Shawshank Redemption" "The Godfather" "Ramayana: The Legend of Prince Rama" "The Chaos Class" ...
##  $ Year.of.Release: int  1994 1972 1993 1975 2008 1993 2003 1957 1974 2016 ...
##  $ Run.time       : int  142 175 135 87 152 195 201 96 202 147 ...
##  $ Movie.Rating   : num  9.3 9.2 9.2 9.2 9 9 9 9 9 9 ...
##  $ votes          : int  2804443 1954174 12995 42231 2786129 1409869 1919908 834248 1327069 29102 ...
##  $ MetaScore      : int  82 100 NA NA 84 95 94 97 90 NA ...
##  $ Gross          : int  28340000 134970000 NA NA 534860000 96900000 377850000 4360000 57300000 NA ...
##  $ Genre          : chr  "['Drama']" "['Crime', ' Drama']" "['Animation', ' Action', ' Adventure']" "['Comedy', ' Drama']" ...
##  $ Certification  : chr  "R" "R" "PG" "" ...
##  $ Director       : chr  "['Frank Darabont']" "['Francis Ford Coppola']" "['Ram Mohan', 'Yûgô Sakô', 'Koichi Saski']" "['Ertem Egilmez']" ...
##  $ Stars          : chr  "['Tim Robbins', 'Morgan Freeman', 'Bob Gunton', 'William Sadler']" "['Marlon Brando', 'Al Pacino', 'James Caan', 'Diane Keaton']" "['Arun Govil', 'Nikhil Kapoor', 'Edie Mirman', 'Rael Padamsee']" "['Kemal Sunal', 'Münir Özkul', 'Halit Akçatepe', 'Tarik Akan']" ...
# Aesthetics layer: map votes to x, Movie.Rating to y and Gross to colour.
# No geom is added, so this statement only sets up the mappings and the title.
ggplot(
  data,
  aes(x = votes, y = Movie.Rating, colour = Gross)
) +
  labs(title = "Movie Data Plot")

# Scatter plot of votes (y) against Movie.Rating (x), with points coloured by
# the Gross column.
# The title previously read " Movie.Rating vsvotes"; the missing space between
# "vs" and "votes" is restored here.
ggplot(data, aes(x = Movie.Rating, y = votes, colour = Gross)) +
  geom_point() +
  labs(
    title = "Movie.Rating vs votes",
    x = " Movie.Rating",
    y = "votes "
  )

# Scatter plot of votes (y) against Movie.Rating (x), with the Gross column
# mapped to point size.
ggplot(
  data,
  aes(x = Movie.Rating, y = votes, size = Gross)
) +
  geom_point() +
  labs(
    title = "Movie.Rating vs  votes ",
    x = "Movie.Rating",
    y = " votes  "
  )
## Warning: Removed 30 rows containing missing values (geom_point).

# Scatter plot of votes (y) against Movie.Rating (x) with two discrete
# mappings: colour by factor(Gross) and point shape by factor(Run.time).
# NOTE(review): the rendered output warns that the shape palette handles at
# most 6 discrete values while this mapping has 38, and that 41 rows were
# removed; consider a coarser shape variable.
ggplot(
  data,
  aes(
    x = Movie.Rating,
    y = votes,
    colour = factor(Gross),
    shape = factor(Run.time)
  )
) +
  geom_point() +
  labs(
    title = " Movie.Rating  vs votes ",
    x = " Movie.Rating ",
    y = "votes "
  )
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 38. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 41 rows containing missing values (geom_point).

# scatterplot
# Replace the votes column with a factor, then plot Movie.Rating against the
# factor levels of votes.
# NOTE(review): this overwrites the column inside `data`, so every later
# statement that reads votes receives a factor rather than the original
# integers. The factor() call inside aes() is redundant after the conversion.
data[["votes"]] <- factor(data[["votes"]])
ggplot(data, aes(x = factor(votes), y = Movie.Rating)) + geom_point()

# Histogram of Year.of.Release using 5-unit-wide bins, drawn with black
# outlines and a light blue fill.
# The title previously said "Histogram of movie rating" although the plotted
# variable is Year.of.Release; it now names the variable actually shown.
ggplot(data, aes(x = Year.of.Release)) +
  geom_histogram(binwidth = 5, colour = "black", fill = "lightblue") +
  labs(
    title = "Histogram of Year.of.Release",
    x = " Year.of.Release   ",
    y = " "
  )

# Bar chart with one bar per Certification value, filled by Certification.
# geom_bar() counts rows per x value by default, so no explicit stat is given.
ggplot(
  data,
  aes(x = as.factor(Certification), fill = Certification)
) +
  geom_bar()

  # Pie chart of the share of movies at each Movie.Rating value.
# Movie.Rating ends up holding the rounded percentage per distinct rating.
Movie.Rating <- round(prop.table(table(data$Movie.Rating)) * 100)

# Slice labels have the form "<rating> <percent>%".
data.labels <- paste0(names(Movie.Rating), " ", Movie.Rating, "%")

pie(
  Movie.Rating,
  labels = data.labels,
  clockwise = TRUE,
  col = heat.colors(length(data.labels)),
  main = "Movie rating"
)

 # Box plot of Run.time (y) for each Year.of.Release value (x), stored in `bx`
# and then printed.
# The axis labels were previously swapped (x labelled " Run.time", y labelled
# " Year.of.Release "); each label now sits on the axis it describes.
bx <- ggplot(data, aes(x = factor(Year.of.Release), y = Run.time)) +
  geom_boxplot(fill = "blue") +
  ggtitle("Year.of.Release") +
  xlab(" Year.of.Release ") +
  ylab(" Run.time")
bx

# Jittered points of votes per Movie.Rating value, coloured by Movie.Rating,
# with one facet column per distinct Movie.Rating.
ggplot(
  data,
  aes(
    x = as.factor(Movie.Rating),
    y = votes,
    colour = Movie.Rating
  )
) +
  geom_jitter() +
  facet_grid(. ~ Movie.Rating)