This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Read the movie data set into `data` and print per-column summary statistics.
# NOTE(review): the path is absolute and machine-specific; confirm it exists
# before knitting on another machine.
data <- read.csv(file = "C:/Users/java/Desktop/movie.csv", header = TRUE)
summary(object = data)
## Movie Year.of.Release Run.time Movie.Rating
## Length:49 Min. :1957 Min. : 80.0 Min. :8.700
## Class :character 1st Qu.:1978 1st Qu.: 96.0 1st Qu.:8.700
## Mode :character Median :1994 Median :142.0 Median :8.800
## Mean :1996 Mean :139.9 Mean :8.837
## 3rd Qu.:2018 3rd Qu.:164.0 3rd Qu.:8.900
## Max. :2023 Max. :247.0 Max. :9.300
##
## votes MetaScore Gross Genre
## Min. : 10134 Min. : 67.0 Min. : 4360000 Length:49
## 1st Qu.: 16000 1st Qu.: 82.0 1st Qu.: 52070000 Class :character
## Median : 36923 Median : 86.5 Median :134970000 Mode :character
## Mean : 720032 Mean : 86.0 Mean :182914737
## 3rd Qu.:1409869 3rd Qu.: 92.5 3rd Qu.:304060000
## Max. :2804443 Max. :100.0 Max. :534860000
## NA's :29 NA's :30
## Certification Director Stars
## Length:49 Length:49 Length:49
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.1
# Data layer: attach the data frame to a plot that carries only a title;
# no aesthetics or geoms are added at this step.
ggplot(data) +
  ggtitle("movies Data Plot")

# Print the structure (column names, types, first values) of the data frame.
str(object = data)
## 'data.frame': 49 obs. of 11 variables:
## $ Movie : chr "The Shawshank Redemption" "The Godfather" "Ramayana: The Legend of Prince Rama" "The Chaos Class" ...
## $ Year.of.Release: int 1994 1972 1993 1975 2008 1993 2003 1957 1974 2016 ...
## $ Run.time : int 142 175 135 87 152 195 201 96 202 147 ...
## $ Movie.Rating : num 9.3 9.2 9.2 9.2 9 9 9 9 9 9 ...
## $ votes : int 2804443 1954174 12995 42231 2786129 1409869 1919908 834248 1327069 29102 ...
## $ MetaScore : int 82 100 NA NA 84 95 94 97 90 NA ...
## $ Gross : int 28340000 134970000 NA NA 534860000 96900000 377850000 4360000 57300000 NA ...
## $ Genre : chr "['Drama']" "['Crime', ' Drama']" "['Animation', ' Action', ' Adventure']" "['Comedy', ' Drama']" ...
## $ Certification : chr "R" "R" "PG" "" ...
## $ Director : chr "['Frank Darabont']" "['Francis Ford Coppola']" "['Ram Mohan', 'Yûgô Sakô', 'Koichi Saski']" "['Ertem Egilmez']" ...
## $ Stars : chr "['Tim Robbins', 'Morgan Freeman', 'Bob Gunton', 'William Sadler']" "['Marlon Brando', 'Al Pacino', 'James Caan', 'Diane Keaton']" "['Arun Govil', 'Nikhil Kapoor', 'Edie Mirman', 'Rael Padamsee']" "['Kemal Sunal', 'Münir Özkul', 'Halit Akçatepe', 'Tarik Akan']" ...
# Aesthetics layer: votes on x, rating on y, gross mapped to colour.
# No geom is added here, so this step only sets up the mappings and the title.
ggplot(data, mapping = aes(x = votes, y = Movie.Rating, colour = Gross)) +
  ggtitle("Movie Data Plot")
# Scatter plot of rating (x) against votes (y), with points coloured by gross.
# The title previously read "vsvotes"; the missing space is restored.
ggplot(data = data, aes(x = Movie.Rating, y = votes, col = Gross)) +
  geom_point() +
  labs(title = " Movie.Rating vs votes", x = " Movie.Rating", y = "votes ")
# Scatter plot of rating (x) against votes (y) with point size mapped to gross.
# Rows whose Gross is NA cannot be sized and are dropped by geom_point().
ggplot(data, aes(Movie.Rating, votes, size = Gross)) +
  geom_point() +
  ggtitle("Movie.Rating vs votes ") +
  xlab("Movie.Rating") +
  ylab(" votes ")
## Warning: Removed 30 rows containing missing values (geom_point).
# Scatter plot of rating (x) against votes (y), coloured by gross, with point
# size showing run time.
# Run.time is mapped to size rather than shape: ggplot2's default shape palette
# covers at most 6 discrete values, so shape = factor(Run.time) (38 distinct
# values in this data set) left 41 of the 49 rows undrawn.
ggplot(
  data = data,
  aes(x = Movie.Rating, y = votes, col = factor(Gross), size = Run.time)
) +
  geom_point() +
  labs(title = " Movie.Rating vs votes ", x = " Movie.Rating ", y = "votes ")
#scatterplot
# Rating per distinct vote count. votes is converted to a factor only inside
# aes(), so it is a discrete axis for this plot while data$votes itself stays
# numeric for every other plot and computation that reads it.
ggplot(data, aes(x = factor(votes), y = Movie.Rating)) +
  geom_point()
# Histogram of release years in 5-year bins.
# The title now names the variable actually plotted (Year.of.Release); it
# previously said "movie rating". The y-axis label is intentionally left blank.
ggplot(data = data, aes(x = Year.of.Release)) +
  geom_histogram(binwidth = 5, color = "black", fill = "lightblue") +
  labs(title = "Histogram of Year.of.Release", x = " Year.of.Release ", y = " ")
# Bar chart counting movies per certification, one fill colour per category.
# geom_bar() counts rows per x value by default.
ggplot(data, mapping = aes(x = as.factor(Certification), fill = Certification)) +
  geom_bar()
# Pie chart of the share of movies at each rating value.
# Movie.Rating ends up holding the rounded percentage per distinct rating and
# data.labels the slice labels in the form "<rating> <percent>%".
Movie.Rating <- table(data$Movie.Rating)
Movie.Rating <- round(Movie.Rating / sum(Movie.Rating) * 100)
data.labels <- paste0(names(Movie.Rating), " ", Movie.Rating, "%")
pie(
  Movie.Rating,
  labels = data.labels,
  clockwise = TRUE,
  col = heat.colors(length(data.labels)),
  main = "Movie rating"
)
# Box plot of run time for each release year.
# x carries factor(Year.of.Release) and y carries Run.time, so the axis labels
# are set accordingly (they were previously swapped) and the title names both
# variables.
bx <- ggplot(data = data, aes(x = factor(Year.of.Release), y = Run.time)) +
  geom_boxplot(fill = "blue") +
  ggtitle("Run.time by Year.of.Release") +
  xlab(" Year.of.Release ") +
  ylab(" Run.time")
# Render the stored plot.
bx
# Jittered votes for each rating value, coloured by rating, with one facet
# column per distinct rating.
ggplot(data, aes(as.factor(Movie.Rating), votes, colour = Movie.Rating)) +
  geom_jitter() +
  facet_grid(cols = vars(Movie.Rating))