This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Load the movies dataset from a local CSV export and print per-column
# summary statistics.
# NOTE(review): the path is an absolute, machine-specific Windows path, so the
# document only knits where this file exists -- consider a project-relative
# path.
movies_csv <- "C:/Users/java/Desktop/MUKESH1.csv"
data <- read.csv(movies_csv)
summary(data)
## X Movie.Name Year.of.Release Run.Time.in.minutes
## Min. : 0.00 Length:54 Min. :1957 Min. : 80.0
## 1st Qu.:13.25 Class :character 1st Qu.:1978 1st Qu.:105.2
## Median :26.50 Mode :character Median :1994 Median :142.0
## Mean :26.50 Mean :1996 Mean :140.1
## 3rd Qu.:39.75 3rd Qu.:2018 3rd Qu.:164.0
## Max. :53.00 Max. :2023 Max. :247.0
##
## Movie.Rating Votes MetaScore Gross
## Min. :8.600 Min. : 10134 Min. : 65.0 Min. : 4360000
## 1st Qu.:8.700 1st Qu.: 16303 1st Qu.: 82.0 1st Qu.: 77100000
## Median :8.800 Median : 88319 Median : 87.0 Median :134970000
## Mean :8.815 Mean : 774740 Mean : 85.6 Mean :184588261
## 3rd Qu.:8.900 3rd Qu.:1485877 3rd Qu.: 92.0 3rd Qu.:304060000
## Max. :9.300 Max. :2804443 Max. :100.0 Max. :534860000
## NA's :29 NA's :31
## Genre Certification Director
## Length:54 Length:54 Length:54
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
You can also inspect the structure of the data and embed plots, for example:
# Compactly display each column's name, type and first few values.
utils::str(data)
## 'data.frame': 54 obs. of 11 variables:
## $ X : int 0 1 2 3 4 5 6 7 8 9 ...
## $ Movie.Name : chr "The Shawshank Redemption" "The Godfather" "Ramayana: The Legend of Prince Rama" "The Chaos Class" ...
## $ Year.of.Release : int 1994 1972 1993 1975 2008 1993 2003 1957 1974 2016 ...
## $ Run.Time.in.minutes: int 142 175 135 87 152 195 201 96 202 147 ...
## $ Movie.Rating : num 9.3 9.2 9.2 9.2 9 9 9 9 9 9 ...
## $ Votes : int 2804443 1954174 12995 42231 2786129 1409869 1919908 834248 1327069 29102 ...
## $ MetaScore : int 82 100 NA NA 84 95 94 97 90 NA ...
## $ Gross : int 28340000 134970000 NA NA 534860000 96900000 377850000 4360000 57300000 NA ...
## $ Genre : chr "['Drama']" "['Crime', ' Drama']" "['Animation', ' Action', ' Adventure']" "['Comedy', ' Drama']" ...
## $ Certification : chr "R" "R" "PG" "" ...
## $ Director : chr "['Frank Darabont']" "['Francis Ford Coppola']" "['Ram Mohan', 'Y?\xafg?? Sak??', 'Koichi Saski']" "['Ertem Egilmez']" ...
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
# Blank canvas: no aesthetics or layers are mapped, so only the title is drawn.
ggplot(data) +
  labs(title = "Movies data")

# Votes against MetaScore. Without a geom layer ggplot2 renders empty axes
# only, so geom_point() is added to actually draw the observations.
# Rows with a missing MetaScore are dropped by ggplot2 with a warning.
ggplot(data, aes(x = Votes, y = MetaScore)) +
  geom_point() +
  labs(title = "Movies Data")

# Movie rating against Gross, drawn as a scatter plot for the same reason.
# Rows with a missing Gross are dropped by ggplot2 with a warning.
ggplot(data, aes(x = Movie.Rating, y = Gross)) +
  geom_point() +
  labs(title = "Movies data")

# Base-graphics plot of the whole data frame (a matrix of pairwise
# scatter plots of its columns).
plot(data)
# Scatter plot with Gross on the x-axis and release year on the y-axis.
ggplot(data, aes(Gross, Year.of.Release)) +
  geom_point() +
  ggtitle("Gross vs Year of Release") +
  xlab("Gross") +
  ylab("Year of Release")
## Warning: Removed 31 rows containing missing values (`geom_point()`).
# Scatter plot of movie rating against run time; point size is mapped to the
# number of votes.
ggplot(data, aes(Movie.Rating, Run.Time.in.minutes, size = Votes)) +
  geom_point() +
  ggtitle("Movie Rating vs Run Time in Minutes") +
  xlab("Movie Rating") +
  ylab("Run Time in Minutes")
# Scatter plot of MetaScore against run time, with one colour and one shape
# per certification level.
# Fixes:
#   * The title and y-axis label previously said "Gross" although the plotted
#     y variable is Run.Time.in.minutes.
#   * ggplot2's default shape palette provides only 6 shapes; with more
#     certification levels the extra levels are not drawn. Shapes are now
#     assigned manually, one per level.
ggplot(
  data,
  aes(
    x = MetaScore,
    y = Run.Time.in.minutes,
    col = factor(Certification),
    shape = factor(Certification)
  )
) +
  geom_point() +
  scale_shape_manual(
    values = seq_len(nlevels(factor(data$Certification)))
  ) +
  labs(
    title = "MetaScore vs Run Time in Minutes",
    x = "MetaScore",
    y = "Run Time in Minutes"
  )
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 29 rows containing missing values (`geom_point()`).
# Add a Rank column holding Votes as a factor (one level per distinct vote
# count), then plot release year against the discrete vote counts.
# NOTE(review): the plot recomputes factor(Votes) instead of using the new
# Rank column, and Rank is not an ordinal rank -- confirm the intent.
data[["Rank"]] <- factor(data[["Votes"]])
ggplot(data, aes(x = factor(Votes), y = Year.of.Release)) +
  geom_point()
# Histogram of release years in 5-year bins.
ggplot(data, aes(Year.of.Release)) +
  geom_histogram(binwidth = 5, colour = "black", fill = "lightblue") +
  ggtitle("Year of Release") +
  xlab("Year of Release") +
  ylab("Count")
# Bar chart counting movies per certification, one fill colour per level.
# Counting rows is geom_bar()'s default statistic.
ggplot(data, aes(x = as.factor(Certification), fill = Certification)) +
  geom_bar()
# Pie chart of how often each rating value occurs in the dataset.
Movie.Rating <- table(data[["Movie.Rating"]])

# Share of each rating as a whole-number percentage.
share <- round(prop.table(Movie.Rating) * 100)

# Slice labels of the form "<rating> <share>%".
data.labels <- paste0(names(Movie.Rating), " ", share, "%")

pie(
  Movie.Rating,
  labels = data.labels,
  clockwise = TRUE,
  col = rainbow(length(data.labels)),
  main = "Frequency of Movie Rating"
)
# Jittered votes per certification, coloured by Gross, with one facet column
# per certification level.
ggplot(data, aes(x = as.factor(Certification), y = Votes, colour = Gross)) +
  geom_jitter() +
  facet_grid(cols = vars(Certification))
# Box plot of MetaScore for each Genre value, one fill colour per genre.
# Fix: the ColorBrewer "Set3" palette offers at most 12 colours, fewer than
# the number of distinct Genre values, which triggered a palette warning and
# left genres without a proper fill. A qualitative HCL palette sized to the
# number of genres is generated with base R instead.
genre_count <- length(unique(data$Genre))

bx <- ggplot(data, aes(x = Genre, y = MetaScore, fill = Genre)) +
  geom_boxplot() +
  labs(
    title = "Distribution of MetaScore by Genre",
    x = "Genre",
    y = "MetaScore"
  ) +
  theme_minimal() +
  scale_fill_manual(
    values = grDevices::hcl.colors(genre_count, palette = "Set 3")
  )
print(bx)
## Warning: Removed 29 rows containing non-finite values (`stat_boxplot()`).
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set3 is 12
## Returning the palette you asked for with that many colors
# Show the first six rows, including the Rank column added earlier.
utils::head(data, n = 6L)
## X Movie.Name Year.of.Release Run.Time.in.minutes
## 1 0 The Shawshank Redemption 1994 142
## 2 1 The Godfather 1972 175
## 3 2 Ramayana: The Legend of Prince Rama 1993 135
## 4 3 The Chaos Class 1975 87
## 5 4 The Dark Knight 2008 152
## 6 5 Schindler's List 1993 195
## Movie.Rating Votes MetaScore Gross
## 1 9.3 2804443 82 28340000
## 2 9.2 1954174 100 134970000
## 3 9.2 12995 NA NA
## 4 9.2 42231 NA NA
## 5 9.0 2786129 84 534860000
## 6 9.0 1409869 95 96900000
## Genre Certification
## 1 ['Drama'] R
## 2 ['Crime', ' Drama'] R
## 3 ['Animation', ' Action', ' Adventure'] PG
## 4 ['Comedy', ' Drama']
## 5 ['Action', ' Crime', ' Drama'] PG-13
## 6 ['Biography', ' Drama', ' History'] R
## Director Rank
## 1 ['Frank Darabont'] 2804443
## 2 ['Francis Ford Coppola'] 1954174
## 3 ['Ram Mohan', 'Y?\xafg?? Sak??', 'Koichi Saski'] 12995
## 4 ['Ertem Egilmez'] 42231
## 5 ['Christopher Nolan'] 2786129
## 6 ['Steven Spielberg'] 1409869
Note that adding the `echo = FALSE` parameter to a
code chunk prevents printing of the R code that generated the
plot.