knitr::opts_chunk$set(echo = TRUE)
library(stringr)

# Load the IMDb movie data.
#
# The CSV location can be overridden with the IMDB_CSV environment variable so
# the script is not tied to one machine; when the variable is unset the
# original path is used. The lookup happens inside local() so no helper
# variable is left behind in the global environment.
#
# NOTE(review): the str() output recorded for this file shows `country`
# holding movie titles and `X` holding country codes, so the CSV header looks
# shifted by one column -- confirm against the raw file before relying on
# those two columns.
movies <- local({
  csv_path <- Sys.getenv(
    "IMDB_CSV",
    unset = "C:/Users/DELL/Downloads/imdb.csv"
  )
  if (!file.exists(csv_path)) {
    stop(
      "IMDb CSV not found at '", csv_path,
      "'; set the IMDB_CSV environment variable to its location.",
      call. = FALSE
    )
  }
  read.csv(csv_path)
})
str(movies)
## 'data.frame': 377 obs. of 12 variables:
## $ country : chr "Creed III" "Avatar: The Way of Water" "The Super Mario Bros. Movie" "Mummies" ...
## $ date_x : chr "03-02-2023" "12/15/2022 " "04-05-2023" "01-05-2023" ...
## $ score : int 73 78 76 70 61 66 80 83 59 58 ...
## $ genre : chr "Drama,?\xffAction" "Science Fiction,?\xffAdventure,?\xffAction" "Animation,?\xffAdventure,?\xffFamily,?\xffFantasy,?\xffComedy" "Animation,?\xffComedy,?\xffFamily,?\xffAdventure,?\xffFantasy" ...
## $ overview : chr "After dominating the boxing world, Adonis Creed has been thriving in both his career and family life. When a childhood friend a "Set more than a decade after the events of the first film, learn the story of the Sully family (Jake, Neytiri, and their kids), "While working underground to fix a water main, Brooklyn plumbers\x83??and brothers\x83??Mario and Luigi are transported down a "Through a series of unfortunate events, three mummies end up in present-day London and embark on a wacky and hilarious journey ...
## $ crew : chr "Michael B. Jordan, Adonis Creed, Tessa Thompson, Bianca Taylor, Jonathan Majors, Damien Anderson, Wood Harris, Tony 'Little Duk "Sam Worthington, Jake Sully, Zoe Salda?\xf1a, Neytiri, Sigourney Weaver, Kiri / Dr. Grace Augustine, Stephen Lang, Colonel Mile "Chris Pratt, Mario (voice), Anya Taylor-Joy, Princess Peach (voice), Charlie Day, Luigi (voice), Jack Black, Bowser (voice), Ke "??scar Barber?\xadn, Thut (voice), Ana Esther Alborg, Nefer (voice), Luis P??rez Reina, Carnaby (voice), Mar??a Luisa Sol?\xad, ...
## $ orig_title: chr "Creed III" "Avatar: The Way of Water" "The Super Mario Bros. Movie" " Momias" ...
## $ status : chr " Released" " Released" " Released" " Released" ...
## $ orig_lang : chr " English" " English" " English" " Spanish, Castilian" ...
## $ budget_x : num 7.50e+07 4.60e+08 1.00e+08 1.23e+07 7.70e+07 ...
## $ revenue : num 2.72e+08 2.32e+09 7.24e+08 3.42e+07 3.41e+08 ...
## $ X : chr "AU" "AU" "AU" "AU" ...
# Column-by-column overview of the data frame: quartiles and mean for the
# numeric columns, length/class/mode for the character columns.
summary(object = movies)
## country date_x score genre
## Length:377 Length:377 Min. : 0.00 Length:377
## Class :character Class :character 1st Qu.: 63.00 Class :character
## Mode :character Mode :character Median : 70.00 Mode :character
## Mean : 68.26
## 3rd Qu.: 76.00
## Max. :100.00
## overview crew orig_title status
## Length:377 Length:377 Length:377 Length:377
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## orig_lang budget_x revenue X
## Length:377 Min. : 105 Min. :0.000e+00 Length:377
## Class :character 1st Qu.: 38211149 1st Qu.:1.020e+08 Class :character
## Mode :character Median : 92600000 Median :3.526e+08 Mode :character
## Mean : 96816280 Mean :4.080e+08
## 3rd Qu.:136400000 3rd Qu.:5.645e+08
## Max. :460000000 Max. :2.924e+09
# Reduce the comma-separated `genre` string to one label per movie: the
# first-listed genre.
#
# The previous cleaning removed every known genre name from the string, so
# all that remained were the (mis-encoded) separator characters between the
# names. The resulting levels only reflected how many genres a movie listed,
# which made every downstream `revenue ~ genre` model meaningless.
#
# Keeping only the text before the first comma also avoids the garbled
# separator bytes, which in the recorded str() output appear only after a
# comma. Rows with an empty or missing genre become NA and are dropped by
# aov()/lm().
#
# NOTE: previously captured output for table(), aov() and
# lm(revenue ~ budget_x + genre) predates this fix and must be regenerated.
movies$genre <- as.character(movies$genre)
movies$genre <- str_trim(str_extract(movies$genre, "^[^,]+"))
table(movies$genre)
##
## ?� ?�?� ?�?�?� ?�?�?�?�
## 47 91 154 59 19
## ?�?�?�?�?� ?�?�TV Movie ?�TV Movie,?�?�
## 5 1 1
# One-way ANOVA: tests whether mean revenue differs between the levels of
# `genre`.
anova_model <- aov(formula = revenue ~ genre, data = movies)
summary(object = anova_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## genre 7 6.126e+17 8.751e+16 0.532 0.81
## Residuals 369 6.066e+19 1.644e+17
# Simple linear regression: revenue explained by budget alone.
regression_model <- lm(formula = revenue ~ budget_x, data = movies)
summary(object = regression_model)
##
## Call:
## lm(formula = revenue ~ budget_x, data = movies)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.322e+09 -1.406e+08 -3.773e+07 1.132e+08 1.989e+09
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.397e+07 2.577e+07 1.706 0.0888 .
## budget_x 3.760e+00 2.135e-01 17.617 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.99e+08 on 375 degrees of freedom
## Multiple R-squared: 0.4528, Adjusted R-squared: 0.4514
## F-statistic: 310.4 on 1 and 375 DF, p-value: < 2.2e-16
# Scatter plot of revenue against budget, with the fitted line from
# `regression_model` drawn on top in red.
with(
  movies,
  plot(x = budget_x, y = revenue, xlab = "Budget", ylab = "Revenue")
)
abline(reg = regression_model, col = "red")
# Multiple regression: revenue explained by budget plus `genre` as a
# categorical predictor.
regression_model2 <- lm(formula = revenue ~ budget_x + genre, data = movies)
summary(object = regression_model2)
##
## Call:
## lm(formula = revenue ~ budget_x + genre, data = movies)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.323e+09 -1.457e+08 -3.588e+07 1.192e+08 2.011e+09
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.354e+07 4.656e+07 0.935 0.350
## budget_x 3.804e+00 2.184e-01 17.423 <2e-16 ***
## genre?� 2.771e+07 5.406e+07 0.512 0.609
## genre?�?� -1.361e+07 5.063e+07 -0.269 0.788
## genre?�?�?� -3.250e+07 5.931e+07 -0.548 0.584
## genre?�?�?�?� -1.066e+07 8.192e+07 -0.130 0.897
## genre?�?�?�?�?� 8.621e+07 1.414e+08 0.610 0.543
## genre?�?�TV Movie -2.884e+08 3.042e+08 -0.948 0.344
## genre?�TV Movie,?�?� 1.077e+08 3.040e+08 0.354 0.723
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 300500000 on 368 degrees of freedom
## Multiple R-squared: 0.4575, Adjusted R-squared: 0.4457
## F-statistic: 38.79 on 8 and 368 DF, p-value: < 2.2e-16
# Clean up only the objects this script created. A blanket rm(list = ls())
# would also delete unrelated objects in the session when the script is run
# interactively or sourced from other code.
rm(movies, anova_model, regression_model, regression_model2)