This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
## Clean Data
# Keep only rows whose year is greater than 2005. Rows with a missing year come
# through this subset as all-NA rows; the complete.cases() step removes them.
imbdclean <- imbd_rating[imbd_rating$year > 2005, ]
# Drop every row that has a missing value in any column.
finalimbd <- imbdclean[complete.cases(imbdclean), ]
# Keep the rows whose genres text contains "Action". This must run before the
# title/director clean-up below, because that clean-up modifies actionmovies
# and would fail with "object 'actionmovies' not found" if it ran first.
# NOTE(review): filter() with a bare column name is presumably dplyr's -
# confirm dplyr is attached before this point.
actionmovies <- filter(finalimbd, grepl("Action", genres))
# Keep only the rows with a 2.35 aspect ratio.
actionmovies <- actionmovies[actionmovies$aspect_ratio == "2.35", ]
# Strip the stray "Â" character from titles and director names
# (presumably a text-encoding artifact in the raw data - TODO confirm).
actionmovies$title <- gsub("Â", "", actionmovies$title)
actionmovies$director <- gsub("Â", "", actionmovies$director)
# Bar chart: number of rows of imbdclean per aspect ratio
# (drawn with barplot() on a frequency table, not hist()).
imbdclean[["aspect_ratio"]] <- as.factor(imbdclean[["aspect_ratio"]])
# Per-level counts as a data frame. The result is stored under the name
# `count`; R still finds the count() function when it is called later.
count <- count(imbdclean, aspect_ratio)
# Frequency table that feeds the bar heights.
counts <- table(imbdclean[["aspect_ratio"]])
barplot(height = counts, xlab = "Dimensions", main = "Aspect Ratio")
# Bar chart: number of rows of imbdclean per distinct genres value
# (drawn with barplot() on a frequency table, not hist()).
imbdclean[["genres"]] <- as.factor(imbdclean[["genres"]])
# NOTE(review): the 'Action' literal is passed as a grouping expression, not
# as a filter, so it looks like it only adds a constant column next to the
# per-genres counts. If the goal was to count Action titles only, the data
# should be filtered first - confirm intent.
countgenres <- count(imbdclean, 'Action', genres)
# Frequency table that feeds the bar heights (all genres values, unfiltered).
counts <- table(imbdclean[["genres"]])
barplot(height = counts, xlab = "Type", main = "Genres")
# Multiple linear regression: gross explained by budget, score and
# critic_reviews, fitted on the filtered actionmovies data.
reg <- lm(
  actionmovies$gross ~
    actionmovies$budget + actionmovies$score + actionmovies$critic_reviews
)
# Coefficient table, residual quantiles, R-squared and F-statistic.
print(summary(reg))
Call:
lm(formula = actionmovies$gross ~ actionmovies$budget + actionmovies$score +
actionmovies$critic_reviews)
Residuals:
Min 1Q Median 3Q Max
-320804488 -30651992 -5086208 23817116 252130825
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.398e+08 2.248e+07 -6.217 1.39e-09 ***
actionmovies$budget 5.410e-01 5.269e-02 10.268 < 2e-16 ***
actionmovies$score 1.589e+07 3.945e+06 4.028 6.84e-05 ***
actionmovies$critic_reviews 2.758e+05 2.962e+04 9.312 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 59470000 on 364 degrees of freedom
Multiple R-squared: 0.6166, Adjusted R-squared: 0.6135
F-statistic: 195.2 on 3 and 364 DF, p-value: < 2.2e-16
# Simple regression of gross on budget. The model is fitted first so that the
# plotted line comes from the fit itself instead of rounded coefficients typed
# in by hand.
budget_fit <- lm(actionmovies$gross ~ actionmovies$budget)
plot(actionmovies$budget, actionmovies$gross,
     xlab = "Budget of Action movies with 2.35 AR",
     ylab = "Gross of Action movies with 2.35 AR")
# abline() takes the intercept and slope from the fitted model.
abline(budget_fit)
# Print the fitted model (call and coefficients).
budget_fit
Call:
lm(formula = actionmovies$gross ~ actionmovies$budget)
Coefficients:
(Intercept) actionmovies$budget
9.892e+06 8.960e-01
# Per-column summary of the filtered actionmovies data frame.
summary(actionmovies)
title genres director actor1 actor2 actor3 length budget director_fb_likes actor1_fb_likes
Length:368 Length:368 Length:368 Length:368 Length:368 Length:368 Min. : 81.0 Min. : 2000000 Min. : 0.0 Min. : 21.0
Class :character Class :character Class :character Class :character Class :character Class :character 1st Qu.:101.0 1st Qu.: 35000000 1st Qu.: 25.0 1st Qu.: 966.8
Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character Median :110.5 Median : 65000000 Median : 135.0 Median : 11000.0
Mean :114.5 Mean : 89085685 Mean : 730.5 Mean : 11583.1
3rd Qu.:123.0 3rd Qu.:140000000 3rd Qu.: 340.5 3rd Qu.: 18000.0
Max. :215.0 Max. :553632000 Max. :22000.0 Max. :137000.0
actor2_fb_likes actor3_fb_likes total_cast_likes fb_likes critic_reviews users_reviews users_votes score aspect_ratio gross year
Min. : 17.0 Min. : 7.0 Min. : 58 Min. : 0 Min. : 30.0 Min. : 23.0 Min. : 2508 Min. :2.700 Min. :2.35 Min. : 162 Min. :2006
1st Qu.: 551.8 1st Qu.: 308.8 1st Qu.: 2908 1st Qu.: 0 1st Qu.:188.8 1st Qu.: 178.8 1st Qu.: 54076 1st Qu.:5.875 1st Qu.:2.35 1st Qu.: 23047736 1st Qu.:2009
Median : 899.5 Median : 559.0 Median : 14968 Median : 15000 Median :265.0 Median : 342.5 Median : 119483 Median :6.400 Median :2.35 Median : 56114221 Median :2011
Mean : 3475.1 Mean : 1258.9 Mean : 17760 Mean : 24942 Mean :290.3 Mean : 497.3 Mean : 174112 Mean :6.371 Mean :2.35 Mean : 89708090 Mean :2011
3rd Qu.: 3000.0 3rd Qu.: 903.0 3rd Qu.: 25210 3rd Qu.: 38000 3rd Qu.:372.0 3rd Qu.: 637.0 3rd Qu.: 229681 3rd Qu.:6.925 3rd Qu.:2.35 3rd Qu.:126248948 3rd Qu.:2014
Max. :27000.0 Max. :23000.0 Max. :137712 Max. :197000 Max. :813.0 Max. :4667.0 Max. :1676169 Max. :9.000 Max. :2.35 Max. :533316061 Max. :2016
# Simple regression of gross on score; printing the model shows the call and
# the fitted intercept and slope.
lm(actionmovies$gross ~ actionmovies$score)
Call:
lm(formula = actionmovies$gross ~ actionmovies$score)
Coefficients:
(Intercept) actionmovies$score
-218685712 48408514
# Simple regression of gross on critic_reviews.
lm(actionmovies$gross ~ actionmovies$critic_reviews)
Call:
lm(formula = actionmovies$gross ~ actionmovies$critic_reviews)
Coefficients:
(Intercept) actionmovies$critic_reviews
-48611078 476463
# Gross against score, with the least-squares line taken from a fitted model
# rather than from coefficients copied by hand out of earlier output.
plot(actionmovies$score, actionmovies$gross,
     xlab = "Score of Action movies with 2.35 AR",
     ylab = "Gross of Action movies with 2.35 AR")
abline(lm(actionmovies$gross ~ actionmovies$score))
# Gross against critic_reviews, with its own fitted least-squares line.
plot(actionmovies$critic_reviews, actionmovies$gross,
     xlab = "Critics Reviews of Action movies with 2.35 AR",
     ylab = "Gross of Action movies with 2.35 AR")
abline(lm(actionmovies$gross ~ actionmovies$critic_reviews))
# Min / quartiles / median / mean / max of the response variable, gross.
summary(actionmovies$gross)
Min. 1st Qu. Median Mean 3rd Qu. Max.
162 23050000 56110000 89710000 126200000 533300000
# Same summary for each of the three predictors used in the regressions.
summary(actionmovies$budget)
Min. 1st Qu. Median Mean 3rd Qu. Max.
2000000 35000000 65000000 89090000 140000000 553600000
summary(actionmovies$score)
Min. 1st Qu. Median Mean 3rd Qu. Max.
2.700 5.875 6.400 6.371 6.925 9.000
summary(actionmovies$critic_reviews)
Min. 1st Qu. Median Mean 3rd Qu. Max.
30.0 188.8 265.0 290.3 372.0 813.0
# Per-column summary of actionmovies again (same call as earlier in the notebook).
summary(actionmovies)
title genres director actor1 actor2
Length:368 Length:368 Length:368 Length:368 Length:368
Class :character Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character Mode :character
actor3 length budget director_fb_likes actor1_fb_likes
Length:368 Min. : 81.0 Min. : 2000000 Min. : 0.0 Min. : 21.0
Class :character 1st Qu.:101.0 1st Qu.: 35000000 1st Qu.: 25.0 1st Qu.: 966.8
Mode :character Median :110.5 Median : 65000000 Median : 135.0 Median : 11000.0
Mean :114.5 Mean : 89085685 Mean : 730.5 Mean : 11583.1
3rd Qu.:123.0 3rd Qu.:140000000 3rd Qu.: 340.5 3rd Qu.: 18000.0
Max. :215.0 Max. :553632000 Max. :22000.0 Max. :137000.0
actor2_fb_likes actor3_fb_likes total_cast_likes fb_likes critic_reviews users_reviews
Min. : 17.0 Min. : 7.0 Min. : 58 Min. : 0 Min. : 30.0 Min. : 23.0
1st Qu.: 551.8 1st Qu.: 308.8 1st Qu.: 2908 1st Qu.: 0 1st Qu.:188.8 1st Qu.: 178.8
Median : 899.5 Median : 559.0 Median : 14968 Median : 15000 Median :265.0 Median : 342.5
Mean : 3475.1 Mean : 1258.9 Mean : 17760 Mean : 24942 Mean :290.3 Mean : 497.3
3rd Qu.: 3000.0 3rd Qu.: 903.0 3rd Qu.: 25210 3rd Qu.: 38000 3rd Qu.:372.0 3rd Qu.: 637.0
Max. :27000.0 Max. :23000.0 Max. :137712 Max. :197000 Max. :813.0 Max. :4667.0
users_votes score aspect_ratio gross year
Min. : 2508 Min. :2.700 Min. :2.35 Min. : 162 Min. :2006
1st Qu.: 54076 1st Qu.:5.875 1st Qu.:2.35 1st Qu.: 23047736 1st Qu.:2009
Median : 119483 Median :6.400 Median :2.35 Median : 56114221 Median :2011
Mean : 174112 Mean :6.371 Mean :2.35 Mean : 89708090 Mean :2011
3rd Qu.: 229681 3rd Qu.:6.925 3rd Qu.:2.35 3rd Qu.:126248948 3rd Qu.:2014
Max. :1676169 Max. :9.000 Max. :2.35 Max. :533316061 Max. :2016
# Sample standard deviation of the response and of each predictor.
# Each label below refers to the sd() call and result just above it.
sd(actionmovies$gross)
[1] 95662525
# ^ standard deviation of gross
sd(actionmovies$budget)
[1] 70341395
# ^ standard deviation of budget
sd(actionmovies$critic_reviews)
[1] 140.6367
# ^ standard deviation of critic_reviews
sd(actionmovies$score)
[1] 0.9166386
# ^ standard deviation of score
# Mean-to-standard-deviation ratio of the response and of each predictor.
snrgross <- mean(actionmovies$gross) / sd(actionmovies$gross)
snrbudget <- mean(actionmovies$budget) / sd(actionmovies$budget)
snrcriticreviews <- mean(actionmovies$critic_reviews) /
  sd(actionmovies$critic_reviews)
snrscore <- mean(actionmovies$score) / sd(actionmovies$score)
# Budget/gross pair, selected by column name instead of by position (8, 20)
# so the selection survives columns being added or reordered.
corr01 <- actionmovies[c("budget", "gross")]
# Correlation matrix for budget and gross.
cor(corr01)
budget gross
budget 1.0000000 0.6587992
gross 0.6587992 1.0000000
# critic_reviews/gross pair, selected by column name instead of by position
# (15, 20) so the selection survives columns being added or reordered.
corr02 <- actionmovies[c("critic_reviews", "gross")]
# Correlation matrix for critic_reviews and gross.
cor(corr02)
critic_reviews gross
critic_reviews 1.0000000 0.7004639
gross 0.7004639 1.0000000
# score/gross pair, selected by column name instead of by position (18, 20)
# so the selection survives columns being added or reordered.
corr03 <- actionmovies[c("score", "gross")]
# Correlation matrix for score and gross.
cor(corr03)
score gross
score 1.0000000 0.4638505
gross 0.4638505 1.0000000
# Regression of gross on critic_reviews, fitted first so the plotted line comes
# from the model itself rather than from hand-copied coefficients.
critic_fit <- lm(actionmovies$gross ~ actionmovies$critic_reviews)
# Scatter plot of the two columns of corr02 (first column on the x axis).
plot(corr02)
abline(critic_fit)
# Print the fitted model (call and coefficients).
critic_fit
Call:
lm(formula = actionmovies$gross ~ actionmovies$critic_reviews)
Coefficients:
(Intercept) actionmovies$critic_reviews
-48611078 476463
# Multiple regression: gross predicted from budget, critic_reviews and score.
lm1 <- lm(
  actionmovies$gross ~
    actionmovies$budget + actionmovies$critic_reviews + actionmovies$score
)
# Show the call and the fitted coefficients.
print(lm1)
Call:
lm(formula = actionmovies$gross ~ actionmovies$budget + actionmovies$critic_reviews +
actionmovies$score)
Coefficients:
(Intercept) actionmovies$budget actionmovies$critic_reviews
-1.398e+08 5.410e-01 2.758e+05
actionmovies$score
1.589e+07
# Scatter plot of corr01 with the least-squares line of gross on budget,
# taken from a fitted model instead of hand-copied coefficients.
plot(corr01)
abline(lm(actionmovies$gross ~ actionmovies$budget))
# Scatter plot of corr03 with the least-squares line of gross on score.
plot(corr03)
abline(lm(actionmovies$gross ~ actionmovies$score))
# Min / quartiles / median / mean / max of gross.
summary(actionmovies$gross)
Min. 1st Qu. Median Mean 3rd Qu. Max.
162 23050000 56110000 89710000 126200000 533300000
# Histograms of the response (gross) and of the three predictors.
hist(actionmovies$gross)
hist(actionmovies$budget)
hist(actionmovies$score)
hist(actionmovies$critic_reviews)
# Scatter plot of gross against critic_reviews with a smoothed curve added.
scatter.smooth(actionmovies$critic_reviews,actionmovies$gross)
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).