# Load the movie metadata table from the working directory.
# The file name is passed directly: wrapping a single string in
# paste(..., sep = " ") returned the same string and only obscured the call.
rat <- read.csv("movies_metadata.csv")
library(psych)
# Per-column summary: counts of the most frequent values for factor columns,
# five-number summary plus mean (and NA count) for numeric columns.
summary(rat)
## adult
## - Written by ÃrnÃ¥s : 1
## Avalanche Sharks tells the story of a bikini contest that turns into a horrifying affair when it is hit by a shark avalanche.: 1
## Rune Balot goes to a casino connected to the October corporation to try to wrap up her case once and for all. : 1
## False :45454
## True : 9
##
##
## belongs_to_collection
## :40972
## {'id': 415931, 'name': 'The Bowery Boys', 'poster_path': '/q6sA4bzMT9cK7EEmXYwt7PNrL5h.jpg', 'backdrop_path': '/foe3kuiJmg5AklhtD3skWbaTMf2.jpg'} : 29
## {'id': 421566, 'name': 'Totò Collection', 'poster_path': '/4ayJsjC3djGwU9eCWUokdBWvdLC.jpg', 'backdrop_path': '/jaUuprubvAxXLAY5hUfrNjxccUh.jpg'} : 27
## {'id': 645, 'name': 'James Bond Collection', 'poster_path': '/HORpg5CSkmeQlAolx3bKMrKgfi.jpg', 'backdrop_path': '/6VcVl48kNKvdXOZfJPdarlUGOsk.jpg'} : 26
## {'id': 96887, 'name': 'Zatôichi: The Blind Swordsman', 'poster_path': '/8Q31DAtmFJjhFTwQGXghBUCgWK2.jpg', 'backdrop_path': '/bY8gLImMR5Pr9PaG3ZpobfaAQ8N.jpg'}: 26
## {'id': 37261, 'name': 'The Carry On Collection', 'poster_path': '/2P0HNrYgKDvirV8RCdT1rBSJdbJ.jpg', 'backdrop_path': '/38tF1LJN7ULeZAuAfP7beaPMfcl.jpg'} : 25
## (Other) : 4361
## budget
## 0 :36573
## 5000000 : 286
## 10000000: 259
## 20000000: 243
## 2000000 : 242
## 15000000: 226
## (Other) : 7637
## genres
## [{'id': 18, 'name': 'Drama'}] : 5000
## [{'id': 35, 'name': 'Comedy'}] : 3621
## [{'id': 99, 'name': 'Documentary'}] : 2723
## [] : 2442
## [{'id': 18, 'name': 'Drama'}, {'id': 10749, 'name': 'Romance'}]: 1301
## [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'name': 'Drama'}] : 1135
## (Other) :29244
## homepage id
## :37684 141971 : 3
## http://www.georgecarlin.com : 12 105045 : 2
## http://www.wernerherzog.com/films-by.html : 7 10991 : 2
## http://breakblade.jp/ : 6 109962 : 2
## http://phantasm.com : 4 110428 : 2
## http://www.crownintlpictures.com/actitles.html: 4 11115 : 2
## (Other) : 7749 (Other):45453
## imdb_id original_language original_title
## : 17 en :32269 Alice in Wonderland: 8
## 0 : 3 fr : 2438 Hamlet : 8
## tt1180333: 3 it : 1529 A Christmas Carol : 7
## tt0022537: 2 ja : 1350 Cinderella : 7
## tt0022879: 2 de : 1080 Les Misérables : 7
## tt0046468: 2 es : 994 Macbeth : 7
## (Other) :45437 (Other): 5806 (Other) :45422
## overview
## : 954
## No overview found. : 133
## No Overview : 7
## : 5
## A few funny little novels about different aspects of life.: 3
## Adaptation of the Jane Austen novel. : 3
## (Other) :44361
## popularity poster_path
## 0.0 : 66 : 386
## 1e-06 : 56 /5D7UBSEgdyONE6Lql6xS7s6OLcW.jpg: 5
## 0.000308: 43 /2kslZXOaW0HmnGuVPCnQlCdXFR9.jpg: 4
## 0.00022 : 40 /qW1oQlOHizRHXZQrpkimYr0oxzn.jpg: 4
## 0.000578: 38 /8VSZ9coCzxOCW2wE2Qene1H1fKO.jpg: 3
## 0.000844: 38 /cdwVC18URfEdQjjxqJyRMoGDC0H.jpg: 3
## (Other) :45185 (Other) :45061
## production_companies
## [] :11875
## [{'name': 'Metro-Goldwyn-Mayer (MGM)', 'id': 8411}] : 742
## [{'name': 'Warner Bros.', 'id': 6194}] : 540
## [{'name': 'Paramount Pictures', 'id': 4}] : 505
## [{'name': 'Twentieth Century Fox Film Corporation', 'id': 306}]: 439
## [{'name': 'Universal Pictures', 'id': 33}] : 320
## (Other) :31045
## production_countries
## [{'iso_3166_1': 'US', 'name': 'United States of America'}]:17851
## [] : 6282
## [{'iso_3166_1': 'GB', 'name': 'United Kingdom'}] : 2238
## [{'iso_3166_1': 'FR', 'name': 'France'}] : 1654
## [{'iso_3166_1': 'JP', 'name': 'Japan'}] : 1356
## [{'iso_3166_1': 'IT', 'name': 'Italy'}] : 1030
## (Other) :15055
## release_date revenue runtime
## 2008-01-01: 136 Min. :0.000e+00 Min. : 0.00
## 2009-01-01: 121 1st Qu.:0.000e+00 1st Qu.: 85.00
## 2007-01-01: 118 Median :0.000e+00 Median : 95.00
## 2005-01-01: 111 Mean :1.121e+07 Mean : 94.13
## 2006-01-01: 101 3rd Qu.:0.000e+00 3rd Qu.: 107.00
## 2002-01-01: 96 Max. :2.788e+09 Max. :1256.00
## (Other) :44783 NA's :6 NA's :263
## spoken_languages
## [{'iso_639_1': 'en', 'name': 'English'}] :22395
## [] : 3829
## [{'iso_639_1': 'fr', 'name': 'Français'}]: 1853
## [{'iso_639_1': 'ja', 'name': 'æ¥æ¬èª'}]: 1289
## [{'iso_639_1': 'it', 'name': 'Italiano'}] : 1218
## [{'iso_639_1': 'es', 'name': 'Español'}] : 902
## (Other) :13980
## status tagline
## : 87 :25054
## Canceled : 2 Based on a true story. : 7
## In Production : 20 - : 4
## Planned : 15 Be careful what you wish for.: 4
## Post Production: 98 Trust no one. : 4
## Released :45014 A Love Story : 3
## Rumored : 230 (Other) :20390
## title video vote_average
## Cinderella : 11 : 6 Min. : 0.000
## Alice in Wonderland : 9 False:45367 1st Qu.: 5.000
## Hamlet : 9 True : 93 Median : 6.000
## Beauty and the Beast: 8 Mean : 5.618
## Les Misérables : 8 3rd Qu.: 6.800
## A Christmas Carol : 7 Max. :10.000
## (Other) :45414 NA's :6
## vote_count
## Min. : 0.0
## 1st Qu.: 3.0
## Median : 10.0
## Mean : 109.9
## 3rd Qu.: 34.0
## Max. :14075.0
## NA's :6
# Compact structural overview of `rat`: one line per column with its type
# and the first few values.
utils::str(rat)
## 'data.frame': 45466 obs. of 24 variables:
## $ adult : Factor w/ 5 levels " - Written by ÃrnÃ¥s",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ belongs_to_collection: Factor w/ 1699 levels "","{'id': 10, 'name': 'Star Wars Collection', 'poster_path': '/ghd5zOQnDaDW1mxO7R5fXXpZMu.jpg', 'backdrop_path': '"| __truncated__,..: 16 1 149 1 1661 1 1 1 1 1480 ...
## $ budget : Factor w/ 1226 levels "/ff9qCepilowshEtG2GYWwzt2bs4.jpg",..: 603 977 4 269 4 938 916 4 673 916 ...
## $ genres : Factor w/ 4069 levels "[]","[{'id': 10402, 'name': 'Music'}, {'id': 10749, 'name': 'Romance'}, {'id': 10751, 'name': 'Family'}, {'id': 14, "| __truncated__,..: 1409 694 219 2982 3204 2772 2867 2297 2393 912 ...
## $ homepage : Factor w/ 7674 levels "","[{'iso_3166_1': 'CA', 'name': 'Canada'}, {'iso_3166_1': 'LU', 'name': 'Luxembourg'}, {'iso_3166_1': 'GB', 'name"| __truncated__,..: 1648 1 1 1 1 1 1 1 1 4762 ...
## $ id : Factor w/ 45436 levels "100","10000",..: 42818 43253 8247 22686 3367 44426 3363 32502 43711 39417 ...
## $ imdb_id : Factor w/ 45418 levels "","0","tt0000001",..: 17054 16814 16756 17084 16705 16771 16979 16545 17022 16746 ...
## $ original_language : Factor w/ 93 levels "","104.0","68.0",..: 22 22 22 22 22 22 22 22 22 22 ...
## $ original_title : Factor w/ 43373 levels "'49-'17","'71",..: 40233 19508 15679 41878 13708 16268 30211 40019 32959 15378 ...
## $ overview : Factor w/ 44308 levels "","'305' is a mockumentary detailing the misadventures of five not-so-brave members of the Spartan army charged wi"| __truncated__,..: 26769 42967 2698 15920 26169 29318 12928 5166 24645 25341 ...
## $ popularity : Factor w/ 43759 levels "","0.0","0.000102",..: 33705 29937 28531 35611 42292 30001 40205 32366 37970 29603 ...
## $ poster_path : Factor w/ 45025 levels "","/107LxWmMYCh01Y7dYsWop7vBnx4.jpg",..: 34093 39878 6966 137 17194 44497 24320 35314 17855 5355 ...
## $ production_companies : Factor w/ 22709 levels "","[]","[{'name': '(주)ë¡ëí½ì³ì¤', 'id': 87426}]",..: 15320 19843 21774 20260 16910 16190 14865 21644 20774 20455 ...
## $ production_countries : Factor w/ 2394 levels "","[]","[{'iso_3166_1': 'AE', 'name': 'United Arab Emirates'}, {'iso_3166_1': 'GB', 'name': 'United Kingdom'}, {'iso_31"| __truncated__,..: 2354 2354 2354 2354 2354 2354 785 2354 2354 1384 ...
## $ release_date : Factor w/ 17337 levels "","1","12","1874-12-09",..: 10679 10710 10715 10715 10511 10710 10710 10715 10715 10694 ...
## $ revenue : num 3.74e+08 2.63e+08 0.00 8.15e+07 7.66e+07 ...
## $ runtime : num 81 104 101 127 106 170 127 97 106 130 ...
## $ spoken_languages : Factor w/ 1932 levels "","[]","[{'iso_639_1': 'ab', 'name': ''}, {'iso_639_1': 'ka', 'name': 'á¥áá áá£áá'}, {'iso_639_1': 'ru', 'na"| __truncated__,..: 1134 845 1134 1134 1134 667 1271 627 1134 1064 ...
## $ status : Factor w/ 7 levels "","Canceled",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ tagline : Factor w/ 20284 levels "","'A Journey of Dreams'- Courage, Determination, Destiny",..: 1 12049 13490 5681 9077 1129 19916 15439 13728 10573 ...
## $ title : Factor w/ 42278 levels "","'49-'17","'71",..: 39114 16905 13450 40524 11269 14042 26512 38890 29423 13123 ...
## $ video : Factor w/ 3 levels "","False","True": 2 2 2 2 2 2 2 2 2 2 ...
## $ vote_average : num 7.7 6.9 6.5 6.1 5.7 7.7 6.2 5.4 5.5 6.6 ...
## $ vote_count : int 5415 2413 92 34 173 1886 141 45 174 1194 ...
# Open the spreadsheet-style data viewer only when a user is at the console.
# View() needs a display/IDE and can fail or stall when the script is knitted
# or run in batch mode (e.g. via Rscript), so it is skipped there.
if (interactive()) {
  View(rat)
}
Loading Libraries
library(knitr)
library(pander)
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.4
## v tibble 1.4.1 v dplyr 0.7.4
## v tidyr 0.7.2 v stringr 1.2.0
## v readr 1.1.1 v forcats 0.2.0
## -- Conflicts ------------------------------------------------------------------- tidyverse_conflicts() --
## x ggplot2::%+%() masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(broom)
library(DataCombine)
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
Summary of Regression Model
# Fit a simple linear regression of vote_count (response) on vote_average
# (predictor). Columns are supplied through `data = rat` instead of `rat$...`
# inside the formula, so the coefficient is labelled `vote_average` and the
# fitted model can be used with predict(newdata = ...).
lmfit1 <- lm(vote_count ~ vote_average, data = rat)
# Report two significant digits in pander tables (session-wide pander option).
panderOptions("digits", 2)
# Caption names the variables that are actually modelled; it previously read
# "popularity ~ vote count", which does not match the formula above.
pander(lmfit1, caption = "Linear Model: vote_count ~ vote_average")
Linear Model: popularity ~ vote count
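|                  | Estimate | Std. Error | t value | Pr(>\|t\|) |
|:-----------------|---------:|-----------:|--------:|-----------:|
| (Intercept)      |      -67 |        7.1 |    -9.6 |    1.3e-21 |
| rat$vote_average |       32 |        1.2 |      27 |   3.2e-154 |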
# Extract the coefficient of determination from the model summary and print
# it to the console with a short label.
R1 <- summary(lmfit1)[["r.squared"]]
cat("R-Squared = ", R1)
## R-Squared = 0.0152786
library(ggplot2)
# Scatter plot of runtime (y axis) against revenue (x axis) for the rows of
# `rat`. Columns are mapped by bare name so ggplot2 resolves them from the
# `data` argument rather than from `rat$...` vectors. The axis labels now
# describe the plotted variables; they previously read "Date" and "USD".
# NOTE(review): revenue is presumably in USD (the old label said "USD") and
# runtime presumably in minutes — confirm units against the data source.
ggplot(rat, aes(x = revenue, y = runtime)) +
  geom_point(color = "firebrick") +
  ggtitle("revenue vs. runtime") +
  labs(x = "Revenue (USD)", y = "Runtime") +
  # All theme tweaks in a single call: bold 20pt title with vertical margin,
  # slanted x tick labels, and a grey panel background.
  theme(
    plot.title = element_text(
      size = 20, face = "bold",
      margin = margin(10, 0, 10, 0)
    ),
    axis.text.x = element_text(angle = 50, vjust = 0.5),
    panel.background = element_rect(fill = "grey75")
  )
## Warning: Removed 263 rows containing missing values (geom_point).
