Question 1
# Load the movies data set: a tab-separated text file with a header row, read
# directly from the URL below. Text columns are kept as character vectors
# rather than being converted to factors.
# NOTE(review): a title printed further down in this file shows a raw "\xd5"
# byte, which suggests the file is not UTF-8 -- consider passing
# `fileEncoding` once the real encoding has been confirmed.
movie.sample <- read.table(
  file = "http://nathanieldphillips.com/wp-content/uploads/2015/11/movies1.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)
Question 2
# Size of the data frame: number of rows, then number of columns.
dim(movie.sample)[1L]
## [1] 5000
dim(movie.sample)[2L]
## [1] 17
# Column names of the data frame.
colnames(movie.sample)
## [1] "name" "boxoffice.total"
## [3] "boxoffice.domestic" "boxoffice.international"
## [5] "dvd.domestic" "budget"
## [7] "rating" "genre"
## [9] "creative.type" "time"
## [11] "year" "production.method"
## [13] "sequel" "boxoffice.domestic.inflationadj"
## [15] "budget.millions" "boxoffice.total.millions"
## [17] "revenue.d.budget"
Question 3
# Show the first six rows of the data frame.
head(movie.sample, n = 6L)
## name boxoffice.total boxoffice.domestic
## 1 Avatar 2783918982 760507625
## 2 Titanic 2207615668 658672302
## 3 Jurassic World 1665443635 651443635
## 4 The Avengers 1519479547 623279547
## 5 Furious 7 1516246709 351032910
## 6 The Avengers: Age of Ultron 1404705868 459005868
## boxoffice.international dvd.domestic budget rating genre
## 1 2023411357 230915507 425000000 PG-13 Action
## 2 1548943366 NA 200000000 PG-13 Thriller/Suspense
## 3 1014000000 NA 215000000 PG-13 Action
## 4 896200000 109515497 225000000 PG-13 Adventure
## 5 1165213799 14947559 190000000 PG-13 Action
## 6 945700000 7312791 250000000 PG-13 Action
## creative.type time year production.method sequel
## 1 Science Fiction 162 2009 Animation/Live Action 0
## 2 Historical Fiction 194 1997 Live Action 0
## 3 Science Fiction 124 2015 Live Action 1
## 4 Super Hero 143 2012 Animation/Live Action 0
## 5 Contemporary Fiction 137 2014 Live Action 1
## 6 Super Hero 141 2015 Live Action 1
## boxoffice.domestic.inflationadj budget.millions boxoffice.total.millions
## 1 826198130 425 2783.919
## 2 1139182838 200 2207.616
## 3 651443635 215 1665.444
## 4 655383136 225 1519.480
## 5 351032910 190 1516.247
## 6 459005868 250 1404.706
## revenue.d.budget
## 1 6.550398
## 2 11.038078
## 3 7.746249
## 4 6.753242
## 5 7.980246
## 6 5.618823
# Show the last six rows of the data frame.
tail(movie.sample, n = 6L)
## name boxoffice.total boxoffice.domestic
## 4995 Idlewild 12571185 12571185
## 4996 Hellraiser III: Hell on Earth 12525537 12525537
## 4997 Sorority Boys 12516222 10198766
## 4998 Love Jones 12514138 12514138
## 4999 Klute 12512637 12512637
## 5000 Breakin' All the Rules 12512317 12232382
## boxoffice.international dvd.domestic budget rating genre
## 4995 0 8753109 15000000 R Musical
## 4996 0 NA 0 R Horror
## 4997 2317456 NA 12000000 R Comedy
## 4998 0 NA 10000000 R Drama
## 4999 0 NA 0 <NA> <NA>
## 5000 279935 NA 9000000 PG-13 Romantic Comedy
## creative.type time year production.method sequel
## 4995 Historical Fiction NA 2006 Live Action 0
## 4996 Fantasy NA 1992 Live Action 1
## 4997 Contemporary Fiction NA 2002 Live Action 0
## 4998 Contemporary Fiction NA 1997 Live Action 0
## 4999 <NA> NA 1971 <NA> 0
## 5000 Contemporary Fiction NA 2004 Live Action 0
## boxoffice.domestic.inflationadj budget.millions
## 4995 16064248 15
## 4996 12525537 0
## 4997 14692539 12
## 4998 22819901 10
## 4999 63473192 0
## 5000 16487126 9
## boxoffice.total.millions revenue.d.budget
## 4995 12.57118 0.838079
## 4996 12.52554 Inf
## 4997 12.51622 1.043019
## 4998 12.51414 1.251414
## 4999 12.51264 Inf
## 5000 12.51232 1.390257
# Show rows 20 through 30, keeping every column.
movie.sample[seq(from = 20, to = 30), ]
## name boxoffice.total
## 20 Star Wars Ep. I: The Phantom Menace 1027044677
## 21 Alice in Wonderland 1025491110
## 22 The Hobbit: An Unexpected Journey 1017003568
## 23 The Dark Knight 1002891358
## 24 The Lion King 987480140
## 25 Despicable Me 2 974873764
## 26 Harry Potter and the Sorcerer\xd5s Stone 974755371
## 27 Pirates of the Caribbean: At World's End 963420425
## 28 The Hobbit: The Desolation of Smaug 960366855
## 29 Harry Potter and the Deathly Hallows: Part I 959301070
## 30 The Hobbit: The Battle of the Five Armies 955119788
## boxoffice.domestic boxoffice.international dvd.domestic budget
## 20 474544677 552500000 NA 115000000
## 21 334191110 691300000 82297116 200000000
## 22 303003568 714000000 34191124 250000000
## 23 533345358 469546000 282134660 185000000
## 24 422780140 564700000 86246006 79300000
## 25 368065385 606808379 124381409 76000000
## 26 317575550 657179821 NA 125000000
## 27 309420425 654000000 312228146 300000000
## 28 258366855 702000000 40247146 250000000
## 29 295001070 664300000 94385569 125000000
## 30 255119788 700000000 22439417 250000000
## rating genre creative.type time year production.method
## 20 PG Adventure Science Fiction 133 1999 Animation/Live Action
## 21 PG Adventure Fantasy 108 2010 Animation/Live Action
## 22 PG-13 Adventure Fantasy 169 2012 Animation/Live Action
## 23 PG-13 Action Super Hero 152 2008 Live Action
## 24 G Adventure Kids Fiction 87 1994 Hand Animation
## 25 PG Comedy Kids Fiction 98 2013 Digital Animation
## 26 PG Adventure Fantasy 152 2001 Animation/Live Action
## 27 PG-13 Adventure Historical Fiction 167 2007 Live Action
## 28 PG-13 Adventure Fantasy 201 2013 Animation/Live Action
## 29 PG-13 Adventure Fantasy 146 2010 Animation/Live Action
## 30 PG-13 Adventure Fantasy 144 2014 Live Action
## sequel boxoffice.domestic.inflationadj budget.millions
## 20 1 755899504 115.0
## 21 0 354522122 200.0
## 22 1 317749322 250.0
## 23 1 621624466 185.0
## 24 0 756219975 79.3
## 25 1 378930222 76.0
## 26 0 468612897 125.0
## 27 1 376431535 300.0
## 28 1 265846191 250.0
## 29 1 312886528 125.0
## 30 1 260520067 250.0
## boxoffice.total.millions revenue.d.budget
## 20 1027.0447 8.930823
## 21 1025.4911 5.127456
## 22 1017.0036 4.068014
## 23 1002.8914 5.421034
## 24 987.4801 12.452461
## 25 974.8738 12.827286
## 26 974.7554 7.798043
## 27 963.4204 3.211401
## 28 960.3669 3.841467
## 29 959.3011 7.674409
## 30 955.1198 3.820479
Question 4
# Keep only the row(s) whose name is exactly "Harry Potter and the Chamber of
# Secrets" (rows with a missing name are excluded), then print the result.
movie.sample2 <- movie.sample[
  which(movie.sample$name == "Harry Potter and the Chamber of Secrets"), ,
  drop = FALSE
]
movie.sample2
## name boxoffice.total
## 40 Harry Potter and the Chamber of Secrets 878979634
## boxoffice.domestic boxoffice.international dvd.domestic budget
## 40 261987880 616991754 NA 100000000
## rating genre creative.type time year production.method sequel
## 40 PG Adventure Fantasy 161 2002 Animation/Live Action 1
## boxoffice.domestic.inflationadj budget.millions
## 40 376584346 100
## boxoffice.total.millions revenue.d.budget
## 40 878.9796 8.789796
# Rating recorded for the selected movie.
movie.sample2[["rating"]]
## [1] "PG"
# Look up "Harry Potter and the Deathly Hallows" by title prefix instead of
# exact equality. The exact-match query returned zero rows (and therefore
# numeric(0) for the box office total) because the data set stores this title
# with a suffix, e.g. "Harry Potter and the Deathly Hallows: Part I" (row 29
# in the listing of rows 20-30). A missing name gives NA from startsWith(),
# which subset() treats as "no match".
movie.sample3 <- subset(
  x = movie.sample,
  subset = startsWith(name, "Harry Potter and the Deathly Hallows")
)
movie.sample3
# NOTE(review): the output recorded for the old exact-match query (<0 rows>,
# numeric(0)) no longer applies and was dropped; re-run to capture the new
# output. The result may contain more than one row (one per matching title).
movie.sample3$boxoffice.total
# Keep only the row(s) whose name is exactly "Harry Potter and the Order of
# the Phoenix" (rows with a missing name are excluded), then print the result.
movie.sample4 <- movie.sample[
  which(movie.sample$name == "Harry Potter and the Order of the Phoenix"), ,
  drop = FALSE
]
movie.sample4
## name boxoffice.total
## 31 Harry Potter and the Order of the Phoenix 942943935
## boxoffice.domestic boxoffice.international dvd.domestic budget
## 31 292004738 650939197 250892902 150000000
## rating genre creative.type time year production.method sequel
## 31 PG-13 Adventure Fantasy 138 2007 Animation/Live Action 1
## boxoffice.domestic.inflationadj budget.millions
## 31 355244135 150
## boxoffice.total.millions revenue.d.budget
## 31 942.9439 6.286293
# Value of the `time` column for the selected movie.
movie.sample4[["time"]]
## [1] 138
Question 5
# Reorder the rows by the `year` column in ascending order (order() places
# missing years last by default), then list the first five names.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
index <- order(movie.sample$year, decreasing = FALSE)
movie.sample <- movie.sample[index, ]
movie.sample$name[1:5]
## [1] "The Big Parade" "Snow White and the Seven Dwarfs"
## [3] "Gone with the Wind" "The Wizard of Oz"
## [5] "Pinocchio"
Question 6
# Reorder the rows by `boxoffice.total`, largest first.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
index <- order(movie.sample$boxoffice.total, decreasing = TRUE)
movie.sample <- movie.sample[index, ]
Question 7
# Add a column holding `boxoffice.total` expressed in millions.
movie.sample[["boxoffice.millions"]] <- movie.sample[["boxoffice.total"]] / 1e6
Question 8
# Summary statistics: mean of `boxoffice.total` (computed as-is), then the
# median of `time` and the mean of `sequel` with missing values dropped.
mean(movie.sample[["boxoffice.total"]])
## [1] 98205350
median(movie.sample[["time"]], na.rm = TRUE)
## [1] 108
mean(movie.sample[["sequel"]], na.rm = TRUE)
## [1] 0.1170213
Question 9
# First five entries of the `genre` column, then of the `time` column.
movie.sample[["genre"]][seq_len(5)]
## [1] "Action" "Thriller/Suspense" "Action"
## [4] "Adventure" "Action"
movie.sample[["time"]][seq_len(5)]
## [1] 162 194 124 143 137
Question 10
# Count the rows in each genre (table() leaves out missing values by default).
table(movie.sample[["genre"]])
##
## Action Adventure Black Comedy
## 692 486 34
## Comedy Concert/Performance Documentary
## 1212 14 63
## Drama Horror Multiple Genres
## 1085 300 2
## Musical Reality Romantic Comedy
## 77 2 249
## Thriller/Suspense Western
## 428 38