# WPA 3 Julie Melicharová
# Sample of 20 movies: five parallel vectors, where position i in each vector
# describes the same movie.

# Movie titles.
m.names <- c(
  "Baramgwa hamjje sarajida",
  "Sleepless in Seattle",
  "The Water Diviner",
  "Fly Away Home",
  "The Three Musketeers",
  "Candyman: Farewell to Flesh",
  "Honey I Blew Up the Kid",
  "Kingsman: The Secret Service",
  "Ajab Prem Ki Ghazab Kahani",
  "A Bug's Life",
  "Courage Under Fire",
  "Dirty Pretty Things",
  "In the Name of the Father",
  "Soul Plane",
  "Magnum Force",
  "About Time",
  "House of Sand and Fog",
  "Bokura ga ita Zenpen",
  "Jackass 3D",
  "Tropic Thunder - A Pirate's Tale"
)

# Box office totals, one per movie.
boxoffice <- c(
  28686545, 218076024, 30864649, 35870837,
  50375628, 13899536, 58662452, 404561724,
  15906411, 363089431, 100748262, 14156753,
  25096862, 14553807, 44680473, 89177486,
  16157923, 26324268, 171685793, 191091250
)

# Genre label of each movie.
genre <- c(
  "Action", "Romantic Comedy", "Drama", "Drama",
  "Adventure", "Horror", "Comedy", "Action",
  "Comedy", "Adventure", "Drama", "Drama",
  "Drama", "Comedy", "Action", "Romantic Comedy",
  "Drama", "Drama", "Comedy", "Comedy"
)

# Running times; NA marks movies whose running time is missing.
time <- c(
  121, 100, 112, NA, NA,
  NA, NA, 129, NA, 96,
  111, NA, NA, NA, NA,
  123, NA, 121, 93, 106
)

# Ratings; NA marks movies without a recorded rating.
rating <- c(
  NA, "PG", "R", "PG", "PG",
  "R", "PG", "R", NA, "G",
  "R", "R", "R", "R", NA,
  "R", "R", NA, "R", "R"
)
# Question 1
#A
# Here are the vectors from last week's WPA. Using these vectors, create a dataframe called movies.sample that contains the vectors.
# Combine the five movie vectors into one data frame, one row per movie;
# the columns are named after the vectors.
# stringsAsFactors = FALSE keeps the text columns (m.names, genre, rating) as
# character on every R version -- the default was TRUE before R 4.0 -- which
# matches how the movies1 dataset is read in below.
movies.sample <- data.frame(m.names, boxoffice, genre, time, rating,
                            stringsAsFactors = FALSE)
#B
# Read the full movies dataset from the course website into movies1.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, whereas TRUE and FALSE are reserved words.
movies1 <- read.table(
  file = "http://nathanieldphillips.com/wp-content/uploads/2015/11/movies1.txt",
  header = TRUE,            # first line of the file holds the column names
  sep = "\t",               # tab-delimited
  stringsAsFactors = FALSE  # keep text columns as character, not factor
)
# Question 2
# Size of the movies dataset: first the row count, then the column count.
dim(movies1)[1]
## [1] 5000
dim(movies1)[2]
## [1] 17
# List the name of every column.
colnames(movies1)
## [1] "name" "boxoffice.total"
## [3] "boxoffice.domestic" "boxoffice.international"
## [5] "dvd.domestic" "budget"
## [7] "rating" "genre"
## [9] "creative.type" "time"
## [11] "year" "production.method"
## [13] "sequel" "boxoffice.domestic.inflationadj"
## [15] "budget.millions" "boxoffice.total.millions"
## [17] "revenue.d.budget"
# Question 3
# A: Print the first six rows of the dataset with head().
head(movies1, n = 6L)
## name boxoffice.total boxoffice.domestic
## 1 Avatar 2783918982 760507625
## 2 Titanic 2207615668 658672302
## 3 Jurassic World 1665443635 651443635
## 4 The Avengers 1519479547 623279547
## 5 Furious 7 1516246709 351032910
## 6 The Avengers: Age of Ultron 1404705868 459005868
## boxoffice.international dvd.domestic budget rating genre
## 1 2023411357 230915507 425000000 PG-13 Action
## 2 1548943366 NA 200000000 PG-13 Thriller/Suspense
## 3 1014000000 NA 215000000 PG-13 Action
## 4 896200000 109515497 225000000 PG-13 Adventure
## 5 1165213799 14947559 190000000 PG-13 Action
## 6 945700000 7312791 250000000 PG-13 Action
## creative.type time year production.method sequel
## 1 Science Fiction 162 2009 Animation/Live Action 0
## 2 Historical Fiction 194 1997 Live Action 0
## 3 Science Fiction 124 2015 Live Action 1
## 4 Super Hero 143 2012 Animation/Live Action 0
## 5 Contemporary Fiction 137 2014 Live Action 1
## 6 Super Hero 141 2015 Live Action 1
## boxoffice.domestic.inflationadj budget.millions boxoffice.total.millions
## 1 826198130 425 2783.919
## 2 1139182838 200 2207.616
## 3 651443635 215 1665.444
## 4 655383136 225 1519.480
## 5 351032910 190 1516.247
## 6 459005868 250 1404.706
## revenue.d.budget
## 1 6.550398
## 2 11.038078
## 3 7.746249
## 4 6.753242
## 5 7.980246
## 6 5.618823
# B: Print the last six rows of the dataset with tail().
tail(movies1, n = 6L)
## name boxoffice.total boxoffice.domestic
## 4995 Idlewild 12571185 12571185
## 4996 Hellraiser III: Hell on Earth 12525537 12525537
## 4997 Sorority Boys 12516222 10198766
## 4998 Love Jones 12514138 12514138
## 4999 Klute 12512637 12512637
## 5000 Breakin' All the Rules 12512317 12232382
## boxoffice.international dvd.domestic budget rating genre
## 4995 0 8753109 15000000 R Musical
## 4996 0 NA 0 R Horror
## 4997 2317456 NA 12000000 R Comedy
## 4998 0 NA 10000000 R Drama
## 4999 0 NA 0 <NA> <NA>
## 5000 279935 NA 9000000 PG-13 Romantic Comedy
## creative.type time year production.method sequel
## 4995 Historical Fiction NA 2006 Live Action 0
## 4996 Fantasy NA 1992 Live Action 1
## 4997 Contemporary Fiction NA 2002 Live Action 0
## 4998 Contemporary Fiction NA 1997 Live Action 0
## 4999 <NA> NA 1971 <NA> 0
## 5000 Contemporary Fiction NA 2004 Live Action 0
## boxoffice.domestic.inflationadj budget.millions
## 4995 16064248 15
## 4996 12525537 0
## 4997 14692539 12
## 4998 22819901 10
## 4999 63473192 0
## 5000 16487126 9
## boxoffice.total.millions revenue.d.budget
## 4995 12.57118 0.838079
## 4996 12.52554 Inf
## 4997 12.51622 1.043019
## 4998 12.51414 1.251414
## 4999 12.51264 Inf
## 5000 12.51232 1.390257
# C: Print rows 20 through 30 (all columns) by row index.
movies1[seq(from = 20, to = 30), ]
## name boxoffice.total
## 20 Star Wars Ep. I: The Phantom Menace 1027044677
## 21 Alice in Wonderland 1025491110
## 22 The Hobbit: An Unexpected Journey 1017003568
## 23 The Dark Knight 1002891358
## 24 The Lion King 987480140
## 25 Despicable Me 2 974873764
## 26 Harry Potter and the SorcererŐs Stone 974755371
## 27 Pirates of the Caribbean: At World's End 963420425
## 28 The Hobbit: The Desolation of Smaug 960366855
## 29 Harry Potter and the Deathly Hallows: Part I 959301070
## 30 The Hobbit: The Battle of the Five Armies 955119788
## boxoffice.domestic boxoffice.international dvd.domestic budget
## 20 474544677 552500000 NA 115000000
## 21 334191110 691300000 82297116 200000000
## 22 303003568 714000000 34191124 250000000
## 23 533345358 469546000 282134660 185000000
## 24 422780140 564700000 86246006 79300000
## 25 368065385 606808379 124381409 76000000
## 26 317575550 657179821 NA 125000000
## 27 309420425 654000000 312228146 300000000
## 28 258366855 702000000 40247146 250000000
## 29 295001070 664300000 94385569 125000000
## 30 255119788 700000000 22439417 250000000
## rating genre creative.type time year production.method
## 20 PG Adventure Science Fiction 133 1999 Animation/Live Action
## 21 PG Adventure Fantasy 108 2010 Animation/Live Action
## 22 PG-13 Adventure Fantasy 169 2012 Animation/Live Action
## 23 PG-13 Action Super Hero 152 2008 Live Action
## 24 G Adventure Kids Fiction 87 1994 Hand Animation
## 25 PG Comedy Kids Fiction 98 2013 Digital Animation
## 26 PG Adventure Fantasy 152 2001 Animation/Live Action
## 27 PG-13 Adventure Historical Fiction 167 2007 Live Action
## 28 PG-13 Adventure Fantasy 201 2013 Animation/Live Action
## 29 PG-13 Adventure Fantasy 146 2010 Animation/Live Action
## 30 PG-13 Adventure Fantasy 144 2014 Live Action
## sequel boxoffice.domestic.inflationadj budget.millions
## 20 1 755899504 115.0
## 21 0 354522122 200.0
## 22 1 317749322 250.0
## 23 1 621624466 185.0
## 24 0 756219975 79.3
## 25 1 378930222 76.0
## 26 0 468612897 125.0
## 27 1 376431535 300.0
## 28 1 265846191 250.0
## 29 1 312886528 125.0
## 30 1 260520067 250.0
## boxoffice.total.millions revenue.d.budget
## 20 1027.0447 8.930823
## 21 1025.4911 5.127456
## 22 1017.0036 4.068014
## 23 1002.8914 5.421034
## 24 987.4801 12.452461
## 25 974.8738 12.827286
## 26 974.7554 7.798043
## 27 963.4204 3.211401
## 28 960.3669 3.841467
## 29 959.3011 7.674409
## 30 955.1198 3.820479
# Question 4
# Each lookup below follows the same pattern: build a logical mask over the
# name column, keep the matching row(s) of movies1, then pull one column.

# All the data for Harry Potter and the Chamber of Secrets.
harry <- movies1[["name"]] == "Harry Potter and the Chamber of Secrets"
data.harry <- movies1[harry, , drop = FALSE]
# Rating of Harry Potter and the Chamber of Secrets, taken from the subset.
data.harry[["rating"]]
## [1] "PG"
# Box office total of Harry Potter and the Deathly Hallows
# (the title matched here is "Part I").
harry.death <- movies1[["name"]] == "Harry Potter and the Deathly Hallows: Part I"
data.harry.death <- movies1[harry.death, , drop = FALSE]
data.harry.death[["boxoffice.total"]]
## [1] 959301070
# Running time of Harry Potter and the Order of the Phoenix.
harry.bird <- movies1[["name"]] == "Harry Potter and the Order of the Phoenix"
data.harry.bird <- movies1[harry.bird, , drop = FALSE]
data.harry.bird[["time"]]
## [1] 138
# Question 5
# Using this logic, sort the movies dataframe by year (with the oldest movies
# at the top of the dataframe).
# oldies holds the row positions of movies1 ordered from the earliest year to
# the latest (order() places missing years last by default); indexing the rows
# of movies1 with it yields the sorted data frame.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
oldies <- order(movies1$year, decreasing = FALSE)