# WPA 3 Julie Melicharová

# Titles of the 20 movies in the sample.
m.names <- c(
  "Baramgwa hamjje sarajida", "Sleepless in Seattle", "The Water Diviner",
  "Fly Away Home", "The Three Musketeers", "Candyman: Farewell to Flesh",
  "Honey I Blew Up the Kid", "Kingsman: The Secret Service",
  "Ajab Prem Ki Ghazab Kahani", "A Bug's Life", "Courage Under Fire",
  "Dirty Pretty Things", "In the Name of the Father", "Soul Plane",
  "Magnum Force", "About Time", "House of Sand and Fog",
  "Bokura ga ita Zenpen", "Jackass 3D", "Tropic Thunder - A Pirate's Tale"
)

# Box-office value for each movie, in the same order as m.names.
boxoffice <- c(
  28686545, 218076024, 30864649, 35870837, 50375628, 13899536,
  58662452, 404561724, 15906411, 363089431, 100748262, 14156753,
  25096862, 14553807, 44680473, 89177486, 16157923, 26324268, 171685793,
  191091250
)

# Genre label for each movie.
genre <- c(
  "Action", "Romantic Comedy", "Drama", "Drama", "Adventure",
  "Horror", "Comedy", "Action", "Comedy", "Adventure", "Drama",
  "Drama", "Drama", "Comedy", "Action", "Romantic Comedy", "Drama",
  "Drama", "Comedy", "Comedy"
)

# Time value for each movie (NA where missing).
# NOTE(review): presumably the running time -- confirm against last week's WPA.
time <- c(
  121, 100, 112, NA, NA, NA, NA, 129, NA, 96, 111, NA, NA, NA,
  NA, 123, NA, 121, 93, 106
)

# Rating label for each movie (NA where missing).
rating <- c(
  NA, "PG", "R", "PG", "PG", "R", "PG", "R", NA, "G", "R",
  "R", "R", "R", NA, "R", "R", NA, "R", "R"
)

# Question 1
# A
# Above are the vectors from last week's WPA. Using these vectors, create a
# dataframe called movies.sample that contains the vectors.

# stringsAsFactors is set explicitly so the text columns stay character on
# every R version, matching the read.table() call used for movies1.
movies.sample <- data.frame(
  m.names, boxoffice, genre, time, rating,
  stringsAsFactors = FALSE
)

# B
# Read the full movies dataset from the URL below into `movies1`. The file is
# tab-delimited with a header row; text columns are kept as character vectors.
# NOTE(review): some names print with garbled characters further down (e.g.
# "SorcererŐs"), so the file may need an explicit `fileEncoding` -- confirm.
movies1 <- read.table(
  file = "http://nathanieldphillips.com/wp-content/uploads/2015/11/movies1.txt",
  header = TRUE, # spelled out: `T`/`F` are ordinary variables that can be reassigned
  sep = "\t", # tab-delimited
  stringsAsFactors = FALSE
)

# Question 2

# How many rows and columns are in the movies dataset?

nrow(movies1)
## [1] 5000
ncol(movies1)
## [1] 17
# What are the column names?
names(movies1)
##  [1] "name"                            "boxoffice.total"                
##  [3] "boxoffice.domestic"              "boxoffice.international"        
##  [5] "dvd.domestic"                    "budget"                         
##  [7] "rating"                          "genre"                          
##  [9] "creative.type"                   "time"                           
## [11] "year"                            "production.method"              
## [13] "sequel"                          "boxoffice.domestic.inflationadj"
## [15] "budget.millions"                 "boxoffice.total.millions"       
## [17] "revenue.d.budget"

# Question 3

# A Look at the first few rows of the dataset using the head() function.
head(movies1)
##                          name boxoffice.total boxoffice.domestic
## 1                      Avatar      2783918982          760507625
## 2                     Titanic      2207615668          658672302
## 3              Jurassic World      1665443635          651443635
## 4                The Avengers      1519479547          623279547
## 5                   Furious 7      1516246709          351032910
## 6 The Avengers: Age of Ultron      1404705868          459005868
##   boxoffice.international dvd.domestic    budget rating             genre
## 1              2023411357    230915507 425000000  PG-13            Action
## 2              1548943366           NA 200000000  PG-13 Thriller/Suspense
## 3              1014000000           NA 215000000  PG-13            Action
## 4               896200000    109515497 225000000  PG-13         Adventure
## 5              1165213799     14947559 190000000  PG-13            Action
## 6               945700000      7312791 250000000  PG-13            Action
##          creative.type time year     production.method sequel
## 1      Science Fiction  162 2009 Animation/Live Action      0
## 2   Historical Fiction  194 1997           Live Action      0
## 3      Science Fiction  124 2015           Live Action      1
## 4           Super Hero  143 2012 Animation/Live Action      0
## 5 Contemporary Fiction  137 2014           Live Action      1
## 6           Super Hero  141 2015           Live Action      1
##   boxoffice.domestic.inflationadj budget.millions boxoffice.total.millions
## 1                       826198130             425                 2783.919
## 2                      1139182838             200                 2207.616
## 3                       651443635             215                 1665.444
## 4                       655383136             225                 1519.480
## 5                       351032910             190                 1516.247
## 6                       459005868             250                 1404.706
##   revenue.d.budget
## 1         6.550398
## 2        11.038078
## 3         7.746249
## 4         6.753242
## 5         7.980246
## 6         5.618823
# B Print the final six rows of the dataset with tail().
tail(movies1, n = 6L)
##                               name boxoffice.total boxoffice.domestic
## 4995                      Idlewild        12571185           12571185
## 4996 Hellraiser III: Hell on Earth        12525537           12525537
## 4997                 Sorority Boys        12516222           10198766
## 4998                    Love Jones        12514138           12514138
## 4999                         Klute        12512637           12512637
## 5000        Breakin' All the Rules        12512317           12232382
##      boxoffice.international dvd.domestic   budget rating           genre
## 4995                       0      8753109 15000000      R         Musical
## 4996                       0           NA        0      R          Horror
## 4997                 2317456           NA 12000000      R          Comedy
## 4998                       0           NA 10000000      R           Drama
## 4999                       0           NA        0   <NA>            <NA>
## 5000                  279935           NA  9000000  PG-13 Romantic Comedy
##             creative.type time year production.method sequel
## 4995   Historical Fiction   NA 2006       Live Action      0
## 4996              Fantasy   NA 1992       Live Action      1
## 4997 Contemporary Fiction   NA 2002       Live Action      0
## 4998 Contemporary Fiction   NA 1997       Live Action      0
## 4999                 <NA>   NA 1971              <NA>      0
## 5000 Contemporary Fiction   NA 2004       Live Action      0
##      boxoffice.domestic.inflationadj budget.millions
## 4995                        16064248              15
## 4996                        12525537               0
## 4997                        14692539              12
## 4998                        22819901              10
## 4999                        63473192               0
## 5000                        16487126               9
##      boxoffice.total.millions revenue.d.budget
## 4995                 12.57118         0.838079
## 4996                 12.52554              Inf
## 4997                 12.51622         1.043019
## 4998                 12.51414         1.251414
## 4999                 12.51264              Inf
## 5000                 12.51232         1.390257
# C Print rows 20 to 30 (inclusive) with all of their columns.
movies1[seq(20, 30), ]
##                                            name boxoffice.total
## 20          Star Wars Ep. I: The Phantom Menace      1027044677
## 21                          Alice in Wonderland      1025491110
## 22            The Hobbit: An Unexpected Journey      1017003568
## 23                              The Dark Knight      1002891358
## 24                                The Lion King       987480140
## 25                              Despicable Me 2       974873764
## 26        Harry Potter and the SorcererŐs Stone       974755371
## 27     Pirates of the Caribbean: At World's End       963420425
## 28          The Hobbit: The Desolation of Smaug       960366855
## 29 Harry Potter and the Deathly Hallows: Part I       959301070
## 30    The Hobbit: The Battle of the Five Armies       955119788
##    boxoffice.domestic boxoffice.international dvd.domestic    budget
## 20          474544677               552500000           NA 115000000
## 21          334191110               691300000     82297116 200000000
## 22          303003568               714000000     34191124 250000000
## 23          533345358               469546000    282134660 185000000
## 24          422780140               564700000     86246006  79300000
## 25          368065385               606808379    124381409  76000000
## 26          317575550               657179821           NA 125000000
## 27          309420425               654000000    312228146 300000000
## 28          258366855               702000000     40247146 250000000
## 29          295001070               664300000     94385569 125000000
## 30          255119788               700000000     22439417 250000000
##    rating     genre      creative.type time year     production.method
## 20     PG Adventure    Science Fiction  133 1999 Animation/Live Action
## 21     PG Adventure            Fantasy  108 2010 Animation/Live Action
## 22  PG-13 Adventure            Fantasy  169 2012 Animation/Live Action
## 23  PG-13    Action         Super Hero  152 2008           Live Action
## 24      G Adventure       Kids Fiction   87 1994        Hand Animation
## 25     PG    Comedy       Kids Fiction   98 2013     Digital Animation
## 26     PG Adventure            Fantasy  152 2001 Animation/Live Action
## 27  PG-13 Adventure Historical Fiction  167 2007           Live Action
## 28  PG-13 Adventure            Fantasy  201 2013 Animation/Live Action
## 29  PG-13 Adventure            Fantasy  146 2010 Animation/Live Action
## 30  PG-13 Adventure            Fantasy  144 2014           Live Action
##    sequel boxoffice.domestic.inflationadj budget.millions
## 20      1                       755899504           115.0
## 21      0                       354522122           200.0
## 22      1                       317749322           250.0
## 23      1                       621624466           185.0
## 24      0                       756219975            79.3
## 25      1                       378930222            76.0
## 26      0                       468612897           125.0
## 27      1                       376431535           300.0
## 28      1                       265846191           250.0
## 29      1                       312886528           125.0
## 30      1                       260520067           250.0
##    boxoffice.total.millions revenue.d.budget
## 20                1027.0447         8.930823
## 21                1025.4911         5.127456
## 22                1017.0036         4.068014
## 23                1002.8914         5.421034
## 24                 987.4801        12.452461
## 25                 974.8738        12.827286
## 26                 974.7554         7.798043
## 27                 963.4204         3.211401
## 28                 960.3669         3.841467
## 29                 959.3011         7.674409
## 30                 955.1198         3.820479

# Question 4

# Each lookup builds a logical mask over movies1$name and uses it as a row
# index. `%in%` is used instead of `==` because it never returns NA, so a
# missing name cannot add an all-NA row to the subset.

# Show me all the data for Harry Potter and the Chamber of Secrets
harry <- movies1$name %in% "Harry Potter and the Chamber of Secrets"
data.harry <- movies1[harry, ]

# What was the rating of Harry Potter and the Chamber of Secrets? (use indexing!)
data.harry$rating
## [1] "PG"
# What was the boxoffice total of Harry Potter and the Deathly Hallows?
# (answered for the "Part I" entry)
harry.death <- movies1$name %in% "Harry Potter and the Deathly Hallows: Part I"
data.harry.death <- movies1[harry.death, ]
data.harry.death$boxoffice.total
## [1] 959301070
# What was the running time of Harry Potter and the Order of the Phoenix?
harry.bird <- movies1$name %in% "Harry Potter and the Order of the Phoenix"
data.harry.bird <- movies1[harry.bird, ]
data.harry.bird$time
## [1] 138

# Question 5

# Using this logic, sort the movies dataframe by year (with the oldest movies at the top of the dataframe).
# `oldies` holds the row indices of movies1 arranged from the earliest year to
# the latest; `FALSE` is spelled out because `F` is a reassignable variable.
oldies <- order(movies1$year, decreasing = FALSE)