Q1

Here are the vectors from last week’s WPA.

  # Titles of the 20 movies in the sample, one per line.
  m.names <- c(
    "Baramgwa hamjje sarajida",
    "Sleepless in Seattle",
    "The Water Diviner",
    "Fly Away Home",
    "The Three Musketeers",
    "Candyman: Farewell to Flesh",
    "Honey I Blew Up the Kid",
    "Kingsman: The Secret Service",
    "Ajab Prem Ki Ghazab Kahani",
    "A Bug's Life",
    "Courage Under Fire",
    "Dirty Pretty Things",
    "In the Name of the Father",
    "Soul Plane",
    "Magnum Force",
    "About Time",
    "House of Sand and Fog",
    "Bokura ga ita Zenpen",
    "Jackass 3D",
    "Tropic Thunder - A Pirate's Tale"
  )

# Box-office value for each movie (five values per row).
boxoffice <- c(
  28686545, 218076024, 30864649, 35870837, 50375628,
  13899536, 58662452, 404561724, 15906411, 363089431,
  100748262, 14156753, 25096862, 14553807, 44680473,
  89177486, 16157923, 26324268, 171685793, 191091250
)

# Genre label for each movie.
genre <- c(
  "Action", "Romantic Comedy", "Drama", "Drama", "Adventure",
  "Horror", "Comedy", "Action", "Comedy", "Adventure",
  "Drama", "Drama", "Drama", "Comedy", "Action",
  "Romantic Comedy", "Drama", "Drama", "Comedy", "Comedy"
)

# Time value for each movie; NA marks a missing entry.
# NOTE(review): presumably running time in minutes -- units are not stated.
time <- c(
  121, 100, 112, NA, NA,
  NA, NA, 129, NA, 96,
  111, NA, NA, NA, NA,
  123, NA, 121, 93, 106
)

# Rating for each movie; NA marks a missing entry.
rating <- c(
  NA, "PG", "R", "PG", "PG",
  "R", "PG", "R", NA, "G",
  "R", "R", "R", "R", NA,
  "R", "R", NA, "R", "R"
)

Using these vectors, create a dataframe called movies.sample that contains the vectors.

# Combine the five vectors column-wise into one data frame. Text columns are
# kept as character rather than converted to factors. TRUE/FALSE are spelled
# out because T and F are ordinary variables that can be reassigned.
movies.sample <- data.frame(
  m.names, boxoffice, genre, time, rating,
  stringsAsFactors = FALSE
)

# Print the resulting data frame.
movies.sample
##                             m.names boxoffice           genre time rating
## 1          Baramgwa hamjje sarajida  28686545          Action  121   <NA>
## 2              Sleepless in Seattle 218076024 Romantic Comedy  100     PG
## 3                 The Water Diviner  30864649           Drama  112      R
## 4                     Fly Away Home  35870837           Drama   NA     PG
## 5              The Three Musketeers  50375628       Adventure   NA     PG
## 6       Candyman: Farewell to Flesh  13899536          Horror   NA      R
## 7           Honey I Blew Up the Kid  58662452          Comedy   NA     PG
## 8      Kingsman: The Secret Service 404561724          Action  129      R
## 9        Ajab Prem Ki Ghazab Kahani  15906411          Comedy   NA   <NA>
## 10                     A Bug's Life 363089431       Adventure   96      G
## 11               Courage Under Fire 100748262           Drama  111      R
## 12              Dirty Pretty Things  14156753           Drama   NA      R
## 13        In the Name of the Father  25096862           Drama   NA      R
## 14                       Soul Plane  14553807          Comedy   NA      R
## 15                     Magnum Force  44680473          Action   NA   <NA>
## 16                       About Time  89177486 Romantic Comedy  123      R
## 17            House of Sand and Fog  16157923           Drama   NA      R
## 18             Bokura ga ita Zenpen  26324268           Drama  121   <NA>
## 19                       Jackass 3D 171685793          Comedy   93      R
## 20 Tropic Thunder - A Pirate's Tale 191091250          Comedy  106      R

Download the movies dataset.

# Read the tab-separated movies file from the given URL. The first line of the
# file supplies the column names, and text columns stay character (not factor).
# TRUE/FALSE are spelled out because T and F can be reassigned.
movies <- read.table(
  "http://nathanieldphillips.com/wp-content/uploads/2015/11/movies.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)

Q2

A. How many rows and columns are in the movies dataset?

# Number of rows:
nrow(movies)
## [1] 5000
# Number of columns:
ncol(movies)
## [1] 14

B. What are the column names?

# List the column names of the movies data frame.
colnames(movies)
##  [1] "name"                            "boxoffice.total"                
##  [3] "boxoffice.domestic"              "boxoffice.international"        
##  [5] "dvd.domestic"                    "budget"                         
##  [7] "rating"                          "genre"                          
##  [9] "creative.type"                   "time"                           
## [11] "year"                            "production.method"              
## [13] "sequel"                          "boxoffice.domestic.inflationadj"

Q3

A. Look at the first few rows of the dataset using the head() function.

# Show the first six rows (six is head()'s default).
head(movies, n = 6L)
##                          name boxoffice.total boxoffice.domestic
## 1                      Avatar      2783918982          760507625
## 2                     Titanic      2207615668          658672302
## 3              Jurassic World      1665443635          651443635
## 4                The Avengers      1519479547          623279547
## 5                   Furious 7      1516246709          351032910
## 6 The Avengers: Age of Ultron      1404705868          459005868
##   boxoffice.international dvd.domestic    budget rating             genre
## 1              2023411357    230915507 425000000  PG-13            Action
## 2              1548943366           NA 200000000  PG-13 Thriller/Suspense
## 3              1014000000           NA 215000000  PG-13            Action
## 4               896200000    109515497 225000000  PG-13         Adventure
## 5              1165213799     14947559 190000000  PG-13            Action
## 6               945700000      7312791 250000000  PG-13            Action
##          creative.type time year     production.method sequel
## 1      Science Fiction  162 2009 Animation/Live Action      0
## 2   Historical Fiction  194 1997           Live Action      0
## 3      Science Fiction  124 2015           Live Action      1
## 4           Super Hero  143 2012 Animation/Live Action      0
## 5 Contemporary Fiction  137 2014           Live Action      1
## 6           Super Hero  141 2015           Live Action      1
##   boxoffice.domestic.inflationadj
## 1                       826198130
## 2                      1139182838
## 3                       651443635
## 4                       655383136
## 5                       351032910
## 6                       459005868

B. Look at the last few rows using the tail() function.

# Show the last six rows (six is tail()'s default).
tail(movies, n = 6L)
##                               name boxoffice.total boxoffice.domestic
## 4995                      Idlewild        12571185           12571185
## 4996 Hellraiser III: Hell on Earth        12525537           12525537
## 4997                 Sorority Boys        12516222           10198766
## 4998                    Love Jones        12514138           12514138
## 4999                         Klute        12512637           12512637
## 5000        Breakin' All the Rules        12512317           12232382
##      boxoffice.international dvd.domestic   budget rating           genre
## 4995                       0      8753109 15000000      R         Musical
## 4996                       0           NA        0      R          Horror
## 4997                 2317456           NA 12000000      R          Comedy
## 4998                       0           NA 10000000      R           Drama
## 4999                       0           NA        0   <NA>            <NA>
## 5000                  279935           NA  9000000  PG-13 Romantic Comedy
##             creative.type time year production.method sequel
## 4995   Historical Fiction   NA 2006       Live Action      0
## 4996              Fantasy   NA 1992       Live Action      1
## 4997 Contemporary Fiction   NA 2002       Live Action      0
## 4998 Contemporary Fiction   NA 1997       Live Action      0
## 4999                 <NA>   NA 1971              <NA>      0
## 5000 Contemporary Fiction   NA 2004       Live Action      0
##      boxoffice.domestic.inflationadj
## 4995                        16064248
## 4996                        12525537
## 4997                        14692539
## 4998                        22819901
## 4999                        63473192
## 5000                        16487126

C. Look at rows 20 through 30.

# Rows 20 through 30, all columns.
movies[20:30, ]
##                                            name boxoffice.total
## 20          Star Wars Ep. I: The Phantom Menace      1027044677
## 21                          Alice in Wonderland      1025491110
## 22            The Hobbit: An Unexpected Journey      1017003568
## 23                              The Dark Knight      1002891358
## 24                                The Lion King       987480140
## 25                              Despicable Me 2       974873764
## 26        Harry Potter and the SorcererÕs Stone       974755371
## 27     Pirates of the Caribbean: At World's End       963420425
## 28          The Hobbit: The Desolation of Smaug       960366855
## 29 Harry Potter and the Deathly Hallows: Part I       959301070
## 30    The Hobbit: The Battle of the Five Armies       955119788
##    boxoffice.domestic boxoffice.international dvd.domestic    budget
## 20          474544677               552500000           NA 115000000
## 21          334191110               691300000     82297116 200000000
## 22          303003568               714000000     34191124 250000000
## 23          533345358               469546000    282134660 185000000
## 24          422780140               564700000     86246006  79300000
## 25          368065385               606808379    124381409  76000000
## 26          317575550               657179821           NA 125000000
## 27          309420425               654000000    312228146 300000000
## 28          258366855               702000000     40247146 250000000
## 29          295001070               664300000     94385569 125000000
## 30          255119788               700000000     22439417 250000000
##    rating     genre      creative.type time year     production.method
## 20     PG Adventure    Science Fiction  133 1999 Animation/Live Action
## 21     PG Adventure            Fantasy  108 2010 Animation/Live Action
## 22  PG-13 Adventure            Fantasy  169 2012 Animation/Live Action
## 23  PG-13    Action         Super Hero  152 2008           Live Action
## 24      G Adventure       Kids Fiction   87 1994        Hand Animation
## 25     PG    Comedy       Kids Fiction   98 2013     Digital Animation
## 26     PG Adventure            Fantasy  152 2001 Animation/Live Action
## 27  PG-13 Adventure Historical Fiction  167 2007           Live Action
## 28  PG-13 Adventure            Fantasy  201 2013 Animation/Live Action
## 29  PG-13 Adventure            Fantasy  146 2010 Animation/Live Action
## 30  PG-13 Adventure            Fantasy  144 2014           Live Action
##    sequel boxoffice.domestic.inflationadj
## 20      1                       755899504
## 21      0                       354522122
## 22      1                       317749322
## 23      1                       621624466
## 24      0                       756219975
## 25      1                       378930222
## 26      0                       468612897
## 27      1                       376431535
## 28      1                       265846191
## 29      1                       312886528
## 30      1                       260520067

Q4

A. Show me all the data for Harry Potter and the Chamber of Secrets

# Every column for the row(s) whose name matches the title exactly.
movies[movies$name == "Harry Potter and the Chamber of Secrets", ]
##                                       name boxoffice.total
## 40 Harry Potter and the Chamber of Secrets       878979634
##    boxoffice.domestic boxoffice.international dvd.domestic    budget
## 40          261987880               616991754           NA 100000000
##    rating     genre creative.type time year     production.method sequel
## 40     PG Adventure       Fantasy  161 2002 Animation/Live Action      1
##    boxoffice.domestic.inflationadj
## 40                       376584346

B. What was the rating of Harry Potter and the Chamber of Secrets?

# Rating of the movie whose name matches the title exactly.
movies$rating[movies$name == "Harry Potter and the Chamber of Secrets"]
## [1] "PG"

C. What was the boxoffice total of Harry Potter and the Deathly Hallows?

# Total box office of the movie whose name matches the title exactly.
movies$boxoffice.total[
  movies$name == "Harry Potter and the Deathly Hallows: Part I"
]
## [1] 959301070

D. What was the running time of Harry Potter and the Order of the Phoenix?

# Value of the time column for the movie whose name matches the title exactly.
movies$time[movies$name == "Harry Potter and the Order of the Phoenix"]
## [1] 138

Q5

A. Sort the movies dataframe by year (with the oldest movies at the top of the dataframe).

# Row permutation that puts the smallest year first (ascending order), then
# reorder the data frame with it. FALSE is spelled out because F is an
# ordinary variable that can be reassigned.
index <- order(movies$year, decreasing = FALSE)
movies <- movies[index, ]

B. What are the names of the 5 oldest movies in the dataset?

# Names in the first five rows of the data frame as currently ordered.
movies[1:5, "name"]
## [1] "The Big Parade"                  "Snow White and the Seven Dwarfs"
## [3] "Gone with the Wind"              "The Wizard of Oz"               
## [5] "Pinocchio"

Q6

Sort the dataframe back into its original order (by boxoffice). The first movie in the dataframe should be Avatar!

# Row permutation that puts the largest total box office first (descending
# order), then reorder the data frame with it. TRUE is spelled out because T
# is an ordinary variable that can be reassigned.
index2 <- order(movies$boxoffice.total, decreasing = TRUE)
movies <- movies[index2, ]

Q7

Add a new column called “boxoffice.millions” that has the box-office values in millions of dollars. For example, a value of 1000000 in the original boxoffice vector should be 1 in boxoffice.millions.

# Total box office rescaled by one million (1e6), stored as a new column.
movies$boxoffice.millions <- movies$boxoffice.total / 1e6

Q8

A. What is the mean box-office total of all movies?

# Mean of the boxoffice.total column across all rows.
mean(movies$boxoffice.total)
## [1] 98205350

B. What is the median running length of the movies?

# Median of the time column, ignoring missing values. TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
median(movies$time, na.rm = TRUE)
## [1] 108