Q1

Here are the vectors from last week’s WPA. Using these vectors, create a dataframe called movies.sample that contains the vectors.

# Titles of the 20 sampled movies. The four vectors below have the same length
# and are combined column-wise with this one further down.
m.names <- c(
  "Baramgwa hamjje sarajida", "Sleepless in Seattle",
  "The Water Diviner", "Fly Away Home",
  "The Three Musketeers", "Candyman: Farewell to Flesh",
  "Honey I Blew Up the Kid", "Kingsman: The Secret Service",
  "Ajab Prem Ki Ghazab Kahani", "A Bug's Life",
  "Courage Under Fire", "Dirty Pretty Things",
  "In the Name of the Father", "Soul Plane",
  "Magnum Force", "About Time",
  "House of Sand and Fog", "Bokura ga ita Zenpen",
  "Jackass 3D", "Tropic Thunder - A Pirate's Tale"
)

# Box-office figure for each movie (numeric).
boxoffice <- c(
  28686545, 218076024, 30864649, 35870837,
  50375628, 13899536, 58662452, 404561724,
  15906411, 363089431, 100748262, 14156753,
  25096862, 14553807, 44680473, 89177486,
  16157923, 26324268, 171685793, 191091250
)

# Genre label for each movie.
genre <- c(
  "Action", "Romantic Comedy", "Drama", "Drama",
  "Adventure", "Horror", "Comedy", "Action",
  "Comedy", "Adventure", "Drama", "Drama",
  "Drama", "Comedy", "Action", "Romantic Comedy",
  "Drama", "Drama", "Comedy", "Comedy"
)

# Time value for each movie; NA marks a missing entry.
time <- c(
  121, 100, 112, NA, NA,
  NA, NA, 129, NA, 96,
  111, NA, NA, NA, NA,
  123, NA, 121, 93, 106
)

# Rating for each movie; NA marks a missing entry.
rating <- c(
  NA, "PG", "R", "PG", "PG",
  "R", "PG", "R", NA, "G",
  "R", "R", "R", "R", NA,
  "R", "R", NA, "R", "R"
)

With the help of these vectors we are able to create the sample data frame.

# Build the sample as a real data frame. cbind() on these vectors would return
# a character matrix and coerce the numeric boxoffice and time values to
# strings; data.frame() keeps each column's own type.
movies.sample <- data.frame(
  m.names,
  boxoffice,
  genre,
  time,
  rating,
  stringsAsFactors = FALSE
)
movies.sample
##                             m.names boxoffice           genre time rating
## 1          Baramgwa hamjje sarajida  28686545          Action  121   <NA>
## 2              Sleepless in Seattle 218076024 Romantic Comedy  100     PG
## 3                 The Water Diviner  30864649           Drama  112      R
## 4                     Fly Away Home  35870837           Drama   NA     PG
## 5              The Three Musketeers  50375628       Adventure   NA     PG
## 6       Candyman: Farewell to Flesh  13899536          Horror   NA      R
## 7           Honey I Blew Up the Kid  58662452          Comedy   NA     PG
## 8      Kingsman: The Secret Service 404561724          Action  129      R
## 9        Ajab Prem Ki Ghazab Kahani  15906411          Comedy   NA   <NA>
## 10                     A Bug's Life 363089431       Adventure   96      G
## 11               Courage Under Fire 100748262           Drama  111      R
## 12              Dirty Pretty Things  14156753           Drama   NA      R
## 13        In the Name of the Father  25096862           Drama   NA      R
## 14                       Soul Plane  14553807          Comedy   NA      R
## 15                     Magnum Force  44680473          Action   NA   <NA>
## 16                       About Time  89177486 Romantic Comedy  123      R
## 17            House of Sand and Fog  16157923           Drama   NA      R
## 18             Bokura ga ita Zenpen  26324268           Drama  121   <NA>
## 19                       Jackass 3D 171685793          Comedy   93      R
## 20 Tropic Thunder - A Pirate's Tale 191091250          Comedy  106      R
# Download the tab-separated movies table. The first line supplies the column
# names, and text columns are kept as character instead of becoming factors.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
movies <- read.table(
  "http://nathanieldphillips.com/wp-content/uploads/2015/11/movies.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)

Q2

How many rows and columns are in the movies dataset?

# Row count followed by column count of the movies data frame.
c(nrow(movies), ncol(movies))
## [1] 5000   14

What are the column names?

# Column names of the movies data frame.
colnames(movies)
##  [1] "name"                            "boxoffice.total"                
##  [3] "boxoffice.domestic"              "boxoffice.international"        
##  [5] "dvd.domestic"                    "budget"                         
##  [7] "rating"                          "genre"                          
##  [9] "creative.type"                   "time"                           
## [11] "year"                            "production.method"              
## [13] "sequel"                          "boxoffice.domestic.inflationadj"

Q3

Look at the first few rows of the dataset using the head() function.

# First six rows (head()'s default count, written out explicitly).
head(movies, n = 6L)
##                          name boxoffice.total boxoffice.domestic
## 1                      Avatar      2783918982          760507625
## 2                     Titanic      2207615668          658672302
## 3              Jurassic World      1665443635          651443635
## 4                The Avengers      1519479547          623279547
## 5                   Furious 7      1516246709          351032910
## 6 The Avengers: Age of Ultron      1404705868          459005868
##   boxoffice.international dvd.domestic    budget rating             genre
## 1              2023411357    230915507 425000000  PG-13            Action
## 2              1548943366           NA 200000000  PG-13 Thriller/Suspense
## 3              1014000000           NA 215000000  PG-13            Action
## 4               896200000    109515497 225000000  PG-13         Adventure
## 5              1165213799     14947559 190000000  PG-13            Action
## 6               945700000      7312791 250000000  PG-13            Action
##          creative.type time year     production.method sequel
## 1      Science Fiction  162 2009 Animation/Live Action      0
## 2   Historical Fiction  194 1997           Live Action      0
## 3      Science Fiction  124 2015           Live Action      1
## 4           Super Hero  143 2012 Animation/Live Action      0
## 5 Contemporary Fiction  137 2014           Live Action      1
## 6           Super Hero  141 2015           Live Action      1
##   boxoffice.domestic.inflationadj
## 1                       826198130
## 2                      1139182838
## 3                       651443635
## 4                       655383136
## 5                       351032910
## 6                       459005868

Look at the last few rows using the tail() function.

# Last six rows (tail()'s default count, written out explicitly).
tail(movies, n = 6L)
##                               name boxoffice.total boxoffice.domestic
## 4995                      Idlewild        12571185           12571185
## 4996 Hellraiser III: Hell on Earth        12525537           12525537
## 4997                 Sorority Boys        12516222           10198766
## 4998                    Love Jones        12514138           12514138
## 4999                         Klute        12512637           12512637
## 5000        Breakin' All the Rules        12512317           12232382
##      boxoffice.international dvd.domestic   budget rating           genre
## 4995                       0      8753109 15000000      R         Musical
## 4996                       0           NA        0      R          Horror
## 4997                 2317456           NA 12000000      R          Comedy
## 4998                       0           NA 10000000      R           Drama
## 4999                       0           NA        0   <NA>            <NA>
## 5000                  279935           NA  9000000  PG-13 Romantic Comedy
##             creative.type time year production.method sequel
## 4995   Historical Fiction   NA 2006       Live Action      0
## 4996              Fantasy   NA 1992       Live Action      1
## 4997 Contemporary Fiction   NA 2002       Live Action      0
## 4998 Contemporary Fiction   NA 1997       Live Action      0
## 4999                 <NA>   NA 1971              <NA>      0
## 5000 Contemporary Fiction   NA 2004       Live Action      0
##      boxoffice.domestic.inflationadj
## 4995                        16064248
## 4996                        12525537
## 4997                        14692539
## 4998                        22819901
## 4999                        63473192
## 5000                        16487126

Look at rows 20 through 30.

# Rows 20 through 30, all columns.
movies[20:30, ]
##                                            name boxoffice.total
## 20          Star Wars Ep. I: The Phantom Menace      1027044677
## 21                          Alice in Wonderland      1025491110
## 22            The Hobbit: An Unexpected Journey      1017003568
## 23                              The Dark Knight      1002891358
## 24                                The Lion King       987480140
## 25                              Despicable Me 2       974873764
## 26     Harry Potter and the Sorcerer\xd5s Stone       974755371
## 27     Pirates of the Caribbean: At World's End       963420425
## 28          The Hobbit: The Desolation of Smaug       960366855
## 29 Harry Potter and the Deathly Hallows: Part I       959301070
## 30    The Hobbit: The Battle of the Five Armies       955119788
##    boxoffice.domestic boxoffice.international dvd.domestic    budget
## 20          474544677               552500000           NA 115000000
## 21          334191110               691300000     82297116 200000000
## 22          303003568               714000000     34191124 250000000
## 23          533345358               469546000    282134660 185000000
## 24          422780140               564700000     86246006  79300000
## 25          368065385               606808379    124381409  76000000
## 26          317575550               657179821           NA 125000000
## 27          309420425               654000000    312228146 300000000
## 28          258366855               702000000     40247146 250000000
## 29          295001070               664300000     94385569 125000000
## 30          255119788               700000000     22439417 250000000
##    rating     genre      creative.type time year     production.method
## 20     PG Adventure    Science Fiction  133 1999 Animation/Live Action
## 21     PG Adventure            Fantasy  108 2010 Animation/Live Action
## 22  PG-13 Adventure            Fantasy  169 2012 Animation/Live Action
## 23  PG-13    Action         Super Hero  152 2008           Live Action
## 24      G Adventure       Kids Fiction   87 1994        Hand Animation
## 25     PG    Comedy       Kids Fiction   98 2013     Digital Animation
## 26     PG Adventure            Fantasy  152 2001 Animation/Live Action
## 27  PG-13 Adventure Historical Fiction  167 2007           Live Action
## 28  PG-13 Adventure            Fantasy  201 2013 Animation/Live Action
## 29  PG-13 Adventure            Fantasy  146 2010 Animation/Live Action
## 30  PG-13 Adventure            Fantasy  144 2014           Live Action
##    sequel boxoffice.domestic.inflationadj
## 20      1                       755899504
## 21      0                       354522122
## 22      1                       317749322
## 23      1                       621624466
## 24      0                       756219975
## 25      1                       378930222
## 26      0                       468612897
## 27      1                       376431535
## 28      1                       265846191
## 29      1                       312886528
## 30      1                       260520067

Q4

Show me all the data for Harry Potter and the Chamber of Secrets.

# Every column for the row(s) whose name matches the title exactly.
movies[movies[["name"]] == "Harry Potter and the Chamber of Secrets", ]
##                                       name boxoffice.total
## 40 Harry Potter and the Chamber of Secrets       878979634
##    boxoffice.domestic boxoffice.international dvd.domestic    budget
## 40          261987880               616991754           NA 100000000
##    rating     genre creative.type time year     production.method sequel
## 40     PG Adventure       Fantasy  161 2002 Animation/Live Action      1
##    boxoffice.domestic.inflationadj
## 40                       376584346

What was the rating of Harry Potter and the Chamber of Secrets?

# Rating of the matching row, selecting the column directly by name.
movies[movies$name == "Harry Potter and the Chamber of Secrets", "rating"]
## [1] "PG"

What was the boxoffice total of Harry Potter and the Deathly Hallows?

# Total box office of the matching row; the lookup targets the "Part I" title.
movies[movies$name == "Harry Potter and the Deathly Hallows: Part I", "boxoffice.total"]
## [1] 959301070

What was the running time of Harry Potter and the Order of the Phoenix?

# Value of the time column for the matching row.
movies[movies$name == "Harry Potter and the Order of the Phoenix", "time"]
## [1] 138

Q5

It’s easy to sort rows in a dataframe according to a column: just use the code structure df <- df[order(df$column), ], where df is the dataframe and column is the column to sort by.

Using this logic, sort the movies dataframe by year (with the oldest movies at the top of the dataframe).

# Row permutation that puts the year column in ascending order (smallest year
# first). FALSE is spelled out because F is an ordinary, reassignable name.
index <- order(movies$year, decreasing = FALSE)

# Reordered copy; the original movies data frame is left untouched.
movies.order.year <- movies[index, ]
head(movies.order.year)
##                                 name boxoffice.total boxoffice.domestic
## 3798                  The Big Parade        22000000           11000000
## 682  Snow White and the Seven Dwarfs       184925485          184925485
## 217               Gone with the Wind       390525192          198680470
## 2918                The Wizard of Oz        33711566           33711566
## 1490                       Pinocchio        84300000           84300000
## 1504                        Fantasia        83320000           83320000
##      boxoffice.international dvd.domestic  budget rating     genre
## 3798                11000000           NA  245000   <NA>     Drama
## 682                        0           NA 1488000      G   Musical
## 217                191844722           NA 3900000      G     Drama
## 2918                       0           NA 2777000  PG-13   Musical
## 1490                       0           NA       0      G Adventure
## 1504                       0           NA 2280000      G   Musical
##                creative.type time year production.method sequel
## 3798                    <NA>   NA 1925       Live Action      0
## 682                  Fantasy   83 1937    Hand Animation      0
## 217       Historical Fiction  220 1939       Live Action      0
## 2918                 Fantasy  101 1939       Live Action      0
## 1490            Kids Fiction   NA 1940    Hand Animation      0
## 1504 Multiple Creative Types   NA 1940    Hand Animation      0
##      boxoffice.domestic.inflationadj
## 3798                               0
## 682                       6729679608
## 217                       2033987841
## 2918                      1031987972
## 1490                      2927763484
## 1504                      2893727800

What are the names of the 5 oldest movies in the dataset?

# Names in the first five rows of the year-sorted copy.
movies.order.year[["name"]][seq_len(5)]
## [1] "The Big Parade"                  "Snow White and the Seven Dwarfs"
## [3] "Gone with the Wind"              "The Wizard of Oz"               
## [5] "Pinocchio"

Q6

Now, sort the dataframe back into its original order (by boxoffice). The first movie in the dataframe should be Avatar!

# Row permutation that puts boxoffice.total in descending order (largest
# first). TRUE is spelled out because T is an ordinary, reassignable name.
index2 <- order(movies$boxoffice.total, decreasing = TRUE)

# Reordered copy of movies, highest box-office total at the top.
movies.order.boxoffice.total <- movies[index2, ]
head(movies.order.boxoffice.total)
##                          name boxoffice.total boxoffice.domestic
## 1                      Avatar      2783918982          760507625
## 2                     Titanic      2207615668          658672302
## 3              Jurassic World      1665443635          651443635
## 4                The Avengers      1519479547          623279547
## 5                   Furious 7      1516246709          351032910
## 6 The Avengers: Age of Ultron      1404705868          459005868
##   boxoffice.international dvd.domestic    budget rating             genre
## 1              2023411357    230915507 425000000  PG-13            Action
## 2              1548943366           NA 200000000  PG-13 Thriller/Suspense
## 3              1014000000           NA 215000000  PG-13            Action
## 4               896200000    109515497 225000000  PG-13         Adventure
## 5              1165213799     14947559 190000000  PG-13            Action
## 6               945700000      7312791 250000000  PG-13            Action
##          creative.type time year     production.method sequel
## 1      Science Fiction  162 2009 Animation/Live Action      0
## 2   Historical Fiction  194 1997           Live Action      0
## 3      Science Fiction  124 2015           Live Action      1
## 4           Super Hero  143 2012 Animation/Live Action      0
## 5 Contemporary Fiction  137 2014           Live Action      1
## 6           Super Hero  141 2015           Live Action      1
##   boxoffice.domestic.inflationadj
## 1                       826198130
## 2                      1139182838
## 3                       651443635
## 4                       655383136
## 5                       351032910
## 6                       459005868

Q7

Add a new column called “boxoffice.millions” that has the box-office values in millions of dollars. For example, a value of 1000000 in the original boxoffice vector should be 1 in boxoffice.millions

# boxoffice.total divided by one million.
boxoffice.millions <- movies$boxoffice.total / 1e6

# Copy of movies with the rescaled values bound on as a final column.
movies2 <- cbind(movies, boxoffice.millions = boxoffice.millions)
head(movies2, n = 6L)
##                          name boxoffice.total boxoffice.domestic
## 1                      Avatar      2783918982          760507625
## 2                     Titanic      2207615668          658672302
## 3              Jurassic World      1665443635          651443635
## 4                The Avengers      1519479547          623279547
## 5                   Furious 7      1516246709          351032910
## 6 The Avengers: Age of Ultron      1404705868          459005868
##   boxoffice.international dvd.domestic    budget rating             genre
## 1              2023411357    230915507 425000000  PG-13            Action
## 2              1548943366           NA 200000000  PG-13 Thriller/Suspense
## 3              1014000000           NA 215000000  PG-13            Action
## 4               896200000    109515497 225000000  PG-13         Adventure
## 5              1165213799     14947559 190000000  PG-13            Action
## 6               945700000      7312791 250000000  PG-13            Action
##          creative.type time year     production.method sequel
## 1      Science Fiction  162 2009 Animation/Live Action      0
## 2   Historical Fiction  194 1997           Live Action      0
## 3      Science Fiction  124 2015           Live Action      1
## 4           Super Hero  143 2012 Animation/Live Action      0
## 5 Contemporary Fiction  137 2014           Live Action      1
## 6           Super Hero  141 2015           Live Action      1
##   boxoffice.domestic.inflationadj boxoffice.millions
## 1                       826198130           2783.919
## 2                      1139182838           2207.616
## 3                       651443635           1665.444
## 4                       655383136           1519.480
## 5                       351032910           1516.247
## 6                       459005868           1404.706

Q8

What is the mean box-office total of all movies?

# Mean of boxoffice.total over all movies, which is what the question asks
# for and which the original chunk never computed.
mean(movies$boxoffice.total, na.rm = TRUE)

# Median of the time column, ignoring missing values; the printed result
# directly below comes from this call.
median(movies$time, na.rm = TRUE)
## [1] 108

What is the median running length of the movies?