# Always print the working directory before starting your assignment; it is
# the folder that relative file paths are resolved against.
getwd()
## [1] "/Users/ripley/Desktop"
# load all your libraries in this chunk
library("tidyverse")
# note, do not run install.packages() inside a code chunk. install
# them in the console outside of a code chunk.
1a) Here is my response to question 1 a)
# got it! 🫡
1b) Response to part b.
# cool!
etc…
2a) Response to part a.
getwd()
## [1] "/Users/ripley/Desktop"
# This is the working directory: the folder on my laptop that R starts from
# when it looks for files given by a relative path.
2b) Response to part b.
# Read the IMDB movies CSV into `movies` with read_csv() (available because
# the tidyverse is loaded above). The path is absolute, so it only resolves
# on a machine that has this exact folder layout.
movies <- read_csv("/Users/ripley/Desktop/MGSC 310/datasets/IMDB_movies.csv")
2c) Response to part c.
# dim() reports both dimensions at once: rows first, then columns.
dim(movies)
## [1] 3889 25
# nrow() gives just the number of rows.
nrow(movies)
## [1] 3889
# ncol() gives just the number of columns (variables).
ncol(movies)
## [1] 25
2d) Response to part d.
# List the name of every column (variable) in the data set.
colnames(movies)
## [1] "movie_title" "director_name"
## [3] "gross" "budget"
## [5] "country" "title_year"
## [7] "imdb_score" "language"
## [9] "duration" "genres"
## [11] "content_rating" "aspect_ratio"
## [13] "color" "plot_keywords"
## [15] "movie_facebook_likes" "director_facebook_likes"
## [17] "cast_total_facebook_likes" "facenumber_in_poster"
## [19] "actor_1_facebook_likes" "actor_1_name"
## [21] "actor_2_facebook_likes" "actor_2_name"
## [23] "num_user_for_reviews" "num_critic_for_reviews"
## [25] "num_voted_users"
2e) Response to part e.
# Print a compact preview: one line per column with its type and first values.
glimpse(movies)
## Rows: 3,889
## Columns: 25
## $ movie_title <chr> "Avatar", "Pirates of the Caribbea…
## $ director_name <chr> "James Cameron", "Gore Verbinski",…
## $ gross <dbl> 760505847, 309404152, 200074175, 4…
## $ budget <dbl> 237000000, 300000000, 245000000, 2…
## $ country <chr> "USA", "USA", "UK", "USA", "USA", …
## $ title_year <dbl> 2009, 2007, 2015, 2012, 2012, 2007…
## $ imdb_score <dbl> 7.9, 7.1, 6.8, 8.5, 6.6, 6.2, 7.8,…
## $ language <chr> "English", "English", "English", "…
## $ duration <dbl> 178, 169, 148, 164, 132, 156, 100,…
## $ genres <chr> "Action|Adventure|Fantasy|Sci-Fi",…
## $ content_rating <chr> "PG-13", "PG-13", "PG-13", "PG-13"…
## $ aspect_ratio <dbl> 1.78, 2.35, 2.35, 2.35, 2.35, 2.35…
## $ color <chr> "Color", "Color", "Color", "Color"…
## $ plot_keywords <chr> "avatar|future|marine|native|parap…
## $ movie_facebook_likes <dbl> 33000, 0, 85000, 164000, 24000, 0,…
## $ director_facebook_likes <dbl> 0, 563, 0, 22000, 475, 0, 15, 0, 2…
## $ cast_total_facebook_likes <dbl> 4834, 48350, 11700, 106759, 1873, …
## $ facenumber_in_poster <dbl> 0, 0, 1, 0, 1, 0, 1, 4, 3, 0, 0, 1…
## $ actor_1_facebook_likes <dbl> 1000, 40000, 11000, 27000, 640, 24…
## $ actor_1_name <chr> "CCH Pounder", "Johnny Depp", "Chr…
## $ actor_2_facebook_likes <dbl> 936, 5000, 393, 23000, 632, 11000,…
## $ actor_2_name <chr> "Joel David Moore", "Orlando Bloom…
## $ num_user_for_reviews <dbl> 3054, 1238, 994, 2701, 738, 1902, …
## $ num_critic_for_reviews <dbl> 723, 302, 602, 813, 462, 392, 324,…
## $ num_voted_users <dbl> 886204, 471220, 275868, 1144337, 2…
# with glimpse, the second column shown tells you the variable's type. if it
# says '<chr>' then it is a character variable and if it says '<dbl>' then it
# is a double (floating point number) type.
2f) Response to part f.
# Display the first 20 rows of the data set.
movies %>%
  slice_head(n = 20)
2g) Response to part g.
# Order the movies from highest to lowest gross, then keep the first 10 rows.
movies %>%
  arrange(desc(gross)) %>%
  slice_head(n = 10)
# only one director has multiple movies in the top 10: Katsuhiro
# Ôtomo.
2h) Response to part h.
# Order the movies from highest to lowest budget, then keep the first 20 rows.
movies %>%
  arrange(desc(budget)) %>%
  slice_head(n = 20)
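# this list is practically the exact same! this might show a
# correlation between having a larger budget and earning a larger
# gross, aka the more expensive the movie, the more money it brings in.
# (gross is revenue, not profit, so this alone does not show that
# expensive movies are more profitable.) very interesting!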