Choose six recent popular movies. Ask at least five people that you know (friends, family, classmates, imaginary friends) to rate each of these movie that they have seen on a scale of 1 to 5. Take the results (observations) and store them in a SQL database. Load the information into an R dataframe.
Your deliverables should include your SQL scripts and your R Markdown code, posted to GitHub.
This is by design a very open ended assignment. A variety of reasonable approaches are acceptable. You can (and should) blank out your SQL password if your solution requires it; otherwise, full credit requires that your code is “reproducible,” with the assumption that I have the same database server and R software.
You may work in a small group on this assignment. If you work in a group, each group member should indicate who they worked with, and all group members should individually submit their week 2 assignment.
Please start early, and do work that you would want to include in a “presentations portfolio” that you might share in a job interview with a potential employer! You are encouraged to share thoughts, ask, and answer clarifying questions in the “Week 2: R and SQL” forum.
library(RMySQL)
## Loading required package: DBI
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
##
## Attaching package: 'RSQLite'
## The following object is masked from 'package:RMySQL':
##
## isIdCurrent
## sqldf will default to using MySQL
mydb = dbConnect(MySQL(), user='root', password='root', dbname='movie_ratings', host='localhost')
dbListTables(mydb)
## [1] "actor_info" "director_info" "genre_info" "movie_actor_map" "movie_genre_map" "movie_info"
#"movie_info"
#"genre_info"
#"director_info"
#"actor_info"
#"movie_actor_map"
#"movie_genre_map"
movie_info_rs = dbSendQuery(mydb, "select * from movie_info")
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as numeric
movie_info_df = fetch(movie_info_rs, n=-1)
genre_info_rs = dbSendQuery(mydb, "select * from genre_info")
genre_info_df = fetch(genre_info_rs, n=-1)
director_info_rs = dbSendQuery(mydb, "select * from director_info")
director_info_df = fetch(director_info_rs, n=-1)
actor_info_rs = dbSendQuery(mydb, "select * from actor_info")
actor_info_df = fetch(actor_info_rs, n=-1)
movie_actor_map_rs = dbSendQuery(mydb, "select * from movie_actor_map")
movie_actor_map_df = fetch(movie_actor_map_rs, n=-1)
movie_genre_map_rs = dbSendQuery(mydb, "select * from movie_genre_map")
movie_genre_map_df = fetch(movie_genre_map_rs, n=-1)
print(movie_info_df)
## ID TITLE DIRECTOR_ID RELEASE_DATE MINUTES RATINGS GROSS_INCOME
## 1 1 War Dogs 1 2016-08-09 114 3 39.80
## 2 2 The Light Between Oceans 2 2016-09-02 133 5 7.61
## 3 3 Hell or High Water (II) 3 2016-08-02 102 4 19.82
## 4 4 Sully 4 2016-09-09 96 5 35.51
## 5 5 The Legend of Tarzan 5 2016-06-30 110 3 125.90
## 6 6 Don't Breathe 6 2016-08-25 88 2 66.83
## 7 7 Mechanic Resurrection 7 2016-08-26 98 2 16.95
## 8 8 Jason Bourne 8 2016-07-29 123 3 156.98
## 9 9 Now You See Me 2 9 2016-06-10 129 4 65.03
## 10 10 Morgan 10 2016-09-02 92 1 0.00
## 11 11 Suicide Squad 11 2016-08-05 123 4 307.41
## 12 12 Star Trek Beyond 12 2016-07-22 122 5 150.89
## 13 13 Ghostbusters 13 2016-07-11 116 3 124.96
## 14 14 Hands of Stone 14 2016-08-26 111 4 1.75
## 15 15 Greater 15 2016-08-26 130 5 0.63
print(genre_info_df)
## ID NAME
## 1 1 Action
## 2 2 Adventure
## 3 3 Animation
## 4 4 Biography
## 5 5 Comedy
## 6 6 Crime
## 7 7 Drama
## 8 8 Family
## 9 9 Fantasy
## 10 10 Film-Noir
## 11 11 History
## 12 12 Horror
## 13 13 Music
## 14 14 Musical
## 15 15 Mystery
## 16 16 Romance
## 17 17 Sci-Fi
## 18 18 Sport
## 19 19 Thriller
## 20 20 War
## 21 21 Western
print(director_info_df)
## ID NAME
## 1 1 Todd Phillips
## 2 2 Derek Cianfrance
## 3 3 David Mackenzie
## 4 4 Clint Eastwood
## 5 5 David Yates
## 6 6 Fede Alvarez
## 7 7 Dennis Gansel
## 8 8 Paul Greengrass
## 9 9 Jon M. Chu
## 10 10 Luke Scott
## 11 11 David Ayer
## 12 12 Justin Lin
## 13 13 Paul Feig
## 14 14 Jonathan Jakubowicz
## 15 15 David Hunt
print(actor_info_df)
## ID NAME
## 1 1 Aaron Eckhart
## 2 2 Alexander Skarsgård
## 3 3 Alicia Vikander
## 4 4 Anya Taylor-Joy
## 5 5 Ben Foster
## 6 6 Chris Pine
## 7 7 Christian Stevens
## 8 8 Christoph Waltz
## 9 9 Christopher Severio
## 10 10 Dale Dickey
## 11 11 Daniel Zovatto
## 12 12 Dave Franco
## 13 13 Dylan Minnette
## 14 14 Edgar Ramírez
## 15 15 Florence Clery
## 16 16 Gregg Weiner
## 17 17 Jane Levy
## 18 18 Jared Leto
## 19 19 Jason Statham
## 20 20 Jesse Eisenberg
## 21 21 Jessica Alba
## 22 22 Jonah Hill
## 23 23 Karl Urban
## 24 24 Kate Mara
## 25 25 Kate McKinnon
## 26 26 Kristen Wiig
## 27 27 Laura Linney
## 28 28 Leslie Easterbrook
## 29 29 Leslie Jones
## 30 30 Margot Robbie
## 31 31 Mark Ruffalo
## 32 32 Matt Damon
## 33 33 Melissa McCarthy
## 34 34 Michael Fassbender
## 35 35 Michael Parks
## 36 36 Michael Yare
## 37 37 Michelle Yeoh
## 38 38 Miles Teller
## 39 39 Neal McDonough
## 40 40 Rachel Weisz
## 41 41 Robert De Niro
## 42 42 Rory J. Saper
## 43 43 Rose Leslie
## 44 44 Rubén Blades
## 45 45 Stephen Lang
## 46 46 Steve Lantz
## 47 47 Tom Hanks
## 48 48 Tommy Lee Jones
## 49 49 Usher Raymond
## 50 50 Valerie Mahaffey
## 51 51 Vincent Cassel
## 52 52 Viola Davis
## 53 53 Will Smith
## 54 54 William Sterchi
## 55 55 Woody Harrelson
## 56 56 Zachary Quinto
## 57 57 Zoe Saldana
print(movie_actor_map_df)
## MOVIE_ID ACTOR_ID
## 1 1 22
## 2 1 38
## 3 1 46
## 4 1 16
## 5 2 34
## 6 2 3
## 7 2 40
## 8 2 15
## 9 3 10
## 10 3 5
## 11 3 6
## 12 3 54
## 13 4 47
## 14 4 1
## 15 4 27
## 16 4 50
## 17 5 2
## 18 5 42
## 19 5 7
## 20 5 8
## 21 6 45
## 22 6 17
## 23 6 13
## 24 6 11
## 25 7 19
## 26 7 21
## 27 7 48
## 28 7 37
## 29 8 32
## 30 8 48
## 31 8 3
## 32 8 51
## 33 9 20
## 34 9 31
## 35 9 55
## 36 9 12
## 37 10 24
## 38 10 4
## 39 10 43
## 40 10 36
## 41 11 53
## 42 11 18
## 43 11 30
## 44 11 52
## 45 12 6
## 46 12 56
## 47 12 23
## 48 12 57
## 49 13 33
## 50 13 26
## 51 13 25
## 52 13 29
## 53 14 14
## 54 14 49
## 55 14 41
## 56 14 44
## 57 15 9
## 58 15 39
## 59 15 28
## 60 15 35
print(movie_genre_map_df)
## MOVIE_ID GENRE_ID
## 1 1 5
## 2 1 6
## 3 1 7
## 4 2 7
## 5 2 16
## 6 3 6
## 7 3 7
## 8 4 4
## 9 4 7
## 10 5 1
## 11 5 2
## 12 5 7
## 13 6 12
## 14 6 19
## 15 7 1
## 16 7 6
## 17 7 19
## 18 8 1
## 19 8 19
## 20 9 1
## 21 9 2
## 22 9 5
## 23 10 12
## 24 10 12
## 25 10 17
## 26 11 1
## 27 11 2
## 28 11 6
## 29 12 1
## 30 12 2
## 31 13 5
## 32 13 9
## 33 14 1
## 34 14 4
## 35 14 7
## 36 15 4
## 37 15 8
## 38 15 18
movie_actor_map_df = sqldf("select * from movie_info_df a, actor_info_df b, movie_actor_map_df c
where a.id = c.movie_id and b.id = c.actor_id and a.title = 'War Dogs'
")
## Loading required package: tcltk
print(movie_actor_map_df)
## ID TITLE DIRECTOR_ID RELEASE_DATE MINUTES RATINGS GROSS_INCOME ID NAME MOVIE_ID ACTOR_ID
## 1 1 War Dogs 1 2016-08-09 114 3 39.8 22 Jonah Hill 1 22
## 2 1 War Dogs 1 2016-08-09 114 3 39.8 38 Miles Teller 1 38
## 3 1 War Dogs 1 2016-08-09 114 3 39.8 46 Steve Lantz 1 46
## 4 1 War Dogs 1 2016-08-09 114 3 39.8 16 Gregg Weiner 1 16
movie_ratings_df = sqldf("select * from movie_info_df a where a.ratings = 4")
print(movie_ratings_df)
## ID TITLE DIRECTOR_ID RELEASE_DATE MINUTES RATINGS GROSS_INCOME
## 1 3 Hell or High Water (II) 3 2016-08-02 102 4 19.82
## 2 9 Now You See Me 2 9 2016-06-10 129 4 65.03
## 3 11 Suicide Squad 11 2016-08-05 123 4 307.41
## 4 14 Hands of Stone 14 2016-08-26 111 4 1.75
movie_genre_map_df = sqldf("select * from movie_info_df a, genre_info_df b, movie_genre_map_df c
where a.id = c.movie_id and b.id = c.genre_id and b.name = 'Action'
")
print(movie_genre_map_df)
## ID TITLE DIRECTOR_ID RELEASE_DATE MINUTES RATINGS GROSS_INCOME ID NAME MOVIE_ID GENRE_ID
## 1 5 The Legend of Tarzan 5 2016-06-30 110 3 125.90 1 Action 5 1
## 2 7 Mechanic Resurrection 7 2016-08-26 98 2 16.95 1 Action 7 1
## 3 8 Jason Bourne 8 2016-07-29 123 3 156.98 1 Action 8 1
## 4 9 Now You See Me 2 9 2016-06-10 129 4 65.03 1 Action 9 1
## 5 11 Suicide Squad 11 2016-08-05 123 4 307.41 1 Action 11 1
## 6 12 Star Trek Beyond 12 2016-07-22 122 5 150.89 1 Action 12 1
## 7 14 Hands of Stone 14 2016-08-26 111 4 1.75 1 Action 14 1