library(tidyverse)
library(openintro)
library(RMySQL)
Connecting to Database and Loading Data
First, connect to the database and see what tables there are.
# Connect to the MySQL database
mydb = dbConnect(MySQL(),
user= rstudioapi::askForPassword("Database user"),
password= rstudioapi::askForPassword("Database password"),
dbname='movie_ratings',
host='34.123.201.162')
# Inspect what tables we have
dbListTables(mydb)
## [1] "movie_ratings" "movie_ratings_unpivot"
There is a table named movie_ratings
and named
movie_ratings_unpivot
. Load both of the tables and inspect
them.
# Load both tables
mr <- dbGetQuery(mydb, "select * from movie_ratings")
mru <- dbGetQuery(mydb, "select * from movie_ratings_unpivot")
glimpse(mr)
## Rows: 7
## Columns: 7
## $ s_name <chr> "Alvaro", "Avocado Toast", "Emily", "Emma Griffen", "…
## $ avatar <chr> "4 - GOOD", "3 - OKAY", "4 - GOOD", "5 - AMAZING", "5…
## $ inception <chr> "4 - GOOD", "1 - DISSAPOINTING", "4 - GOOD", "4 - GOO…
## $ rodents <chr> "I did not watch this movie", "I did not watch this m…
## $ harry_potter <chr> "5 - AMAZING", "4 - GOOD", "3 - OKAY", "4 - GOOD", "5…
## $ top_gun_maverick <chr> "4 - GOOD", "I did not watch this movie", "2 - BAD", …
## $ causeway <chr> "2 - BAD", "I did not watch this movie", "I did not w…
## Rows: 27
## Columns: 3
## $ s_name <chr> "Alvaro", "Alvaro", "Alvaro", "Alvaro", "Alvaro", "Avocado Toas…
## $ movie <chr> "avatar", "causeway", "harry_potter", "inception", "top_gun_mav…
## $ rating <chr> "4", "2", "5", "4", "4", "3", "4", "1", "4", "3", "4", "5", "2"…
One table mr
has columns that are the name of the
reviewer, and the name of each movie. Each row has the entries for the
name of the reviewer followed by their ratings of each movie. The second
table mru
has columns that are the name of the reviewer,
the name of the movie, and the rating of the movie - with the entries
for did not watch the movie removed.
Basic data analysis
Based on the summary, the numbers in mru
are stored as
character strings. Convert them to integers.
# Convert ratings to integer
mru$rating <- as.integer(mru$rating)
Get a basic idea for how many ratings each movie got and how each
movie was rated.
ggplot(mru, aes(movie, rating)) +
geom_point() +
ggtitle("Ratings for Movies") +
xlab("Movie") +
ylab("Ratings") +
scale_x_discrete(labels=c("avatar" = "Avatar",
"causeway" = "Causeway",
"harry_potter" = "Harry Potter and the Sorcerers Stone",
"inception" = "Inception",
"rodents" = "Rodents of Unusual Size",
"top_gun_maverick" = "Top Gun: Maverick")) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

From the plot, it is clear that every movie was at least rated one
time.
Summarize into the mean movie rating.
mru_summary <-
mru %>%
group_by(movie) %>%
summarize(avg_rating=mean(rating))
Make a barchart of the mean ratings of each movie.
ggplot(data=mru_summary, aes(x=reorder(movie,-avg_rating), y=avg_rating)) +
geom_bar(stat="identity") +
ggtitle("Average Ratings for Movies") +
xlab("Movie") +
ylab("Average Rating") +
scale_x_discrete(labels=c("avatar" = "Avatar",
"causeway" = "Causeway",
"harry_potter" = "Harry Potter and the Sorcerers Stone",
"inception" = "Inception",
"rodents" = "Rodents of Unusual Size",
"top_gun_maverick" = "Top Gun: Maverick")) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
This barchart is not perfect in that, some of the averages were only
made up of one persons rating - however as a preliminary look it is
alright as long as this caveat is noted.
Additional Work
To extend this work, in the future if the participants rated more
movies I think it could make sense to normalize their ratings
Additionally in the future code could be built to recommend certain
movies to people who did not view them already.
LS0tDQp0aXRsZTogIlNRTCBhbmQgUiINCmF1dGhvcjogIktheWxlYWggR3JpZmZlbiINCmRhdGU6ICJgciBTeXMuRGF0ZSgpYCINCm91dHB1dDogb3BlbmludHJvOjpsYWJfcmVwb3J0DQotLS0NCg0KYGBge3IgbG9hZC1wYWNrYWdlcywgbWVzc2FnZT1GQUxTRX0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShvcGVuaW50cm8pDQpsaWJyYXJ5KFJNeVNRTCkNCmBgYA0KDQojIyMgQ29ubmVjdGluZyB0byBEYXRhYmFzZSBhbmQgTG9hZGluZyBEYXRhDQoNCkZpcnN0LCBjb25uZWN0IHRvIHRoZSBkYXRhYmFzZSBhbmQgc2VlIHdoYXQgdGFibGVzIHRoZXJlIGFyZS4NCmBgYHtyIGxvYWQtZGF0YWJhc2V9DQojIENvbm5lY3QgdG8gdGhlIE15U1FMIGRhdGFiYXNlDQoNCm15ZGIgPSBkYkNvbm5lY3QoTXlTUUwoKSwNCiAgICAgICAgICAgICAgICAgdXNlcj0gcnN0dWRpb2FwaTo6YXNrRm9yUGFzc3dvcmQoIkRhdGFiYXNlIHVzZXIiKSwNCiAgICAgICAgICAgICAgICAgcGFzc3dvcmQ9IHJzdHVkaW9hcGk6OmFza0ZvclBhc3N3b3JkKCJEYXRhYmFzZSBwYXNzd29yZCIpLA0KICAgICAgICAgICAgICAgICBkYm5hbWU9J21vdmllX3JhdGluZ3MnLA0KICAgICAgICAgICAgICAgICBob3N0PSczNC4xMjMuMjAxLjE2MicpDQojIEluc3BlY3Qgd2hhdCB0YWJsZXMgd2UgaGF2ZQ0KZGJMaXN0VGFibGVzKG15ZGIpDQpgYGANClRoZXJlIGlzIGEgdGFibGUgbmFtZWQgYG1vdmllX3JhdGluZ3NgIGFuZCBuYW1lZCBgbW92aWVfcmF0aW5nc191bnBpdm90YC4gTG9hZCBib3RoIG9mIHRoZSB0YWJsZXMgYW5kIGluc3BlY3QgdGhlbS4NCg0KYGBge3IgbG9hZC1pbnNwZWN0fQ0KIyBMb2FkIGJvdGggdGFibGVzDQptciA8LSBkYkdldFF1ZXJ5KG15ZGIsICJzZWxlY3QgKiBmcm9tIG1vdmllX3JhdGluZ3MiKQ0KbXJ1IDwtIGRiR2V0UXVlcnkobXlkYiwgInNlbGVjdCAqIGZyb20gbW92aWVfcmF0aW5nc191bnBpdm90IikNCmdsaW1wc2UobXIpDQpnbGltcHNlKG1ydSkNCmBgYA0KT25lIHRhYmxlIGBtcmAgaGFzIGNvbHVtbnMgdGhhdCBhcmUgdGhlIG5hbWUgb2YgdGhlIHJldmlld2VyLCBhbmQgdGhlIG5hbWUgb2YgZWFjaCBtb3ZpZS4gRWFjaCByb3cgaGFzIHRoZSBlbnRyaWVzIGZvciB0aGUgbmFtZSBvZiB0aGUgcmV2aWV3ZXIgZm9sbG93ZWQgYnkgdGhlaXIgcmF0aW5ncyBvZiBlYWNoIG1vdmllLiBUaGUgc2Vjb25kIHRhYmxlIGBtcnVgIGhhcyBjb2x1bW5zIHRoYXQgYXJlIHRoZSBuYW1lIG9mIHRoZSByZXZpZXdlciwgdGhlIG5hbWUgb2YgdGhlIG1vdmllLCBhbmQgdGhlIHJhdGluZyBvZiB0aGUgbW92aWUgLSB3aXRoIHRoZSBlbnRyaWVzIGZvciBkaWQgbm90IHdhdGNoIHRoZSBtb3ZpZSByZW1vdmVkLg0KDQojIyMgQmFzaWMgZGF0YSBhbmFseXNpcw0KDQpCYXNlZCBvbiB0aGUgc3VtbWFyeSwgdGhlIG51bWJlcnMgaW4gYG1ydWAgYXJlIHN0b3JlZCBhcyBjaGFyYWN0ZXIgc3RyaW5ncy4gQ29udmVydCB0aGVtIHRvIGludGVnZXJzLg0KYGBge3IgaW50ZWdlci1jb252ZXJzaW9ufQ0KIyBDb252ZXJ0IHJhdGluZ3MgdG8gaW50ZWdlcg0KbXJ1JHJhdGluZyA8LSBhcy5pbnRlZ2VyKG1ydSRyYXRpbmcpICAgIA0KYGBgDQoNCkdldCBhIGJhc2ljIGlkZWEgZm9yIGhvdyBtYW55IHJhdGluZ3MgZWFjaCBtb3ZpZSBnb3QgYW5kIGhvdyBlYWNoIG1vdmllIHdhcyByYXRlZC4NCg0KYGBge3IgZGF0YS1hbmFseXNpc30NCmdncGxvdChtcnUsIGFlcyhtb3ZpZSwgcmF0aW5nKSkgKw0KICBnZW9tX3BvaW50KCkgKw0KICBnZ3RpdGxlKCJSYXRpbmdzIGZvciBNb3ZpZXMiKSArDQogIHhsYWIoIk1vdmllIikgKyANCiAgeWxhYigiUmF0aW5ncyIpICsgDQogIHNjYWxlX3hfZGlzY3JldGUobGFiZWxzPWMoImF2YXRhciIgPSAiQXZhdGFyIiwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgImNhdXNld2F5IiA9ICJDYXVzZXdheSIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgImhhcnJ5X3BvdHRlciIgPSAiSGFycnkgUG90dGVyIGFuZCB0aGUgU29yY2VyZXJzIFN0b25lIiwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAiaW5jZXB0aW9uIiA9ICJJbmNlcHRpb24iLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICJyb2RlbnRzIiA9ICJSb2RlbnRzIG9mIFVudXN1YWwgU2l6ZSIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgInRvcF9ndW5fbWF2ZXJpY2siID0gIlRvcCBHdW46IE1hdmVyaWNrIikpICsgDQogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gOTAsIHZqdXN0ID0gMC41LCBoanVzdD0xKSkNCmBgYA0KDQpGcm9tIHRoZSBwbG90LCBpdCBpcyBjbGVhciB0aGF0IGV2ZXJ5IG1vdmllIHdhcyBhdCBsZWFzdCByYXRlZCBvbmUgdGltZS4NCg0KU3VtbWFyaXplIGludG8gdGhlIG1lYW4gbW92aWUgcmF0aW5nLg0KYGBge3Igc3VtbWFyeS1ieS1tb3ZpZX0NCm1ydV9zdW1tYXJ5IDwtDQogIG1ydSAlPiUgDQogIGdyb3VwX2J5KG1vdmllKSAlPiUgDQogIHN1bW1hcml6ZShhdmdfcmF0aW5nPW1lYW4ocmF0aW5nKSkNCmBgYA0KDQpNYWtlIGEgYmFyY2hhcnQgb2YgdGhlIG1lYW4gcmF0aW5ncyBvZiBlYWNoIG1vdmllLg0KYGBge3IgYmFyY2hhcnR9DQpnZ3Bsb3QoZGF0YT1tcnVfc3VtbWFyeSwgYWVzKHg9cmVvcmRlcihtb3ZpZSwtYXZnX3JhdGluZyksIHk9YXZnX3JhdGluZykpICsNCiAgZ2VvbV9iYXIoc3RhdD0iaWRlbnRpdHkiKSArIA0KICBnZ3RpdGxlKCJBdmVyYWdlIFJhdGluZ3MgZm9yIE1vdmllcyIpICsNCiAgeGxhYigiTW92aWUiKSArIA0KICB5bGFiKCJBdmVyYWdlIFJhdGluZyIpICsgDQogIHNjYWxlX3hfZGlzY3JldGUobGFiZWxzPWMoImF2YXRhciIgPSAiQXZhdGFyIiwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgImNhdXNld2F5IiA9ICJDYXVzZXdheSIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgImhhcnJ5X3BvdHRlciIgPSAiSGFycnkgUG90dGVyIGFuZCB0aGUgU29yY2VyZXJzIFN0b25lIiwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAiaW5jZXB0aW9uIiA9ICJJbmNlcHRpb24iLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICJyb2RlbnRzIiA9ICJSb2RlbnRzIG9mIFVudXN1YWwgU2l6ZSIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgInRvcF9ndW5fbWF2ZXJpY2siID0gIlRvcCBHdW46IE1hdmVyaWNrIikpICsgDQogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gOTAsIHZqdXN0ID0gMC41LCBoanVzdD0xKSkNCmBgYA0KVGhpcyBiYXJjaGFydCBpcyBub3QgcGVyZmVjdCBpbiB0aGF0LCBzb21lIG9mIHRoZSBhdmVyYWdlcyB3ZXJlIG9ubHkgbWFkZSB1cCBvZiBvbmUgcGVyc29ucyByYXRpbmcgLSBob3dldmVyIGFzIGEgcHJlbGltaW5hcnkgbG9vayBpdCBpcyBhbHJpZ2h0IGFzIGxvbmcgYXMgdGhpcyBjYXZlYXQgaXMgbm90ZWQuIA0KDQojIyMgQWRkaXRpb25hbCBXb3JrDQoNClRvIGV4dGVuZCB0aGlzIHdvcmssIGluIHRoZSBmdXR1cmUgaWYgdGhlIHBhcnRpY2lwYW50cyByYXRlZCBtb3JlIG1vdmllcyBJIHRoaW5rIGl0IGNvdWxkIG1ha2Ugc2Vuc2UgdG8gbm9ybWFsaXplIHRoZWlyIHJhdGluZ3MgQWRkaXRpb25hbGx5IGluIHRoZSBmdXR1cmUgY29kZSBjb3VsZCBiZSBidWlsdCB0byByZWNvbW1lbmQgY2VydGFpbiBtb3ZpZXMgdG8gcGVvcGxlIHdobyBkaWQgbm90IHZpZXcgdGhlbSBhbHJlYWR5Lg==