Assignment – SQL and R

Choose six recent popular movies. Ask at least five people that you know (friends, family, classmates,imaginary friends) to rate each of these movie that they have seen on a scale of 1 to 5. Take the results(observations) and store them in a SQL database. Load the information into an R dataframe.

Load Appropriate Packages

library(RMySQL)
## Loading required package: DBI

Establish Connection

con <- dbConnect(MySQL(),
                  user = "root", 
                  password = "spring2019",
                  dbname = "movieratings",
                  host = "localhost"
                     )

Queries to get data from the SQL database

query <- "select * from rating;"
res <- dbGetQuery(con, query)
res
##   ID      Name The_Lion_King_2019 Disney_Aladdin Hobbs_and_Shaw Godzilla
## 1  1    Rishan                  4              4              3        3
## 2  2  Damithri                  5              2              2        4
## 3  3     Geeth                  5              5              2        1
## 4  4   Thilini                  3              3              3        4
## 5  5 Shawantha                  4              5              3        3
## 6  6     Amila                  5              4              4        3
## 7  7  Nimanthi                  5              5              5        5
## 8  8 Rushanthi                  4              3              4        4
## 9  9       Leo                  5              5              3        3
##   Avengers_EndGame The_Hustle
## 1                4          5
## 2                4          5
## 3                2          5
## 4                3          3
## 5                4          4
## 6                5          3
## 7                5          5
## 8                4          2
## 9                3          4

Load data into a R dataframe

my_movie_ratings <- res
head(my_movie_ratings)
##   ID      Name The_Lion_King_2019 Disney_Aladdin Hobbs_and_Shaw Godzilla
## 1  1    Rishan                  4              4              3        3
## 2  2  Damithri                  5              2              2        4
## 3  3     Geeth                  5              5              2        1
## 4  4   Thilini                  3              3              3        4
## 5  5 Shawantha                  4              5              3        3
## 6  6     Amila                  5              4              4        3
##   Avengers_EndGame The_Hustle
## 1                4          5
## 2                4          5
## 3                2          5
## 4                3          3
## 5                4          4
## 6                5          3

Analyse Data using summary and graphs

summary(my_movie_ratings)
##        ID        Name           The_Lion_King_2019 Disney_Aladdin
##  Min.   :1   Length:9           Min.   :3.000      Min.   :2     
##  1st Qu.:3   Class :character   1st Qu.:4.000      1st Qu.:3     
##  Median :5   Mode  :character   Median :5.000      Median :4     
##  Mean   :5                      Mean   :4.444      Mean   :4     
##  3rd Qu.:7                      3rd Qu.:5.000      3rd Qu.:5     
##  Max.   :9                      Max.   :5.000      Max.   :5     
##  Hobbs_and_Shaw     Godzilla     Avengers_EndGame   The_Hustle
##  Min.   :2.000   Min.   :1.000   Min.   :2.000    Min.   :2   
##  1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.000    1st Qu.:3   
##  Median :3.000   Median :3.000   Median :4.000    Median :4   
##  Mean   :3.222   Mean   :3.333   Mean   :3.778    Mean   :4   
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000    3rd Qu.:5   
##  Max.   :5.000   Max.   :5.000   Max.   :5.000    Max.   :5
query1 <- "select * from rating;"
res <- dbGetQuery(con, query)
movie.rating <- data.frame(res)

query2 <- "select Name, The_Lion_King_2019 from rating;"

Lion_King_Rating <- dbGetQuery(con, query2)
Lion_King_Rating
##        Name The_Lion_King_2019
## 1    Rishan                  4
## 2  Damithri                  5
## 3     Geeth                  5
## 4   Thilini                  3
## 5 Shawantha                  4
## 6     Amila                  5
## 7  Nimanthi                  5
## 8 Rushanthi                  4
## 9       Leo                  5
library(ggplot2)
ggplot(data = Lion_King_Rating, aes(Lion_King_Rating$Name, Lion_King_Rating$The_Lion_King_2019)) + geom_point(aes(color = "Rating 1-5")) + geom_smooth(method = "lm")

query3 <- "select Name, Disney_Aladdin from rating;"
Disney_Aladdin_rating <- dbGetQuery(con, query3)
Disney_Aladdin_rating
##        Name Disney_Aladdin
## 1    Rishan              4
## 2  Damithri              2
## 3     Geeth              5
## 4   Thilini              3
## 5 Shawantha              5
## 6     Amila              4
## 7  Nimanthi              5
## 8 Rushanthi              3
## 9       Leo              5
ggplot(data = Disney_Aladdin_rating, aes(Disney_Aladdin_rating$Name, Disney_Aladdin_rating$Disney_Aladdin))+ geom_point(aes(color = "Rating 1-5")) + geom_smooth(method = "lm")

Disconnect from SQL database

dbDisconnect(con)
## [1] TRUE