library(tidyverse)
library(openintro)
library(rstudioapi)
library(RMySQL)
library(ggplot2)
user<- showPrompt(title='username', message='Enter username', default="")
pw<- askForPassword(prompt="enter something")
db<- dbConnect(MySQL(),
                 user= user,
                 password= pw,
                 dbname= user,
                 host= 'cunydata607sql.mysql.database.azure.com')


df<- dbGetQuery(db, 'select * from movie_ratings')

print(df)
##   id   name Spider_Man_Noway_Home Inside_Out_2 Despicable_Me_4 CODA
## 1  1   Jack                     4          4.5               4  3.5
## 2  2 Jennie                     3            5               4  3.5
## 3  3    Amy                     4            3               3  3.5
## 4  4    Jin                     5            5               4    4
## 5  5    Jay                     5            4             3.5    3
##   Deadpool_Wolverine Kingdom_of_the_Planet_of_the_Apes
## 1                  3                               4.5
## 2                  4                                 4
## 3                3.5                                 3
## 4                  4                                 4
## 5                  4                                 4

#Part 4:

#When missing values (NA) in the data, the best way is to fill these gas with mean or median number (in this case is mean or median of a rating movie). That allows us to maintain the integrity of the data. However, if there are large amount of missing data, it is better to remove the column of that data, to avoid biased analysis.

library(tidyr)
library(dplyr)
df <- df %>%
  pivot_longer(cols = starts_with("Spider_Man"):last_col(),
               names_to = "Movie",
               values_to = "Rating")


ggplot(df, aes(x = Movie, y = Rating, fill = name)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Movie Ratings", x = "Movie", y = "Rating") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

dbDisconnect(db)
## [1] TRUE