#Setting up connection
connection <- dbConnect(MySQL(), user = 'yanyi.li01',
password = '1e9b78b6741e00f1',
host = 'cunydata607sql.mysql.database.azure.com', port = 3306,
dbname = 'yanyi.li01')
#List the tables
dbListTables(connection)
## [1] "movie_reviews"
#Write query to access the table
result = dbSendQuery(connection, "select * from movie_reviews")
#Store the result in a data frame
data.frame = fetch(result, n=5)
print(data.frame)
## User_Id First_Name Deadpool_And_Wolverine Despicable_Me_4 Inside_Out_4
## 1 1 Jack 3 NA 4
## 2 2 Carlos 4 4 NA
## 3 3 Melissa 4 5 5
## 4 4 Fabricio 5 3 NA
## 5 5 Marcie 4 NA 5
## Alien_Romulus Beetlejuice_Beetlejuice It_Ends_With_Us
## 1 3 2 NA
## 2 4 3 4
## 3 NA NA 5
## 4 3 NA NA
## 5 NA 3 4
#Create a bar chart
ggplot(data.frame, aes(x = First_Name, y = Despicable_Me_4)) +
geom_col() +
labs(title = "Despicable Me 4")
#Replace missing data with mean
data.frame$Deadpool_And_Wolverine[is.na(data.frame$Deadpool_And_Wolverine)] <- mean(data.frame$Deadpool_And_Wolverine, na.rm = TRUE)
data.frame$Despicable_Me_4[is.na(data.frame$Despicable_Me_4)] <- mean(data.frame$Despicable_Me_4, na.rm = TRUE)
data.frame$Inside_Out_4[is.na(data.frame$Inside_Out_4)] <- mean(data.frame$Inside_Out_4, na.rm = TRUE)
data.frame$Alien_Romulus[is.na(data.frame$Alien_Romulus)] <- mean(data.frame$Alien_Romulus, na.rm = TRUE)
data.frame$Beetlejuice_Beetlejuice[is.na(data.frame$Beetlejuice_Beetlejuice)] <- mean(data.frame$Beetlejuice_Beetlejuice, na.rm = TRUE)
data.frame$It_Ends_With_Us[is.na(data.frame$It_Ends_With_Us)] <- mean(data.frame$It_Ends_With_Us, na.rm = TRUE)
#Round the mean to whole numbers
data.frame$Deadpool_And_Wolverine <- round(data.frame$Deadpool_And_Wolverine, digits = 0)
data.frame$Despicable_Me_4 <- round(data.frame$Despicable_Me_4, digits = 0)
data.frame$Inside_Out_4 <- round(data.frame$Inside_Out_4, digits = 0)
data.frame$Alien_Romulus <- round(data.frame$Alien_Romulus, digits = 0)
data.frame$Beetlejuice_Beetlejuice <- round(data.frame$Beetlejuice_Beetlejuice, digits = 0)
data.frame$It_Ends_With_Us <- round(data.frame$It_Ends_With_Us, digits = 0)
#Double check the new data frame
print(data.frame)
## User_Id First_Name Deadpool_And_Wolverine Despicable_Me_4 Inside_Out_4
## 1 1 Jack 3 4 4
## 2 2 Carlos 4 4 5
## 3 3 Melissa 4 5 5
## 4 4 Fabricio 5 3 5
## 5 5 Marcie 4 4 5
## Alien_Romulus Beetlejuice_Beetlejuice It_Ends_With_Us
## 1 3 2 4
## 2 4 3 4
## 3 3 3 5
## 4 3 3 4
## 5 3 3 4
#Create a bar chart
ggplot(data.frame, aes(x = First_Name, y = Despicable_Me_4)) +
geom_col() +
labs(title = "Despicable Me 4 Ratings")
Standardized rating has several benefits. Firstly, it improves the clarity and quality of data by saving time in reading and analyzing the information. Secondly, it makes it easier for others to understand, such as when presenting the process and results to an audience. Thirdly, standardized ratings are easier for people to respond to, eliminating concerns about using the wrong scale.
It is common practice to utilize a 5-point rating scale as it reduces the redundancy associated with completing multiple forms or surveys. Moreover, employing such ratings can aid in identifying potential biases or overlooked factors. Additionally, providing a non-mandatory section where respondents can offer comments on the subject matter or the ratings themselves can be advantageous.