library(knitr)
# NOTE: the workspace is intentionally not wiped with rm(list = ls()) here.
# Doing so deletes the objects of whoever sources this script, while leaving
# loaded packages and options untouched, so it gives no real clean start.

# First, I exported the SQL table to a CSV file and then read that file into
# R Markdown (here from the copy hosted on GitHub). I was curious about certain
# variables, so I did some data exploration.
ratings_url <- "https://raw.githubusercontent.com/VioletaStoyanova/Data607/7e7dd15105ac29b83552264632e618d827582793/Viewers_Ratings.csv"
Viewers_Rating <- read.csv(ratings_url, header = TRUE)

# Per-column summary of the raw data.
summary(Viewers_Rating)
## idViewers_Ratings First Last Age Wonder_Woman
## Min. :1 Eric :1 Bastardo:1 Min. :21.0 Min. :3
## 1st Qu.:2 Gabrielle:1 Benson :1 1st Qu.:25.0 1st Qu.:3
## Median :3 Gil :1 Chico :1 Median :29.0 Median :4
## Mean :3 Ines :1 Hatcher :1 Mean :31.6 Mean :4
## 3rd Qu.:4 Luis :1 Johnson :1 3rd Qu.:38.0 3rd Qu.:5
## Max. :5 Max. :45.0 Max. :5
## IT Get_Out Lady_Bird The_Post The_Shape_of_Water
## Min. :2.0 Min. :2.0 Min. :3.0 Min. :3 Min. :3
## 1st Qu.:2.0 1st Qu.:3.0 1st Qu.:3.0 1st Qu.:4 1st Qu.:3
## Median :3.0 Median :4.0 Median :4.0 Median :4 Median :4
## Mean :3.2 Mean :3.8 Mean :3.6 Mean :4 Mean :4
## 3rd Qu.:4.0 3rd Qu.:5.0 3rd Qu.:4.0 3rd Qu.:4 3rd Qu.:5
## Max. :5.0 Max. :5.0 Max. :4.0 Max. :5 Max. :5
# Average age of the viewers in the sample.
mean(Viewers_Rating[["Age"]])
## [1] 31.6
# Next, I created a working copy of the data as a data frame.
Viewers_Rating_df <- data.frame(Viewers_Rating, stringsAsFactors = FALSE)

# `stringsAsFactors = FALSE` does not undo a factor conversion that already
# happened when the CSV was read, so turn any factor column back into plain
# character explicitly. All other columns are kept as ordinary vectors.
Viewers_Rating_df[] <- lapply(
  Viewers_Rating_df,
  function(column) if (is.factor(column)) as.character(column) else column
)
print(Viewers_Rating_df)
## idViewers_Ratings First Last Age Wonder_Woman IT Get_Out
## 1 1 Eric Johnson 45 3 2 2
## 2 2 Luis Bastardo 21 4 2 5
## 3 3 Gil Chico 38 5 5 5
## 4 4 Ines Benson 25 3 3 4
## 5 5 Gabrielle Hatcher 29 5 4 3
## Lady_Bird The_Post The_Shape_of_Water
## 1 4 4 5
## 2 3 4 3
## 3 3 5 5
## 4 4 3 4
## 5 4 4 3
# The next step is data visualization. I was particularly interested in how popular each movie was relative to the age of the viewers, although the sample is too small to show any significant difference. Possible next steps are to add more data, such as genre and release date, and to see whether there is any correlation between age and those variables.
library(ggplot2)

# Rating columns to plot, in the order the charts are produced.
movie_columns <- c(
  "Wonder_Woman", "IT", "Get_Out",
  "Lady_Bird", "The_Post", "The_Shape_of_Water"
)

# Draw one bar chart per movie: viewer age on the x axis, that movie's rating
# as the bar height, and bars filled by age.
for (movie in movie_columns) {
  p <- ggplot(Viewers_Rating_df, aes(Age, .data[[movie]])) +
    geom_col(aes(fill = Age)) +
    # Label the y axis with the column name, as the hand-written plots did.
    labs(y = movie)
  # ggplot objects are not auto-printed inside a loop, so print explicitly.
  print(p)
}
