In this report, a basic survey was performed to rate the most recent top Box Office movies. The sample taken is six-6 movies from different Genres. The basic survey taken from friends and family members.
First step: Connect DB - PostgreSQL to RStudio.
#Connect postgresql db to Rstudio
require("RPostgreSQL")
## Loading required package: RPostgreSQL
## Loading required package: DBI
pw <- {
"yahia2010"
}
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "movies_rating",
host = "localhost",
port = 5432,
user = "postgres",
pw)
# clearing the pw
rm(pw)
#check if the database is connected
dbExistsTable(con, "movie_rating")
## [1] TRUE
library(kableExtra)
# return the table
dtab = dbGetQuery(con, "select * from movie_rating")
dtab %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>% scroll_box(width="100%",height="300px")
movie_name
|
movie_year
|
movie_rating
|
Angel Has Fallen
|
2019
|
5
|
Good Boys
|
2019
|
3
|
The Lion King
|
2019
|
5
|
Overcommer
|
2019
|
1
|
Spider-Man: Far From Home
|
2019
|
5
|
The Angry Birds Movie 2
|
2019
|
4
|
Brain On Fire
|
2019
|
NA
|
Brain On Fire
|
2019
|
NA
|
Brain On Fire
|
2019
|
NA
|
## here we have to clean the data set first before we start to analyise it.
str(dtab)
## 'data.frame': 9 obs. of 3 variables:
## $ movie_name : chr "Angel Has Fallen" "Good Boys" "The Lion King" "Overcommer" ...
## $ movie_year : chr "2019" "2019" "2019" "2019" ...
## $ movie_rating: int 5 3 5 1 5 4 NA NA NA
#The results of complete.cases() is a logical vector with the value TRUE for rows that are complete, and FALSE for rows that have some NA values. To remove the rows with missing data
complete.cases(dtab)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE
# Here we delete the last two rows from the dataset
#clean_data <- dtab[complete.cases(dtab), ]
#str(clean_data)
## we can use na.omit() function to omit all rows that contain NA values
clean_data <- na.omit(dtab)
clean_data %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>% scroll_box(width="100%",height="300px")
movie_name
|
movie_year
|
movie_rating
|
Angel Has Fallen
|
2019
|
5
|
Good Boys
|
2019
|
3
|
The Lion King
|
2019
|
5
|
Overcommer
|
2019
|
1
|
Spider-Man: Far From Home
|
2019
|
5
|
The Angry Birds Movie 2
|
2019
|
4
|
## Building a dotplot graph to illustrate the data
require(ggplot2)
## Loading required package: ggplot2
ggplot(clean_data, aes(x = movie_rating, y = movie_name)) + geom_boxplot() + theme_bw()

# close the connection
dbDisconnect(con)
## [1] TRUE