Assignments
- Collect data points
- Store the observations in a SQL database.
- Load the info into an R dataframe.
Requirements
- Include SQL scripts
- R markdown code
- Share on Github, Rpub.
Install packages
#install.packages("devtools")
#library('devtools')
#devtools::install_github("RcppCore/Rcpp")
#devtools::install_github("rstats-db/DBI")
#devtools::install_github("rstats-db/RPostgres")
Load required packages
library(DBI)
library(RPostgres)
library(knitr)
library(reshape2)
library(tidyr)
library(DT)
Set working directory where rmarkdown is located
set_wd <- function() {
library(rstudioapi) # make sure you have it installed
current_path <- getActiveDocumentContext()$path
setwd(dirname(current_path ))
print( getwd() )
}
AWS_connection_info
source("./source_lib/aws_rep_connect.R")
Connect to RDS PostgreSQL
con <- dbConnect(RPostgres::Postgres(),
host = host,
port = port,
dbname = dbname,
user = user,
password = password)
Send query and Fetch the dataset
# Query the table
rs <- dbSendQuery(con, statement="SELECT * FROM movie_rating;")
# Fetch all result
data <- dbFetch(rs, n= -1)
The Results
str(data)
## 'data.frame': 25 obs. of 4 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ person: chr "person1" "person1" "person1" "person1" ...
## $ movie : chr "Paddington 2" "Coco" "Jumanji: Welcome to the jungle" "Maze Runner: The Death Cure" ...
## $ score : int 5 5 4 2 3 3 3 3 5 4 ...
View the results in table
datatable(data)
Aggregate the data by movie title
group_by_movies <- dcast (data, movie~score)
colnames(group_by_movies) <- c("movie", "*", "**", "***", "****", "*****")
kable(group_by_movies, caption = "Movie rating sample")
Movie rating sample
Coco |
0 |
2 |
2 |
0 |
1 |
Jumanji: Welcome to the jungle |
0 |
1 |
1 |
3 |
0 |
Maze Runner: The Death Cure |
0 |
1 |
1 |
1 |
2 |
Paddington 2 |
2 |
1 |
1 |
0 |
1 |
The Greatest Showman |
0 |
1 |
2 |
1 |
1 |
Clear result and Disconnect
dbClearResult(rs)
dbDisconnect(con)