Title DATA 607 week 2 assignment
Author Rose Koh
Date 2018/02/10
Rpub Rpub Link
Github Github Link

Assignments

  1. Collect data points
  2. Store the observations in a SQL database.
  3. Load the info into an R dataframe.

Requirements

  1. Include SQL scripts
  2. R markdown code
  3. Share on Github, Rpub.

Install packages

#install.packages("devtools")
#library('devtools')
#devtools::install_github("RcppCore/Rcpp")
#devtools::install_github("rstats-db/DBI")
#devtools::install_github("rstats-db/RPostgres")

Load required packages

library(DBI)
library(RPostgres)
library(knitr)
library(reshape2)
library(tidyr)
library(DT)

Set working directory where rmarkdown is located

set_wd <- function() {
  library(rstudioapi) # make sure you have it installed
  current_path <- getActiveDocumentContext()$path 
  setwd(dirname(current_path ))
  print( getwd() )
}

AWS_connection_info

source("./source_lib/aws_rep_connect.R")

Connect to RDS PostgreSQL

con <- dbConnect(RPostgres::Postgres(),
                 host = host,
                 port = port,
                 dbname = dbname,
                 user = user,
                 password = password)

Send query and Fetch the dataset

# Query the table
rs <- dbSendQuery(con, statement="SELECT * FROM movie_rating;")

# Fetch all result
data <- dbFetch(rs, n= -1)

The Results

str(data)
## 'data.frame':    25 obs. of  4 variables:
##  $ id    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ person: chr  "person1" "person1" "person1" "person1" ...
##  $ movie : chr  "Paddington 2" "Coco" "Jumanji: Welcome to the jungle" "Maze Runner: The Death Cure" ...
##  $ score : int  5 5 4 2 3 3 3 3 5 4 ...

View the results in table

datatable(data)

Aggregate the data by movie title

group_by_movies <- dcast (data, movie~score)
colnames(group_by_movies) <- c("movie", "*", "**", "***", "****", "*****")
kable(group_by_movies, caption = "Movie rating sample")
Movie rating sample
movie * ** *** **** *****
Coco 0 2 2 0 1
Jumanji: Welcome to the jungle 0 1 1 3 0
Maze Runner: The Death Cure 0 1 1 1 2
Paddington 2 2 1 1 0 1
The Greatest Showman 0 1 2 1 1

Clear result and Disconnect

dbClearResult(rs)
dbDisconnect(con)