Title DATA 607 week 2 assignment

Author Rose Koh

Date 2018/02/10

Rpub Rpub Link

Github Github Link

Title	DATA 607 week 2 assignment
Author	Rose Koh
Date	2018/02/10
Rpub	Rpub Link
Github	Github Link

Assignments

Collect data points
Store the observations in a SQL database.
Load the info into an R dataframe.

Requirements

Include SQL scripts
R markdown code
Share on Github, Rpub.

Install packages

#install.packages("devtools")
#library('devtools')
#devtools::install_github("RcppCore/Rcpp")
#devtools::install_github("rstats-db/DBI")
#devtools::install_github("rstats-db/RPostgres")

Load required packages

library(DBI)
library(RPostgres)
library(knitr)
library(reshape2)
library(tidyr)
library(DT)

Set working directory where rmarkdown is located

set_wd <- function() {
  library(rstudioapi) # make sure you have it installed
  current_path <- getActiveDocumentContext()$path 
  setwd(dirname(current_path ))
  print( getwd() )
}

AWS_connection_info

source("./source_lib/aws_rep_connect.R")

Connect to RDS PostgreSQL

con <- dbConnect(RPostgres::Postgres(),
                 host = host,
                 port = port,
                 dbname = dbname,
                 user = user,
                 password = password)

Send query and Fetch the dataset

# Query the table
rs <- dbSendQuery(con, statement="SELECT * FROM movie_rating;")

# Fetch all result
data <- dbFetch(rs, n= -1)

The Results

str(data)

## 'data.frame':    25 obs. of  4 variables:
##  $ id    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ person: chr  "person1" "person1" "person1" "person1" ...
##  $ movie : chr  "Paddington 2" "Coco" "Jumanji: Welcome to the jungle" "Maze Runner: The Death Cure" ...
##  $ score : int  5 5 4 2 3 3 3 3 5 4 ...

View the results in table

datatable(data)

Aggregate the data by movie title

group_by_movies <- dcast (data, movie~score)
colnames(group_by_movies) <- c("movie", "*", "**", "***", "****", "*****")
kable(group_by_movies, caption = "Movie rating sample")

Movie rating sample
movie	*	**	***	****	*****
Coco	0	2	2	0	1
Jumanji: Welcome to the jungle	0	1	1	3	0
Maze Runner: The Death Cure	0	1	1	1	2
Paddington 2	2	1	1	0	1
The Greatest Showman	0	1	2	1	1

Clear result and Disconnect

dbClearResult(rs)
dbDisconnect(con)