CUNY SPS DATA607 HW2

Name: Chinedu Onyeka; Date: September 2nd, 2021

This assignment demonstrates ability to connect to a database from R

Load Libraries

library(RODBC) #To access MySQL database 
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Connect to the MySQL database using odbc connect and a dsn

conn <- odbcConnect("MySQL_DSN")

Read the reviewers information from MySQL database into R dataframe

reviews <- sqlQuery(conn, "SELECT * FROM reviewers")
head(reviews, n=20)
##    reviewer_id first_name last_name              title rating
## 1            1       Ashe     Denis           Avengers      5
## 2            1       Ashe     Denis      Black Panther      4
## 3            1       Ashe     Denis Something Borrowed      5
## 4            1       Ashe     Denis                Red      3
## 5            1       Ashe     Denis         Pure Haven      1
## 6            1       Ashe     Denis          Good Deed      5
## 7            2   Chinonye    Onyeka           Avengers      5
## 8            2   Chinonye    Onyeka      Black Panther      2
## 9            2   Chinonye    Onyeka Something Borrowed      4
## 10           2   Chinonye    Onyeka                Red      5
## 11           2   Chinonye    Onyeka         Pure Haven      4
## 12           2   Chinonye    Onyeka          Good Deed      3
## 13           3   Kingsley       Obi           Avengers      4
## 14           3   Kingsley       Obi      Black Panther      4
## 15           3   Kingsley       Obi Something Borrowed      4
## 16           3   Kingsley       Obi                Red      4
## 17           3   Kingsley       Obi         Pure Haven      4
## 18           3   Kingsley       Obi          Good Deed      4
## 19           4   Emmanuel    Nnamdi           Avengers      5
## 20           4   Emmanuel    Nnamdi      Black Panther      4
str(reviews)
## 'data.frame':    30 obs. of  5 variables:
##  $ reviewer_id: int  1 1 1 1 1 1 2 2 2 2 ...
##  $ first_name : chr  "Ashe" "Ashe" "Ashe" "Ashe" ...
##  $ last_name  : chr  "Denis" "Denis" "Denis" "Denis" ...
##  $ title      : chr  "Avengers" "Black Panther" "Something Borrowed" "Red" ...
##  $ rating     : int  5 4 5 3 1 5 5 2 4 5 ...

Check if the dataframe contains any missing values

anyNA(reviews)
## [1] TRUE

The missing values are in the rating column

anyNA(reviews$rating)
## [1] TRUE

Missing Values : I omit missing values

# avg_rating <- round(mean(reviews$rating, na.rm = TRUE),0)
reviews_new <- reviews %>% na.omit()

Check for missingness in the new dataframe

anyNA(reviews_new)
## [1] FALSE

Check the head of the new dataframe

head(reviews_new, n = 20)
##    reviewer_id first_name last_name              title rating
## 1            1       Ashe     Denis           Avengers      5
## 2            1       Ashe     Denis      Black Panther      4
## 3            1       Ashe     Denis Something Borrowed      5
## 4            1       Ashe     Denis                Red      3
## 5            1       Ashe     Denis         Pure Haven      1
## 6            1       Ashe     Denis          Good Deed      5
## 7            2   Chinonye    Onyeka           Avengers      5
## 8            2   Chinonye    Onyeka      Black Panther      2
## 9            2   Chinonye    Onyeka Something Borrowed      4
## 10           2   Chinonye    Onyeka                Red      5
## 11           2   Chinonye    Onyeka         Pure Haven      4
## 12           2   Chinonye    Onyeka          Good Deed      3
## 13           3   Kingsley       Obi           Avengers      4
## 14           3   Kingsley       Obi      Black Panther      4
## 15           3   Kingsley       Obi Something Borrowed      4
## 16           3   Kingsley       Obi                Red      4
## 17           3   Kingsley       Obi         Pure Haven      4
## 18           3   Kingsley       Obi          Good Deed      4
## 19           4   Emmanuel    Nnamdi           Avengers      5
## 20           4   Emmanuel    Nnamdi      Black Panther      4

Check the structure

str(reviews_new)
## 'data.frame':    27 obs. of  5 variables:
##  $ reviewer_id: int  1 1 1 1 1 1 2 2 2 2 ...
##  $ first_name : chr  "Ashe" "Ashe" "Ashe" "Ashe" ...
##  $ last_name  : chr  "Denis" "Denis" "Denis" "Denis" ...
##  $ title      : chr  "Avengers" "Black Panther" "Something Borrowed" "Red" ...
##  $ rating     : int  5 4 5 3 1 5 5 2 4 5 ...
##  - attr(*, "na.action")= 'omit' Named int [1:3] 25 27 28
##   ..- attr(*, "names")= chr [1:3] "25" "27" "28"

Show all 6 videos

sqlQuery(conn, "SELECT * FROM videos")
##   video_id              title
## 1        1           Avengers
## 2        2      Black Panther
## 3        3 Something Borrowed
## 4        4                Red
## 5        5         Pure Haven
## 6        6          Good Deed