Week 2 Assignment– SQL and R

Load library

# install.packages("odbc")
library(DBI)

Connect to the server and get DBMS Metadata. Note: I was able to use a password without having to share the password with people who are viewing your code. I am using a Windows Authentication.

con <- dbConnect(odbc::odbc(),
  .connection_string = "Driver={SQL Server};Server=(local);Database=tb;Trusted_Connection=Yes;")
dbGetInfo(con)
## $dbname
## [1] "tb"
## 
## $dbms.name
## [1] "Microsoft SQL Server"
## 
## $db.version
## [1] "14.00.2037"
## 
## $username
## [1] "dbo"
## 
## $host
## [1] ""
## 
## $port
## [1] ""
## 
## $sourcename
## [1] ""
## 
## $servername
## [1] "DESKTOP-TCAN6V5"
## 
## $drivername
## [1] "SQLSRV32.DLL"
## 
## $odbc.version
## [1] "03.80.0000"
## 
## $driver.version
## [1] "10.00.19041"
## 
## $odbcdriver.version
## [1] "03.52"
## 
## $supports.transactions
## [1] TRUE
## 
## $getdata.extensions.any_column
## [1] FALSE
## 
## $getdata.extensions.any_order
## [1] FALSE
## 
## attr(,"class")
## [1] "Microsoft SQL Server" "driver_info"          "list"

Create tables

rs = dbSendStatement(con, "DROP TABLE IF EXISTS dbo.survey;
DROP TABLE IF EXISTS dbo.movie;

CREATE TABLE dbo.movie (
movie_id INT NOT NULL PRIMARY KEY,
movie_name VARCHAR(40) NOT NULL);

CREATE TABLE dbo.survey ( 
username VARCHAR(40) NOT NULL, 
movie_id INT NOT NULL FOREIGN KEY REFERENCES movie(movie_id),
rate INT NULL);")
dbClearResult(rs)

Check table’s columns

dbListFields(con, "survey")
## [1] "username" "movie_id" "rate"

Imports a data file into a database table.

rs = dbSendStatement(con, "BULK INSERT dbo.movie
FROM 'C:\\CUNY\\DATA607\\Week2\\movie.csv'
WITH (FORMAT = 'CSV');

BULK INSERT dbo.survey
FROM 'C:\\CUNY\\DATA607\\Week2\\survey.csv'
WITH (FORMAT = 'CSV');")
dbClearResult(rs)

Delete rows without rating

rs = dbSendQuery(con, "DELETE FROM dbo.survey WHERE rate IS NULL")
dbClearResult(rs)

Load data into R data.frame

rs <- dbSendQuery(con, "SELECT s.username, m.movie_name, s.rate
FROM dbo.survey s
JOIN dbo.movie m ON s.movie_id = m.movie_id;")
df = data.frame(dbFetch(rs))
dbClearResult(rs)

Get the summary of the data frame anf first 5 rows

print(summary(df))
##    username          movie_name             rate      
##  Length:22          Length:22          Min.   :1.000  
##  Class :character   Class :character   1st Qu.:2.000  
##  Mode  :character   Mode  :character   Median :3.000  
##                                        Mean   :3.273  
##                                        3rd Qu.:4.750  
##                                        Max.   :5.000

Get first 6 rows

head(df)
##   username         movie_name rate
## 1    user1 One Night in Miami    1
## 2    user1           Hamilton    2
## 3    user1               1917    3
## 4    user1  The Invisible Man    4
## 5    user1               Soul    5
## 6    user2           Hamilton    5

Close connection

dbDisconnect(con)