Load library
# install.packages("odbc")
library(DBI)
Connect to the server and get DBMS Metadata. Note: I was able to use a password without having to share the password with people who are viewing your code. I am using a Windows Authentication.
con <- dbConnect(odbc::odbc(),
.connection_string = "Driver={SQL Server};Server=(local);Database=tb;Trusted_Connection=Yes;")
dbGetInfo(con)
## $dbname
## [1] "tb"
##
## $dbms.name
## [1] "Microsoft SQL Server"
##
## $db.version
## [1] "14.00.2037"
##
## $username
## [1] "dbo"
##
## $host
## [1] ""
##
## $port
## [1] ""
##
## $sourcename
## [1] ""
##
## $servername
## [1] "DESKTOP-TCAN6V5"
##
## $drivername
## [1] "SQLSRV32.DLL"
##
## $odbc.version
## [1] "03.80.0000"
##
## $driver.version
## [1] "10.00.19041"
##
## $odbcdriver.version
## [1] "03.52"
##
## $supports.transactions
## [1] TRUE
##
## $getdata.extensions.any_column
## [1] FALSE
##
## $getdata.extensions.any_order
## [1] FALSE
##
## attr(,"class")
## [1] "Microsoft SQL Server" "driver_info" "list"
Create tables
rs = dbSendStatement(con, "DROP TABLE IF EXISTS dbo.survey;
DROP TABLE IF EXISTS dbo.movie;
CREATE TABLE dbo.movie (
movie_id INT NOT NULL PRIMARY KEY,
movie_name VARCHAR(40) NOT NULL);
CREATE TABLE dbo.survey (
username VARCHAR(40) NOT NULL,
movie_id INT NOT NULL FOREIGN KEY REFERENCES movie(movie_id),
rate INT NULL);")
dbClearResult(rs)
Check table’s columns
dbListFields(con, "survey")
## [1] "username" "movie_id" "rate"
Imports a data file into a database table.
rs = dbSendStatement(con, "BULK INSERT dbo.movie
FROM 'C:\\CUNY\\DATA607\\Week2\\movie.csv'
WITH (FORMAT = 'CSV');
BULK INSERT dbo.survey
FROM 'C:\\CUNY\\DATA607\\Week2\\survey.csv'
WITH (FORMAT = 'CSV');")
dbClearResult(rs)
Delete rows without rating
rs = dbSendQuery(con, "DELETE FROM dbo.survey WHERE rate IS NULL")
dbClearResult(rs)
Load data into R data.frame
rs <- dbSendQuery(con, "SELECT s.username, m.movie_name, s.rate
FROM dbo.survey s
JOIN dbo.movie m ON s.movie_id = m.movie_id;")
df = data.frame(dbFetch(rs))
dbClearResult(rs)
Get the summary of the data frame anf first 5 rows
print(summary(df))
## username movie_name rate
## Length:22 Length:22 Min. :1.000
## Class :character Class :character 1st Qu.:2.000
## Mode :character Mode :character Median :3.000
## Mean :3.273
## 3rd Qu.:4.750
## Max. :5.000
Get first 6 rows
head(df)
## username movie_name rate
## 1 user1 One Night in Miami 1
## 2 user1 Hamilton 2
## 3 user1 1917 3
## 4 user1 The Invisible Man 4
## 5 user1 Soul 5
## 6 user2 Hamilton 5
Close connection
dbDisconnect(con)