df1 = data.frame(CustomerId = c(1:6), Product = c(rep("Toaster", 3), rep("Radio", 3)))
df2 = data.frame(CustomerId = c(2, 4, 6), State = c(rep("Alabama", 2), rep("Ohio", 1)))
df1
## CustomerId Product
## 1 1 Toaster
## 2 2 Toaster
## 3 3 Toaster
## 4 4 Radio
## 5 5 Radio
## 6 6 Radio
df2
## CustomerId State
## 1 2 Alabama
## 2 4 Alabama
## 3 6 Ohio
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## inner join
df3 <- sqldf("SELECT CustomerId, Product, State
FROM df1
JOIN df2 USING(CustomerID)")
## Loading required package: tcltk
## Warning: Quoted identifiers should have class SQL, use DBI::SQL() if the
## caller performs the quoting.
df3
## CustomerId Product State
## 1 2 Toaster Alabama
## 2 4 Radio Alabama
## 3 6 Radio Ohio
## left join (substitute 'right' for right join)
df4 <- sqldf("SELECT CustomerId, Product, State
FROM df1
LEFT JOIN df2 USING(CustomerID)")
df4
## CustomerId Product State
## 1 1 Toaster <NA>
## 2 2 Toaster Alabama
## 3 3 Toaster <NA>
## 4 4 Radio Alabama
## 5 5 Radio <NA>
## 6 6 Radio Ohio
df5 <- sqldf("SELECT CustomerId, Product, State
FROM df2
LEFT JOIN df1 USING(CustomerID)")
df5
## CustomerId Product State
## 1 2 Toaster Alabama
## 2 4 Radio Alabama
## 3 6 Radio Ohio
library(data.table)
dt1 <- data.table(df1, key = "CustomerId")
dt2 <- data.table(df2, key = "CustomerId")
joined.dt1.dt.2 <- dt1[dt2]
joined.dt1.dt.2
## CustomerId Product State
## 1: 2 Toaster Alabama
## 2: 4 Radio Alabama
## 3: 6 Radio Ohio
#base::merge also works on data.tables:
merge(dt1, dt2)
## CustomerId Product State
## 1: 2 Toaster Alabama
## 2: 4 Radio Alabama
## 3: 6 Radio Ohio
library(plyr)
join(df1, df2,
type = "left")
## Joining by: CustomerId
## CustomerId Product State
## 1 1 Toaster <NA>
## 2 2 Toaster Alabama
## 3 3 Toaster <NA>
## 4 4 Radio Alabama
## 5 5 Radio <NA>
## 6 6 Radio Ohio
# CustomerId Product State
# 1 2 Toaster Alabama
# 2 4 Radio Alabama
# 3 6 Radio Ohio
#Options for type: inner, left, right, full.
library(RODBC)
odbcDataSources()
## dBASE Files
## "Microsoft Access dBASE Driver (*.dbf, *.ndx, *.mdx)"
## Excel Files
## "Microsoft Excel Driver (*.xls, *.xlsx, *.xlsm, *.xlsb)"
## MS Access Database
## "Microsoft Access Driver (*.mdb, *.accdb)"
## PostgreSQL30
## "PostgreSQL ODBC Driver(ANSI)"
## ajaydb
## "PostgreSQL ODBC Driver(ANSI)"
chan=odbcConnect("PostgreSQL30","postgres;Password=root;Database=ajay")
sqlTables(chan)
## TABLE_CAT TABLE_SCHEM TABLE_NAME TABLE_TYPE REMARKS
## 1 ajay public iris TABLE
## 2 ajay public sales TABLE
## 3 ajay public temp TABLE
sqlFetch(chan,"sales",max=5)
## customer_id sales date product_id
## 1 10001 5230 2017-02-07 524
## 2 10002 2781 2017-05-12 469
## 3 10003 2083 2016-12-18 917
## 4 10004 214 2015-01-19 354
## 5 10005 9407 2016-09-26 292
sqlQuery(chan,"select * from sales where sales <300")
## customer_id sales date product_id
## 1 10004 214 2015-01-19 354
## 2 10043 251 2017-06-10 772
## 3 10072 146 2015-01-28 685
## 4 10168 115 2016-10-10 663
## 5 10260 123 2015-06-05 779
## 6 10279 229 2017-02-27 409
## 7 10297 127 2016-02-05 548
## 8 10298 190 2017-02-21 869
## 9 10411 115 2016-08-03 973
## 10 10469 247 2015-12-08 415