DATABASE QUERIES
library(tidyverse)
library(RSQLite)
library(DBI)Dataset
Northwind<-DBI::dbConnect(RSQLite::SQLite(), "C:/sqlite/db/Northwind_large.sqlite")
class(Northwind)[1] "SQLiteConnection"
attr(,"package")
[1] "RSQLite"
RSQLite::dbListTables(Northwind) [1] "Category" "Customer" "CustomerCustomerDemo"
[4] "CustomerDemographic" "Employee" "EmployeeTerritory"
[7] "Order" "OrderDetail" "Product"
[10] "ProductDetails_V" "Region" "Shipper"
[13] "Supplier" "Territory"
product<-dplyr::tbl(Northwind,"Product")
class(product)[1] "tbl_SQLiteConnection" "tbl_dbi" "tbl_sql"
[4] "tbl_lazy" "tbl"
product# Source: table<Product> [?? x 10]
# Database: sqlite 3.36.0 [C:\sqlite\db\Northwind_large.sqlite]
Id ProductName SupplierId CategoryId QuantityPerUnit UnitPrice
<int> <chr> <int> <int> <chr> <dbl>
1 1 Chai 1 1 10 boxes x 20 ba~ 18
2 2 Chang 1 1 24 - 12 oz bottl~ 19
3 3 Aniseed Syrup 1 2 12 - 550 ml bott~ 10
4 4 Chef Anton's Cajun S~ 2 2 48 - 6 oz jars 22
5 5 Chef Anton's Gumbo M~ 2 2 36 boxes 21.4
6 6 Grandma's Boysenberr~ 3 2 12 - 8 oz jars 25
7 7 Uncle Bob's Organic ~ 3 7 12 - 1 lb pkgs. 30
8 8 Northwoods Cranberry~ 3 2 12 - 12 oz jars 40
9 9 Mishi Kobe Niku 4 6 18 - 500 g pkgs. 97
10 10 Ikura 4 8 12 - 200 ml jars 31
# ... with more rows, and 4 more variables: UnitsInStock <int>,
# UnitsOnOrder <int>, ReorderLevel <int>, Discontinued <int>
q<-product %>%
filter(UnitPrice>20) %>%
group_by(SupplierId) %>%
summarize(stock=min(UnitsInStock)) %>%
arrange(desc(stock))
q# Source: lazy query [?? x 2]
# Database: sqlite 3.36.0 [C:\sqlite\db\Northwind_large.sqlite]
# Ordered by: desc(stock)
SupplierId stock
<int> <int>
1 25 115
2 9 104
3 8 40
4 6 35
5 4 29
6 15 26
7 5 22
8 26 21
9 28 19
10 29 17
# ... with more rows
class(q)[1] "tbl_SQLiteConnection" "tbl_dbi" "tbl_sql"
[4] "tbl_lazy" "tbl"
dplyr::show_query(q)<SQL>
SELECT `SupplierId`, MIN(`UnitsInStock`) AS `stock`
FROM `Product`
WHERE (`UnitPrice` > 20.0)
GROUP BY `SupplierId`
ORDER BY `stock` DESC
Combining Multiple Tables with dplyr in R
data("band_members")
band_members# A tibble: 3 x 2
name band
<chr> <chr>
1 Mick Stones
2 John Beatles
3 Paul Beatles
data("band_instruments")
band_instruments# A tibble: 3 x 2
name plays
<chr> <chr>
1 John guitar
2 Paul bass
3 Keith guitar
data("band_instruments2")
band_instruments2# A tibble: 3 x 2
artist plays
<chr> <chr>
1 John guitar
2 Paul bass
3 Keith guitar
inner_join()
inner_join(band_members,band_instruments)# A tibble: 2 x 3
name band plays
<chr> <chr> <chr>
1 John Beatles guitar
2 Paul Beatles bass
band_members %>% inner_join(band_instruments)# A tibble: 2 x 3
name band plays
<chr> <chr> <chr>
1 John Beatles guitar
2 Paul Beatles bass
left_join()
left_join(band_members,band_instruments)# A tibble: 3 x 3
name band plays
<chr> <chr> <chr>
1 Mick Stones <NA>
2 John Beatles guitar
3 Paul Beatles bass
right_join()
right_join(band_members,band_instruments)# A tibble: 3 x 3
name band plays
<chr> <chr> <chr>
1 John Beatles guitar
2 Paul Beatles bass
3 Keith <NA> guitar
full_join()
full_join(band_members,band_instruments)# A tibble: 4 x 3
name band plays
<chr> <chr> <chr>
1 Mick Stones <NA>
2 John Beatles guitar
3 Paul Beatles bass
4 Keith <NA> guitar
semi_join()
semi_join(band_members,band_instruments)# A tibble: 2 x 2
name band
<chr> <chr>
1 John Beatles
2 Paul Beatles
anti_join()
anti_join(band_members,band_instruments)# A tibble: 1 x 2
name band
<chr> <chr>
1 Mick Stones
full_join()
full_join(band_members, band_instruments2, by = c("name" = "artist"))# A tibble: 4 x 3
name band plays
<chr> <chr> <chr>
1 Mick Stones <NA>
2 John Beatles guitar
3 Paul Beatles bass
4 Keith <NA> guitar