TUGAS 4 PRAKTIKUM STA581

EKO SISWANTO (G1501211011)

DATABASE QUERIES

library(tidyverse)
library(RSQLite)
library(DBI)

Dataset

Northwind<-DBI::dbConnect(RSQLite::SQLite(), "C:/sqlite/db/Northwind_large.sqlite")

class(Northwind)
[1] "SQLiteConnection"
attr(,"package")
[1] "RSQLite"
RSQLite::dbListTables(Northwind)
 [1] "Category"             "Customer"             "CustomerCustomerDemo"
 [4] "CustomerDemographic"  "Employee"             "EmployeeTerritory"   
 [7] "Order"                "OrderDetail"          "Product"             
[10] "ProductDetails_V"     "Region"               "Shipper"             
[13] "Supplier"             "Territory"           
product<-dplyr::tbl(Northwind,"Product")
class(product)
[1] "tbl_SQLiteConnection" "tbl_dbi"              "tbl_sql"             
[4] "tbl_lazy"             "tbl"                 
product
# Source:   table<Product> [?? x 10]
# Database: sqlite 3.36.0 [C:\sqlite\db\Northwind_large.sqlite]
      Id ProductName           SupplierId CategoryId QuantityPerUnit   UnitPrice
   <int> <chr>                      <int>      <int> <chr>                 <dbl>
 1     1 Chai                           1          1 10 boxes x 20 ba~      18  
 2     2 Chang                          1          1 24 - 12 oz bottl~      19  
 3     3 Aniseed Syrup                  1          2 12 - 550 ml bott~      10  
 4     4 Chef Anton's Cajun S~          2          2 48 - 6 oz jars         22  
 5     5 Chef Anton's Gumbo M~          2          2 36 boxes               21.4
 6     6 Grandma's Boysenberr~          3          2 12 - 8 oz jars         25  
 7     7 Uncle Bob's Organic ~          3          7 12 - 1 lb pkgs.        30  
 8     8 Northwoods Cranberry~          3          2 12 - 12 oz jars        40  
 9     9 Mishi Kobe Niku                4          6 18 - 500 g pkgs.       97  
10    10 Ikura                          4          8 12 - 200 ml jars       31  
# ... with more rows, and 4 more variables: UnitsInStock <int>,
#   UnitsOnOrder <int>, ReorderLevel <int>, Discontinued <int>
q<-product %>%
  filter(UnitPrice>20) %>% 
  group_by(SupplierId) %>% 
  summarize(stock=min(UnitsInStock)) %>%
  arrange(desc(stock))
q
# Source:     lazy query [?? x 2]
# Database:   sqlite 3.36.0 [C:\sqlite\db\Northwind_large.sqlite]
# Ordered by: desc(stock)
   SupplierId stock
        <int> <int>
 1         25   115
 2          9   104
 3          8    40
 4          6    35
 5          4    29
 6         15    26
 7          5    22
 8         26    21
 9         28    19
10         29    17
# ... with more rows
class(q)
[1] "tbl_SQLiteConnection" "tbl_dbi"              "tbl_sql"             
[4] "tbl_lazy"             "tbl"                 
dplyr::show_query(q)
<SQL>
SELECT `SupplierId`, MIN(`UnitsInStock`) AS `stock`
FROM `Product`
WHERE (`UnitPrice` > 20.0)
GROUP BY `SupplierId`
ORDER BY `stock` DESC

Combining Multiple Tables with dplyr in R

data("band_members")
band_members
# A tibble: 3 x 2
  name  band   
  <chr> <chr>  
1 Mick  Stones 
2 John  Beatles
3 Paul  Beatles
data("band_instruments")
band_instruments
# A tibble: 3 x 2
  name  plays 
  <chr> <chr> 
1 John  guitar
2 Paul  bass  
3 Keith guitar
data("band_instruments2")
band_instruments2
# A tibble: 3 x 2
  artist plays 
  <chr>  <chr> 
1 John   guitar
2 Paul   bass  
3 Keith  guitar

inner_join()

inner_join(band_members,band_instruments)
# A tibble: 2 x 3
  name  band    plays 
  <chr> <chr>   <chr> 
1 John  Beatles guitar
2 Paul  Beatles bass  
band_members %>% inner_join(band_instruments)
# A tibble: 2 x 3
  name  band    plays 
  <chr> <chr>   <chr> 
1 John  Beatles guitar
2 Paul  Beatles bass  

left_join()

left_join(band_members,band_instruments)
# A tibble: 3 x 3
  name  band    plays 
  <chr> <chr>   <chr> 
1 Mick  Stones  <NA>  
2 John  Beatles guitar
3 Paul  Beatles bass  

right_join()

right_join(band_members,band_instruments)
# A tibble: 3 x 3
  name  band    plays 
  <chr> <chr>   <chr> 
1 John  Beatles guitar
2 Paul  Beatles bass  
3 Keith <NA>    guitar

full_join()

full_join(band_members,band_instruments)
# A tibble: 4 x 3
  name  band    plays 
  <chr> <chr>   <chr> 
1 Mick  Stones  <NA>  
2 John  Beatles guitar
3 Paul  Beatles bass  
4 Keith <NA>    guitar

semi_join()

semi_join(band_members,band_instruments)
# A tibble: 2 x 2
  name  band   
  <chr> <chr>  
1 John  Beatles
2 Paul  Beatles

anti_join()

anti_join(band_members,band_instruments)
# A tibble: 1 x 2
  name  band  
  <chr> <chr> 
1 Mick  Stones

full_join()

full_join(band_members, band_instruments2, by = c("name" = "artist"))
# A tibble: 4 x 3
  name  band    plays 
  <chr> <chr>   <chr> 
1 Mick  Stones  <NA>  
2 John  Beatles guitar
3 Paul  Beatles bass  
4 Keith <NA>    guitar