Basic Query with R

Deklarasi Package

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.4.0      v purrr   0.3.5 
## v tibble  3.1.8      v dplyr   1.0.10
## v tidyr   1.2.1      v stringr 1.4.1 
## v readr   2.1.3      v forcats 0.5.2 
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(RSQLite)
library(DBI)

nw <- DBI::dbConnect(RSQLite::SQLite(),"C:/sqlite/db/Northwind_large.sqlite")
class(nw)
## [1] "SQLiteConnection"
## attr(,"package")
## [1] "RSQLite"

melihat tabel yang ada di database nw

RSQLite::dbListTables(nw)
##  [1] "Category"             "Customer"             "CustomerCustomerDemo"
##  [4] "CustomerDemographic"  "Employee"             "EmployeeTerritory"   
##  [7] "Order"                "OrderDetail"          "Product"             
## [10] "ProductDetails_V"     "Region"               "Shipper"             
## [13] "Supplier"             "Territory"
prod <- dplyr::tbl(nw,"Product")
colnames(prod)
##  [1] "Id"              "ProductName"     "SupplierId"      "CategoryId"     
##  [5] "QuantityPerUnit" "UnitPrice"       "UnitsInStock"    "UnitsOnOrder"   
##  [9] "ReorderLevel"    "Discontinued"
prod
## # Source:   table<Product> [?? x 10]
## # Database: sqlite 3.40.1 [C:\sqlite\db\Northwind_large.sqlite]
##       Id Produ~1 Suppl~2 Categ~3 Quant~4 UnitP~5 Units~6 Units~7 Reord~8 Disco~9
##    <int> <chr>     <int>   <int> <chr>     <dbl>   <int>   <int>   <int>   <int>
##  1     1 Chai          1       1 10 box~    18        39       0      10       0
##  2     2 Chang         1       1 24 - 1~    19        17      40      25       0
##  3     3 Anisee~       1       2 12 - 5~    10        13      70      25       0
##  4     4 Chef A~       2       2 48 - 6~    22        53       0       0       0
##  5     5 Chef A~       2       2 36 box~    21.4       0       0       0       1
##  6     6 Grandm~       3       2 12 - 8~    25       120       0      25       0
##  7     7 Uncle ~       3       7 12 - 1~    30        15       0      10       0
##  8     8 Northw~       3       2 12 - 1~    40         6       0       0       0
##  9     9 Mishi ~       4       6 18 - 5~    97        29       0       0       1
## 10    10 Ikura         4       8 12 - 2~    31        31       0       0       0
## # ... with more rows, and abbreviated variable names 1: ProductName,
## #   2: SupplierId, 3: CategoryId, 4: QuantityPerUnit, 5: UnitPrice,
## #   6: UnitsInStock, 7: UnitsOnOrder, 8: ReorderLevel, 9: Discontinued
sup <-  dplyr::tbl(nw,"Supplier")
colnames(sup)
##  [1] "Id"           "CompanyName"  "ContactName"  "ContactTitle" "Address"     
##  [6] "City"         "Region"       "PostalCode"   "Country"      "Phone"       
## [11] "Fax"          "HomePage"
sup 
## # Source:   table<Supplier> [?? x 12]
## # Database: sqlite 3.40.1 [C:\sqlite\db\Northwind_large.sqlite]
##       Id CompanyName  Conta~1 Conta~2 Address City  Region Posta~3 Country Phone
##    <int> <chr>        <chr>   <chr>   <chr>   <chr> <chr>  <chr>   <chr>   <chr>
##  1     1 Exotic Liqu~ Charlo~ Purcha~ 49 Gil~ Lond~ Briti~ EC1 4SD UK      (171~
##  2     2 New Orleans~ Shelle~ Order ~ P.O. B~ New ~ North~ 70117   USA     (100~
##  3     3 Grandma Kel~ Regina~ Sales ~ 707 Ox~ Ann ~ North~ 48104   USA     (313~
##  4     4 Tokyo Trade~ Yoshi ~ Market~ 9-8 Se~ Tokyo Easte~ 100     Japan   (03)~
##  5     5 Cooperativa~ Antoni~ Export~ Calle ~ Ovie~ South~ 33007   Spain   (98)~
##  6     6 Mayumi's     Mayumi~ Market~ 92 Set~ Osaka Easte~ 545     Japan   (06)~
##  7     7 Pavlova, Lt~ Ian De~ Market~ 74 Ros~ Melb~ Victo~ 3058    Austra~ (03)~
##  8     8 Specialty B~ Peter ~ Sales ~ 29 Kin~ Manc~ Briti~ M14 GSD UK      (161~
##  9     9 PB Knäckebr~ Lars P~ Sales ~ Kaload~ Göte~ North~ S-345 ~ Sweden  031-~
## 10    10 Refrescos A~ Carlos~ Market~ Av. da~ Sao ~ South~ 5442    Brazil  (11)~
## # ... with more rows, 2 more variables: Fax <chr>, HomePage <chr>, and
## #   abbreviated variable names 1: ContactName, 2: ContactTitle, 3: PostalCode

Inner Joint

inner <- inner_join(prod,sup, by = c("SupplierId" = "Id"))
colnames(inner)
##  [1] "Id"              "ProductName"     "SupplierId"      "CategoryId"     
##  [5] "QuantityPerUnit" "UnitPrice"       "UnitsInStock"    "UnitsOnOrder"   
##  [9] "ReorderLevel"    "Discontinued"    "CompanyName"     "ContactName"    
## [13] "ContactTitle"    "Address"         "City"            "Region"         
## [17] "PostalCode"      "Country"         "Phone"           "Fax"            
## [21] "HomePage"
inner
## # Source:   SQL [?? x 21]
## # Database: sqlite 3.40.1 [C:\sqlite\db\Northwind_large.sqlite]
##       Id Produ~1 Suppl~2 Categ~3 Quant~4 UnitP~5 Units~6 Units~7 Reord~8 Disco~9
##    <int> <chr>     <int>   <int> <chr>     <dbl>   <int>   <int>   <int>   <int>
##  1     1 Chai          1       1 10 box~    18        39       0      10       0
##  2     2 Chang         1       1 24 - 1~    19        17      40      25       0
##  3     3 Anisee~       1       2 12 - 5~    10        13      70      25       0
##  4     4 Chef A~       2       2 48 - 6~    22        53       0       0       0
##  5     5 Chef A~       2       2 36 box~    21.4       0       0       0       1
##  6     6 Grandm~       3       2 12 - 8~    25       120       0      25       0
##  7     7 Uncle ~       3       7 12 - 1~    30        15       0      10       0
##  8     8 Northw~       3       2 12 - 1~    40         6       0       0       0
##  9     9 Mishi ~       4       6 18 - 5~    97        29       0       0       1
## 10    10 Ikura         4       8 12 - 2~    31        31       0       0       0
## # ... with more rows, 11 more variables: CompanyName <chr>, ContactName <chr>,
## #   ContactTitle <chr>, Address <chr>, City <chr>, Region <chr>,
## #   PostalCode <chr>, Country <chr>, Phone <chr>, Fax <chr>, HomePage <chr>,
## #   and abbreviated variable names 1: ProductName, 2: SupplierId,
## #   3: CategoryId, 4: QuantityPerUnit, 5: UnitPrice, 6: UnitsInStock,
## #   7: UnitsOnOrder, 8: ReorderLevel, 9: Discontinued

Left Joint

left <- left_join(prod,sup)
## Joining, by = "Id"
colnames(left)
##  [1] "Id"              "ProductName"     "SupplierId"      "CategoryId"     
##  [5] "QuantityPerUnit" "UnitPrice"       "UnitsInStock"    "UnitsOnOrder"   
##  [9] "ReorderLevel"    "Discontinued"    "CompanyName"     "ContactName"    
## [13] "ContactTitle"    "Address"         "City"            "Region"         
## [17] "PostalCode"      "Country"         "Phone"           "Fax"            
## [21] "HomePage"
left
## # Source:   SQL [?? x 21]
## # Database: sqlite 3.40.1 [C:\sqlite\db\Northwind_large.sqlite]
##       Id Produ~1 Suppl~2 Categ~3 Quant~4 UnitP~5 Units~6 Units~7 Reord~8 Disco~9
##    <int> <chr>     <int>   <int> <chr>     <dbl>   <int>   <int>   <int>   <int>
##  1     1 Chai          1       1 10 box~    18        39       0      10       0
##  2     2 Chang         1       1 24 - 1~    19        17      40      25       0
##  3     3 Anisee~       1       2 12 - 5~    10        13      70      25       0
##  4     4 Chef A~       2       2 48 - 6~    22        53       0       0       0
##  5     5 Chef A~       2       2 36 box~    21.4       0       0       0       1
##  6     6 Grandm~       3       2 12 - 8~    25       120       0      25       0
##  7     7 Uncle ~       3       7 12 - 1~    30        15       0      10       0
##  8     8 Northw~       3       2 12 - 1~    40         6       0       0       0
##  9     9 Mishi ~       4       6 18 - 5~    97        29       0       0       1
## 10    10 Ikura         4       8 12 - 2~    31        31       0       0       0
## # ... with more rows, 11 more variables: CompanyName <chr>, ContactName <chr>,
## #   ContactTitle <chr>, Address <chr>, City <chr>, Region <chr>,
## #   PostalCode <chr>, Country <chr>, Phone <chr>, Fax <chr>, HomePage <chr>,
## #   and abbreviated variable names 1: ProductName, 2: SupplierId,
## #   3: CategoryId, 4: QuantityPerUnit, 5: UnitPrice, 6: UnitsInStock,
## #   7: UnitsOnOrder, 8: ReorderLevel, 9: Discontinued

Right Joint

right <- right_join(prod,sup, by = c("SupplierId" = "Id"))
colnames(right)
##  [1] "Id"              "ProductName"     "SupplierId"      "CategoryId"     
##  [5] "QuantityPerUnit" "UnitPrice"       "UnitsInStock"    "UnitsOnOrder"   
##  [9] "ReorderLevel"    "Discontinued"    "CompanyName"     "ContactName"    
## [13] "ContactTitle"    "Address"         "City"            "Region"         
## [17] "PostalCode"      "Country"         "Phone"           "Fax"            
## [21] "HomePage"
right
## # Source:   SQL [?? x 21]
## # Database: sqlite 3.40.1 [C:\sqlite\db\Northwind_large.sqlite]
##       Id Produ~1 Suppl~2 Categ~3 Quant~4 UnitP~5 Units~6 Units~7 Reord~8 Disco~9
##    <int> <chr>     <int>   <int> <chr>     <dbl>   <int>   <int>   <int>   <int>
##  1     3 Anisee~       1       2 12 - 5~    10        13      70      25       0
##  2     1 Chai          1       1 10 box~    18        39       0      10       0
##  3     2 Chang         1       1 24 - 1~    19        17      40      25       0
##  4     4 Chef A~       2       2 48 - 6~    22        53       0       0       0
##  5     5 Chef A~       2       2 36 box~    21.4       0       0       0       1
##  6    65 Louisi~       2       2 32 - 8~    21.0      76       0       0       0
##  7    66 Louisi~       2       2 24 - 8~    17         4     100      20       0
##  8     6 Grandm~       3       2 12 - 8~    25       120       0      25       0
##  9     8 Northw~       3       2 12 - 1~    40         6       0       0       0
## 10     7 Uncle ~       3       7 12 - 1~    30        15       0      10       0
## # ... with more rows, 11 more variables: CompanyName <chr>, ContactName <chr>,
## #   ContactTitle <chr>, Address <chr>, City <chr>, Region <chr>,
## #   PostalCode <chr>, Country <chr>, Phone <chr>, Fax <chr>, HomePage <chr>,
## #   and abbreviated variable names 1: ProductName, 2: SupplierId,
## #   3: CategoryId, 4: QuantityPerUnit, 5: UnitPrice, 6: UnitsInStock,
## #   7: UnitsOnOrder, 8: ReorderLevel, 9: Discontinued

Full Joint

full <- full_join(prod,sup, by = c("SupplierId" = "Id"))
colnames(full)
##  [1] "Id"              "ProductName"     "SupplierId"      "CategoryId"     
##  [5] "QuantityPerUnit" "UnitPrice"       "UnitsInStock"    "UnitsOnOrder"   
##  [9] "ReorderLevel"    "Discontinued"    "CompanyName"     "ContactName"    
## [13] "ContactTitle"    "Address"         "City"            "Region"         
## [17] "PostalCode"      "Country"         "Phone"           "Fax"            
## [21] "HomePage"
full
## # Source:   SQL [?? x 21]
## # Database: sqlite 3.40.1 [C:\sqlite\db\Northwind_large.sqlite]
##       Id Produ~1 Suppl~2 Categ~3 Quant~4 UnitP~5 Units~6 Units~7 Reord~8 Disco~9
##    <int> <chr>     <int>   <int> <chr>     <dbl>   <int>   <int>   <int>   <int>
##  1     1 Chai          1       1 10 box~    18        39       0      10       0
##  2     2 Chang         1       1 24 - 1~    19        17      40      25       0
##  3     3 Anisee~       1       2 12 - 5~    10        13      70      25       0
##  4     4 Chef A~       2       2 48 - 6~    22        53       0       0       0
##  5     5 Chef A~       2       2 36 box~    21.4       0       0       0       1
##  6     6 Grandm~       3       2 12 - 8~    25       120       0      25       0
##  7     7 Uncle ~       3       7 12 - 1~    30        15       0      10       0
##  8     8 Northw~       3       2 12 - 1~    40         6       0       0       0
##  9     9 Mishi ~       4       6 18 - 5~    97        29       0       0       1
## 10    10 Ikura         4       8 12 - 2~    31        31       0       0       0
## # ... with more rows, 11 more variables: CompanyName <chr>, ContactName <chr>,
## #   ContactTitle <chr>, Address <chr>, City <chr>, Region <chr>,
## #   PostalCode <chr>, Country <chr>, Phone <chr>, Fax <chr>, HomePage <chr>,
## #   and abbreviated variable names 1: ProductName, 2: SupplierId,
## #   3: CategoryId, 4: QuantityPerUnit, 5: UnitPrice, 6: UnitsInStock,
## #   7: UnitsOnOrder, 8: ReorderLevel, 9: Discontinued

Semi Joint

semi <- semi_join(prod,sup, by = c("SupplierId" = "Id"))
colnames(semi)
##  [1] "Id"              "ProductName"     "SupplierId"      "CategoryId"     
##  [5] "QuantityPerUnit" "UnitPrice"       "UnitsInStock"    "UnitsOnOrder"   
##  [9] "ReorderLevel"    "Discontinued"
semi
## # Source:   SQL [?? x 10]
## # Database: sqlite 3.40.1 [C:\sqlite\db\Northwind_large.sqlite]
##       Id Produ~1 Suppl~2 Categ~3 Quant~4 UnitP~5 Units~6 Units~7 Reord~8 Disco~9
##    <int> <chr>     <int>   <int> <chr>     <dbl>   <int>   <int>   <int>   <int>
##  1     1 Chai          1       1 10 box~    18        39       0      10       0
##  2     2 Chang         1       1 24 - 1~    19        17      40      25       0
##  3     3 Anisee~       1       2 12 - 5~    10        13      70      25       0
##  4     4 Chef A~       2       2 48 - 6~    22        53       0       0       0
##  5     5 Chef A~       2       2 36 box~    21.4       0       0       0       1
##  6     6 Grandm~       3       2 12 - 8~    25       120       0      25       0
##  7     7 Uncle ~       3       7 12 - 1~    30        15       0      10       0
##  8     8 Northw~       3       2 12 - 1~    40         6       0       0       0
##  9     9 Mishi ~       4       6 18 - 5~    97        29       0       0       1
## 10    10 Ikura         4       8 12 - 2~    31        31       0       0       0
## # ... with more rows, and abbreviated variable names 1: ProductName,
## #   2: SupplierId, 3: CategoryId, 4: QuantityPerUnit, 5: UnitPrice,
## #   6: UnitsInStock, 7: UnitsOnOrder, 8: ReorderLevel, 9: Discontinued

Anti Joint

anti <- anti_join(prod,sup, by = c("SupplierId" = "Id"))
colnames(anti)
##  [1] "Id"              "ProductName"     "SupplierId"      "CategoryId"     
##  [5] "QuantityPerUnit" "UnitPrice"       "UnitsInStock"    "UnitsOnOrder"   
##  [9] "ReorderLevel"    "Discontinued"
anti
## # Source:   SQL [0 x 10]
## # Database: sqlite 3.40.1 [C:\sqlite\db\Northwind_large.sqlite]
## # ... with 10 variables: Id <int>, ProductName <chr>, SupplierId <int>,
## #   CategoryId <int>, QuantityPerUnit <chr>, UnitPrice <dbl>,
## #   UnitsInStock <int>, UnitsOnOrder <int>, ReorderLevel <int>,
## #   Discontinued <int>