A. Akses Database Company

1. Engine SQL

db<-DBI::dbConnect(RSQLite::SQLite(), "C:/MANAJEMEN DATA RELASIONAL/company.db")

Memilih Semua Kolom dengan Filter Tertentu

SELECT
  *
FROM
  employee
WHERE
  SALARY > 30000;
4 records
Fname Minit Lname Ssn Bdate Address Sex Salary Super_ssn Dno
Franklin T Wong 333445555 1965-12-08 638 Voss, Houston TX M 40000 888665555 5
Jennifer S Wallace 987654321 1941-06-20 291 Berry, Bellaire TX F 43000 888665555 4
Ramesh K Narayan 666884444 1962-09-15 975 Fire Oak, Humble TX M 38000 333445555 5
James E Borg 888665555 1937-11-10 450 Stone, Houston TX M 55000 NA 1

2. Menggunakan DPLYR

Fungsi dapat dijalankankan tanpa memanggil package dengan perintah library() atau require(), jika syntax yang ditulis menyertakan nama package tempat fungsi tersebut berasal.

Untuk menjalankan syntax berikut ini, kita perlu mengunduh Database company.db dan menyimpannya pada C:/sqlite/

company<-DBI::dbConnect(RSQLite::SQLite(), "C:/sqlite/company.db")
class(company)
## [1] "SQLiteConnection"
## attr(,"package")
## [1] "RSQLite"

Selanjutnya, fungsi dbListTables dapat digunakan untuk melihat tables yang terdapat di dalam database.

RSQLite::dbListTables(company)
## [1] "DEPARTMENT"     "DEPENDENT"      "DEPT_LOCATIONS" "EMPLOYEE"      
## [5] "PROJECT"        "WORKS_ON"
dplyr::tbl(company, "works_on")
## # Source:   table<works_on> [?? x 3]
## # Database: sqlite 3.37.0 [C:\sqlite\company.db]
##    Essn        Pno Hours
##    <chr>     <int> <dbl>
##  1 123456789     1  32.5
##  2 123456789     2   7.5
##  3 666884444     3  40  
##  4 453453453     1  20  
##  5 453453453     2  20  
##  6 333445555     2  10  
##  7 333445555     3  10  
##  8 333445555    10  10  
##  9 333445555    20  10  
## 10 999887777    30  30  
## # ... with more rows
works_on <- dplyr::tbl(company, "works_on")
class("works_on")
## [1] "character"
works_on
## # Source:   table<works_on> [?? x 3]
## # Database: sqlite 3.37.0 [C:\sqlite\company.db]
##    Essn        Pno Hours
##    <chr>     <int> <dbl>
##  1 123456789     1  32.5
##  2 123456789     2   7.5
##  3 666884444     3  40  
##  4 453453453     1  20  
##  5 453453453     2  20  
##  6 333445555     2  10  
##  7 333445555     3  10  
##  8 333445555    10  10  
##  9 333445555    20  10  
## 10 999887777    30  30  
## # ... with more rows
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
q<-works_on %>%
  select(Essn, Pno, Hours) %>%
  filter(Hours>10)
q
## # Source:   lazy query [?? x 3]
## # Database: sqlite 3.37.0 [C:\sqlite\company.db]
##   Essn        Pno Hours
##   <chr>     <int> <dbl>
## 1 123456789     1  32.5
## 2 666884444     3  40  
## 3 453453453     1  20  
## 4 453453453     2  20  
## 5 999887777    30  30  
## 6 987987987    10  35  
## 7 987654321    30  20  
## 8 987654321    20  15  
## 9 888665555    20  16
dplyr::show_query(q)
## <SQL>
## SELECT *
## FROM (SELECT `Essn`, `Pno`, `Hours`
## FROM `works_on`)
## WHERE (`Hours` > 10.0)

B. Data Wrangling

1. Datasets ChickWeight

Load package jika sudah diinstall:

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.5     v stringr 1.4.0
## v tidyr   1.1.4     v forcats 0.5.1
## v readr   2.1.0
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'purrr' was built under R version 4.1.2
## Warning: package 'stringr' was built under R version 4.1.2
## Warning: package 'forcats' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Untuk melihat isi dari package, kita dapat menggunakan perintah help() atau dengan menuliskan ?? yang diikuti dengan keyword yang akan dicari.

?tidyverse
## starting httpd help server ... done

Digunakan data ChickWeight dari package datasets

library(datasets)
data("ChickWeight")
ChickWeight<-tibble::as_tibble(ChickWeight)
class(ChickWeight)
## [1] "tbl_df"     "tbl"        "data.frame"
View(ChickWeight)
glimpse(ChickWeight)
## Rows: 578
## Columns: 4
## $ weight <dbl> 42, 51, 59, 64, 76, 93, 106, 125, 149, 171, 199, 205, 40, 49, 5~
## $ Time   <dbl> 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 21, 0, 2, 4, 6, 8, 10, 1~
## $ Chick  <ord> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, ~
## $ Diet   <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~

2. Penggunaan masing-masing fungsi summarise(), arrange(), filter(), mutate(), select()

Menggunaan fungsi summarise() untuk meringkas data.

ChickWeight %>% 
                group_by(Diet) %>% 
                summarise(mean_weight=mean(weight), .groups='drop')
## # A tibble: 4 x 2
##   Diet  mean_weight
##   <fct>       <dbl>
## 1 1            103.
## 2 2            123.
## 3 3            143.
## 4 4            135.

Menggunakan fungsi arrange() untuk mengurutkan data.

ChickWeight %>% 
                arrange(weight)
## # A tibble: 578 x 4
##    weight  Time Chick Diet 
##     <dbl> <dbl> <ord> <fct>
##  1     35     2 18    1    
##  2     39     2 3     1    
##  3     39     0 18    1    
##  4     39     0 27    2    
##  5     39     0 28    2    
##  6     39     0 29    2    
##  7     39     0 33    3    
##  8     39     0 36    3    
##  9     39     0 48    4    
## 10     40     0 2     1    
## # ... with 568 more rows

Menggunakan fungsi filter() untuk memilih sebagian data berdasarkan nilai tertentu.

ChickWeight %>% 
                filter(weight>100)
## # A tibble: 300 x 4
##    weight  Time Chick Diet 
##     <dbl> <dbl> <ord> <fct>
##  1    106    12 1     1    
##  2    125    14 1     1    
##  3    149    16 1     1    
##  4    171    18 1     1    
##  5    199    20 1     1    
##  6    205    21 1     1    
##  7    103    10 2     1    
##  8    122    12 2     1    
##  9    138    14 2     1    
## 10    162    16 2     1    
## # ... with 290 more rows

Menggunakan fungsi mutate() untuk menambahkan peubah baru pada data.

ChickWeight %>% 
                mutate(weight2=weight*2)
## # A tibble: 578 x 5
##    weight  Time Chick Diet  weight2
##     <dbl> <dbl> <ord> <fct>   <dbl>
##  1     42     0 1     1          84
##  2     51     2 1     1         102
##  3     59     4 1     1         118
##  4     64     6 1     1         128
##  5     76     8 1     1         152
##  6     93    10 1     1         186
##  7    106    12 1     1         212
##  8    125    14 1     1         250
##  9    149    16 1     1         298
## 10    171    18 1     1         342
## # ... with 568 more rows

Fungsi select() dapat digunakan untuk memilih subset data berdasarkan peubah tertentu.

ChickWeight %>% 
                select(weight, Time, Chick)
## # A tibble: 578 x 3
##    weight  Time Chick
##     <dbl> <dbl> <ord>
##  1     42     0 1    
##  2     51     2 1    
##  3     59     4 1    
##  4     64     6 1    
##  5     76     8 1    
##  6     93    10 1    
##  7    106    12 1    
##  8    125    14 1    
##  9    149    16 1    
## 10    171    18 1    
## # ... with 568 more rows

3. Fungsi select() dan mutate() digunakan bersama-sama

ChickWeight %>% 
                select(weight, Time, Chick) %>% 
                mutate(weight2=weight*2)
## # A tibble: 578 x 4
##    weight  Time Chick weight2
##     <dbl> <dbl> <ord>   <dbl>
##  1     42     0 1          84
##  2     51     2 1         102
##  3     59     4 1         118
##  4     64     6 1         128
##  5     76     8 1         152
##  6     93    10 1         186
##  7    106    12 1         212
##  8    125    14 1         250
##  9    149    16 1         298
## 10    171    18 1         342
## # ... with 568 more rows
ChickWeight_baru1 <- ChickWeight %>% 
                select(weight, Time, Chick) %>% 
                mutate(weight2=weight*2)
ChickWeight_baru1
## # A tibble: 578 x 4
##    weight  Time Chick weight2
##     <dbl> <dbl> <ord>   <dbl>
##  1     42     0 1          84
##  2     51     2 1         102
##  3     59     4 1         118
##  4     64     6 1         128
##  5     76     8 1         152
##  6     93    10 1         186
##  7    106    12 1         212
##  8    125    14 1         250
##  9    149    16 1         298
## 10    171    18 1         342
## # ... with 568 more rows

3. Fungsi select(), mutate(), arrange(), filter(), summarise() digunakan bersama-sama

ChickWeight %>% 
                select(Diet, weight) %>% 
                mutate(weight2=weight*2) %>% 
                arrange(weight2) %>% 
                filter(weight2>100) %>% 
                group_by(Diet) %>% summarise(mean_weight=mean(weight2), .groups='drop')
## # A tibble: 4 x 2
##   Diet  mean_weight
##   <fct>       <dbl>
## 1 1            230.
## 2 2            271.
## 3 3            319.
## 4 4            293.
ChickWeight_baru2 <- ChickWeight %>% 
                select(Diet, weight) %>% 
                mutate(weight2=weight*2) %>% 
                arrange(weight2) %>% 
                filter(weight2>100) %>% 
                group_by(Diet) %>% summarise(mean_weight=mean(weight2), .groups='drop')
ChickWeight_baru2
## # A tibble: 4 x 2
##   Diet  mean_weight
##   <fct>       <dbl>
## 1 1            230.
## 2 2            271.
## 3 3            319.
## 4 4            293.