A(Akses Database)

1A(Engine SQL)

chinook<-DBI::dbConnect(RSQLite::SQLite(), "D:/Ica/Mandarel/chinook.db")
class(chinook)
## [1] "SQLiteConnection"
## attr(,"package")
## [1] "RSQLite"
RSQLite::dbListTables(chinook)
##  [1] "albums"          "artists"         "customers"       "employees"      
##  [5] "genres"          "invoice_items"   "invoices"        "media_types"    
##  [9] "playlist_track"  "playlists"       "sqlite_sequence" "sqlite_stat1"   
## [13] "tracks"

2A(DPLYR)

chinook<-DBI::dbConnect(RSQLite::SQLite(), "D:/Ica/Mandarel/chinook.db")
class(chinook)
## [1] "SQLiteConnection"
## attr(,"package")
## [1] "RSQLite"
RSQLite::dbListTables(chinook)
##  [1] "albums"          "artists"         "customers"       "employees"      
##  [5] "genres"          "invoice_items"   "invoices"        "media_types"    
##  [9] "playlist_track"  "playlists"       "sqlite_sequence" "sqlite_stat1"   
## [13] "tracks"
dplyr::tbl(chinook, "invoice_items")
## # Source:   table<invoice_items> [?? x 5]
## # Database: sqlite 3.37.0 [D:\ICA\Mandarel\chinook.db]
##    InvoiceLineId InvoiceId TrackId UnitPrice Quantity
##            <int>     <int>   <int>     <dbl>    <int>
##  1             1         1       2      0.99        1
##  2             2         1       4      0.99        1
##  3             3         2       6      0.99        1
##  4             4         2       8      0.99        1
##  5             5         2      10      0.99        1
##  6             6         2      12      0.99        1
##  7             7         3      16      0.99        1
##  8             8         3      20      0.99        1
##  9             9         3      24      0.99        1
## 10            10         3      28      0.99        1
## # ... with more rows
invoice_items<-dplyr::tbl(chinook, "invoice_items")
class("invoice_items")
## [1] "character"
invoice_items
## # Source:   table<invoice_items> [?? x 5]
## # Database: sqlite 3.37.0 [D:\ICA\Mandarel\chinook.db]
##    InvoiceLineId InvoiceId TrackId UnitPrice Quantity
##            <int>     <int>   <int>     <dbl>    <int>
##  1             1         1       2      0.99        1
##  2             2         1       4      0.99        1
##  3             3         2       6      0.99        1
##  4             4         2       8      0.99        1
##  5             5         2      10      0.99        1
##  6             6         2      12      0.99        1
##  7             7         3      16      0.99        1
##  8             8         3      20      0.99        1
##  9             9         3      24      0.99        1
## 10            10         3      28      0.99        1
## # ... with more rows
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
data1 <- invoice_items %>%
  select(InvoiceLineId, InvoiceId, TrackId, UnitPrice, Quantity) %>%
  filter(TrackId>10)
data1
## # Source:   lazy query [?? x 5]
## # Database: sqlite 3.37.0 [D:\ICA\Mandarel\chinook.db]
##    InvoiceLineId InvoiceId TrackId UnitPrice Quantity
##            <int>     <int>   <int>     <dbl>    <int>
##  1             6         2      12      0.99        1
##  2           582       108      13      0.99        1
##  3          1156       214      14      0.99        1
##  4          1730       319      15      0.99        1
##  5             7         3      16      0.99        1
##  6           583       109      19      0.99        1
##  7             8         3      20      0.99        1
##  8          1157       214      20      0.99        1
##  9          1731       319      21      0.99        1
## 10             9         3      24      0.99        1
## # ... with more rows
dplyr::show_query(data1)
## <SQL>
## SELECT *
## FROM (SELECT `InvoiceLineId`, `InvoiceId`, `TrackId`, `UnitPrice`, `Quantity`
## FROM `invoice_items`)
## WHERE (`TrackId` > 10.0)

B(Data Wrangling)

library(tidyverse)
library(datasets)
data(esoph)

esoph<-tibble::as_tibble(esoph)
class(esoph)
glimpse(esoph)

Summarise()

esoph %>% group_by(agegp) %>% summarise(avg_ncontrols=mean(ncontrols))
## # A tibble: 6 x 2
##   agegp avg_ncontrols
##   <ord>         <dbl>
## 1 25-34          7.67
## 2 35-44         12.7 
## 3 45-54         10.4 
## 4 55-64         10.4 
## 5 65-74          7.07
## 6 75+            2.82

Arrange()

esoph %>% arrange(desc(agegp))
##    agegp     alcgp    tobgp ncases ncontrols
## 1    75+ 0-39g/day 0-9g/day      1        17
## 2    75+ 0-39g/day    10-19      2         4
## 3    75+ 0-39g/day      30+      1         2
## 4    75+     40-79 0-9g/day      2         3
## 5    75+     40-79    10-19      1         2
## 6    75+     40-79    20-29      0         3
## 7    75+     40-79      30+      1         0
## 8    75+    80-119 0-9g/day      1         0
## 9    75+    80-119    10-19      1         0
## 10   75+      120+ 0-9g/day      2         0
## 11   75+      120+    10-19      1         0
## 12 65-74 0-39g/day 0-9g/day      5        43
## 13 65-74 0-39g/day    10-19      4        10
## 14 65-74 0-39g/day    20-29      2         5
## 15 65-74 0-39g/day      30+      0         2
## 16 65-74     40-79 0-9g/day     17        17
## 17 65-74     40-79    10-19      3         7
## 18 65-74     40-79    20-29      5         4
## 19 65-74    80-119 0-9g/day      6         7
## 20 65-74    80-119    10-19      4         8
## 21 65-74    80-119    20-29      2         1
## 22 65-74    80-119      30+      1         0
## 23 65-74      120+ 0-9g/day      3         1
## 24 65-74      120+    10-19      1         1
## 25 65-74      120+    20-29      1         0
## 26 65-74      120+      30+      1         0
## 27 55-64 0-39g/day 0-9g/day      2        47
## 28 55-64 0-39g/day    10-19      3        19
## 29 55-64 0-39g/day    20-29      3         9
## 30 55-64 0-39g/day      30+      4         2
## 31 55-64     40-79 0-9g/day      9        31
## 32 55-64     40-79    10-19      6        15
## 33 55-64     40-79    20-29      4        13
## 34 55-64     40-79      30+      3         3
## 35 55-64    80-119 0-9g/day      9         9
## 36 55-64    80-119    10-19      8         7
## 37 55-64    80-119    20-29      3         3
## 38 55-64    80-119      30+      4         0
## 39 55-64      120+ 0-9g/day      5         5
## 40 55-64      120+    10-19      6         1
## 41 55-64      120+    20-29      2         1
## 42 55-64      120+      30+      5         1
## 43 45-54 0-39g/day 0-9g/day      1        45
## 44 45-54 0-39g/day    10-19      0        18
## 45 45-54 0-39g/day    20-29      0        10
## 46 45-54 0-39g/day      30+      0         4
## 47 45-54     40-79 0-9g/day      6        32
## 48 45-54     40-79    10-19      4        17
## 49 45-54     40-79    20-29      5        10
## 50 45-54     40-79      30+      5         2
## 51 45-54    80-119 0-9g/day      3        13
## 52 45-54    80-119    10-19      6         8
## 53 45-54    80-119    20-29      1         4
## 54 45-54    80-119      30+      2         2
## 55 45-54      120+ 0-9g/day      4         0
## 56 45-54      120+    10-19      3         1
## 57 45-54      120+    20-29      2         1
## 58 45-54      120+      30+      4         0
## 59 35-44 0-39g/day 0-9g/day      0        60
## 60 35-44 0-39g/day    10-19      1        13
## 61 35-44 0-39g/day    20-29      0         7
## 62 35-44 0-39g/day      30+      0         8
## 63 35-44     40-79 0-9g/day      0        35
## 64 35-44     40-79    10-19      3        20
## 65 35-44     40-79    20-29      1        13
## 66 35-44     40-79      30+      0         8
## 67 35-44    80-119 0-9g/day      0        11
## 68 35-44    80-119    10-19      0         6
## 69 35-44    80-119    20-29      0         2
## 70 35-44    80-119      30+      0         1
## 71 35-44      120+ 0-9g/day      2         1
## 72 35-44      120+    10-19      0         3
## 73 35-44      120+    20-29      2         2
## 74 25-34 0-39g/day 0-9g/day      0        40
## 75 25-34 0-39g/day    10-19      0        10
## 76 25-34 0-39g/day    20-29      0         6
## 77 25-34 0-39g/day      30+      0         5
## 78 25-34     40-79 0-9g/day      0        27
## 79 25-34     40-79    10-19      0         7
## 80 25-34     40-79    20-29      0         4
## 81 25-34     40-79      30+      0         7
## 82 25-34    80-119 0-9g/day      0         2
## 83 25-34    80-119    10-19      0         1
## 84 25-34    80-119      30+      0         2
## 85 25-34      120+ 0-9g/day      0         1
## 86 25-34      120+    10-19      1         0
## 87 25-34      120+    20-29      0         1
## 88 25-34      120+      30+      0         2

Filter()

esoph %>% filter(ncases==1)
##    agegp     alcgp    tobgp ncases ncontrols
## 1  25-34      120+    10-19      1         0
## 2  35-44 0-39g/day    10-19      1        13
## 3  35-44     40-79    20-29      1        13
## 4  45-54 0-39g/day 0-9g/day      1        45
## 5  45-54    80-119    20-29      1         4
## 6  65-74    80-119      30+      1         0
## 7  65-74      120+    10-19      1         1
## 8  65-74      120+    20-29      1         0
## 9  65-74      120+      30+      1         0
## 10   75+ 0-39g/day 0-9g/day      1        17
## 11   75+ 0-39g/day      30+      1         2
## 12   75+     40-79    10-19      1         2
## 13   75+     40-79      30+      1         0
## 14   75+    80-119 0-9g/day      1         0
## 15   75+    80-119    10-19      1         0
## 16   75+      120+    10-19      1         0

Mutate()

esoph %>% mutate(ngp=ncases)
##    agegp     alcgp    tobgp ncases ncontrols ngp
## 1  25-34 0-39g/day 0-9g/day      0        40   0
## 2  25-34 0-39g/day    10-19      0        10   0
## 3  25-34 0-39g/day    20-29      0         6   0
## 4  25-34 0-39g/day      30+      0         5   0
## 5  25-34     40-79 0-9g/day      0        27   0
## 6  25-34     40-79    10-19      0         7   0
## 7  25-34     40-79    20-29      0         4   0
## 8  25-34     40-79      30+      0         7   0
## 9  25-34    80-119 0-9g/day      0         2   0
## 10 25-34    80-119    10-19      0         1   0
## 11 25-34    80-119      30+      0         2   0
## 12 25-34      120+ 0-9g/day      0         1   0
## 13 25-34      120+    10-19      1         0   1
## 14 25-34      120+    20-29      0         1   0
## 15 25-34      120+      30+      0         2   0
## 16 35-44 0-39g/day 0-9g/day      0        60   0
## 17 35-44 0-39g/day    10-19      1        13   1
## 18 35-44 0-39g/day    20-29      0         7   0
## 19 35-44 0-39g/day      30+      0         8   0
## 20 35-44     40-79 0-9g/day      0        35   0
## 21 35-44     40-79    10-19      3        20   3
## 22 35-44     40-79    20-29      1        13   1
## 23 35-44     40-79      30+      0         8   0
## 24 35-44    80-119 0-9g/day      0        11   0
## 25 35-44    80-119    10-19      0         6   0
## 26 35-44    80-119    20-29      0         2   0
## 27 35-44    80-119      30+      0         1   0
## 28 35-44      120+ 0-9g/day      2         1   2
## 29 35-44      120+    10-19      0         3   0
## 30 35-44      120+    20-29      2         2   2
## 31 45-54 0-39g/day 0-9g/day      1        45   1
## 32 45-54 0-39g/day    10-19      0        18   0
## 33 45-54 0-39g/day    20-29      0        10   0
## 34 45-54 0-39g/day      30+      0         4   0
## 35 45-54     40-79 0-9g/day      6        32   6
## 36 45-54     40-79    10-19      4        17   4
## 37 45-54     40-79    20-29      5        10   5
## 38 45-54     40-79      30+      5         2   5
## 39 45-54    80-119 0-9g/day      3        13   3
## 40 45-54    80-119    10-19      6         8   6
## 41 45-54    80-119    20-29      1         4   1
## 42 45-54    80-119      30+      2         2   2
## 43 45-54      120+ 0-9g/day      4         0   4
## 44 45-54      120+    10-19      3         1   3
## 45 45-54      120+    20-29      2         1   2
## 46 45-54      120+      30+      4         0   4
## 47 55-64 0-39g/day 0-9g/day      2        47   2
## 48 55-64 0-39g/day    10-19      3        19   3
## 49 55-64 0-39g/day    20-29      3         9   3
## 50 55-64 0-39g/day      30+      4         2   4
## 51 55-64     40-79 0-9g/day      9        31   9
## 52 55-64     40-79    10-19      6        15   6
## 53 55-64     40-79    20-29      4        13   4
## 54 55-64     40-79      30+      3         3   3
## 55 55-64    80-119 0-9g/day      9         9   9
## 56 55-64    80-119    10-19      8         7   8
## 57 55-64    80-119    20-29      3         3   3
## 58 55-64    80-119      30+      4         0   4
## 59 55-64      120+ 0-9g/day      5         5   5
## 60 55-64      120+    10-19      6         1   6
## 61 55-64      120+    20-29      2         1   2
## 62 55-64      120+      30+      5         1   5
## 63 65-74 0-39g/day 0-9g/day      5        43   5
## 64 65-74 0-39g/day    10-19      4        10   4
## 65 65-74 0-39g/day    20-29      2         5   2
## 66 65-74 0-39g/day      30+      0         2   0
## 67 65-74     40-79 0-9g/day     17        17  17
## 68 65-74     40-79    10-19      3         7   3
## 69 65-74     40-79    20-29      5         4   5
## 70 65-74    80-119 0-9g/day      6         7   6
## 71 65-74    80-119    10-19      4         8   4
## 72 65-74    80-119    20-29      2         1   2
## 73 65-74    80-119      30+      1         0   1
## 74 65-74      120+ 0-9g/day      3         1   3
## 75 65-74      120+    10-19      1         1   1
## 76 65-74      120+    20-29      1         0   1
## 77 65-74      120+      30+      1         0   1
## 78   75+ 0-39g/day 0-9g/day      1        17   1
## 79   75+ 0-39g/day    10-19      2         4   2
## 80   75+ 0-39g/day      30+      1         2   1
## 81   75+     40-79 0-9g/day      2         3   2
## 82   75+     40-79    10-19      1         2   1
## 83   75+     40-79    20-29      0         3   0
## 84   75+     40-79      30+      1         0   1
## 85   75+    80-119 0-9g/day      1         0   1
## 86   75+    80-119    10-19      1         0   1
## 87   75+      120+ 0-9g/day      2         0   2
## 88   75+      120+    10-19      1         0   1

Select()

esoph %>% select(agegp, tobgp, ncases, ncontrols)
##    agegp    tobgp ncases ncontrols
## 1  25-34 0-9g/day      0        40
## 2  25-34    10-19      0        10
## 3  25-34    20-29      0         6
## 4  25-34      30+      0         5
## 5  25-34 0-9g/day      0        27
## 6  25-34    10-19      0         7
## 7  25-34    20-29      0         4
## 8  25-34      30+      0         7
## 9  25-34 0-9g/day      0         2
## 10 25-34    10-19      0         1
## 11 25-34      30+      0         2
## 12 25-34 0-9g/day      0         1
## 13 25-34    10-19      1         0
## 14 25-34    20-29      0         1
## 15 25-34      30+      0         2
## 16 35-44 0-9g/day      0        60
## 17 35-44    10-19      1        13
## 18 35-44    20-29      0         7
## 19 35-44      30+      0         8
## 20 35-44 0-9g/day      0        35
## 21 35-44    10-19      3        20
## 22 35-44    20-29      1        13
## 23 35-44      30+      0         8
## 24 35-44 0-9g/day      0        11
## 25 35-44    10-19      0         6
## 26 35-44    20-29      0         2
## 27 35-44      30+      0         1
## 28 35-44 0-9g/day      2         1
## 29 35-44    10-19      0         3
## 30 35-44    20-29      2         2
## 31 45-54 0-9g/day      1        45
## 32 45-54    10-19      0        18
## 33 45-54    20-29      0        10
## 34 45-54      30+      0         4
## 35 45-54 0-9g/day      6        32
## 36 45-54    10-19      4        17
## 37 45-54    20-29      5        10
## 38 45-54      30+      5         2
## 39 45-54 0-9g/day      3        13
## 40 45-54    10-19      6         8
## 41 45-54    20-29      1         4
## 42 45-54      30+      2         2
## 43 45-54 0-9g/day      4         0
## 44 45-54    10-19      3         1
## 45 45-54    20-29      2         1
## 46 45-54      30+      4         0
## 47 55-64 0-9g/day      2        47
## 48 55-64    10-19      3        19
## 49 55-64    20-29      3         9
## 50 55-64      30+      4         2
## 51 55-64 0-9g/day      9        31
## 52 55-64    10-19      6        15
## 53 55-64    20-29      4        13
## 54 55-64      30+      3         3
## 55 55-64 0-9g/day      9         9
## 56 55-64    10-19      8         7
## 57 55-64    20-29      3         3
## 58 55-64      30+      4         0
## 59 55-64 0-9g/day      5         5
## 60 55-64    10-19      6         1
## 61 55-64    20-29      2         1
## 62 55-64      30+      5         1
## 63 65-74 0-9g/day      5        43
## 64 65-74    10-19      4        10
## 65 65-74    20-29      2         5
## 66 65-74      30+      0         2
## 67 65-74 0-9g/day     17        17
## 68 65-74    10-19      3         7
## 69 65-74    20-29      5         4
## 70 65-74 0-9g/day      6         7
## 71 65-74    10-19      4         8
## 72 65-74    20-29      2         1
## 73 65-74      30+      1         0
## 74 65-74 0-9g/day      3         1
## 75 65-74    10-19      1         1
## 76 65-74    20-29      1         0
## 77 65-74      30+      1         0
## 78   75+ 0-9g/day      1        17
## 79   75+    10-19      2         4
## 80   75+      30+      1         2
## 81   75+ 0-9g/day      2         3
## 82   75+    10-19      1         2
## 83   75+    20-29      0         3
## 84   75+      30+      1         0
## 85   75+ 0-9g/day      1         0
## 86   75+    10-19      1         0
## 87   75+ 0-9g/day      2         0
## 88   75+    10-19      1         0

Gabung()

esoph %>% select(agegp, tobgp, ncases, ncontrols) %>% arrange(desc(agegp)) %>% filter(ncases==1) %>% mutate(ngp=ncases) %>% summarise(avg_ncontrols=mean(ncontrols))
##   avg_ncontrols
## 1        6.0625