A(Akses Database)
1A(Engine SQL)
chinook<-DBI::dbConnect(RSQLite::SQLite(), "D:/Ica/Mandarel/chinook.db")
class(chinook)
## [1] "SQLiteConnection"
## attr(,"package")
## [1] "RSQLite"
RSQLite::dbListTables(chinook)
## [1] "albums" "artists" "customers" "employees"
## [5] "genres" "invoice_items" "invoices" "media_types"
## [9] "playlist_track" "playlists" "sqlite_sequence" "sqlite_stat1"
## [13] "tracks"
2A(DPLYR)
chinook<-DBI::dbConnect(RSQLite::SQLite(), "D:/Ica/Mandarel/chinook.db")
class(chinook)
## [1] "SQLiteConnection"
## attr(,"package")
## [1] "RSQLite"
RSQLite::dbListTables(chinook)
## [1] "albums" "artists" "customers" "employees"
## [5] "genres" "invoice_items" "invoices" "media_types"
## [9] "playlist_track" "playlists" "sqlite_sequence" "sqlite_stat1"
## [13] "tracks"
dplyr::tbl(chinook, "invoice_items")
## # Source: table<invoice_items> [?? x 5]
## # Database: sqlite 3.37.0 [D:\ICA\Mandarel\chinook.db]
## InvoiceLineId InvoiceId TrackId UnitPrice Quantity
## <int> <int> <int> <dbl> <int>
## 1 1 1 2 0.99 1
## 2 2 1 4 0.99 1
## 3 3 2 6 0.99 1
## 4 4 2 8 0.99 1
## 5 5 2 10 0.99 1
## 6 6 2 12 0.99 1
## 7 7 3 16 0.99 1
## 8 8 3 20 0.99 1
## 9 9 3 24 0.99 1
## 10 10 3 28 0.99 1
## # ... with more rows
invoice_items<-dplyr::tbl(chinook, "invoice_items")
class("invoice_items")
## [1] "character"
invoice_items
## # Source: table<invoice_items> [?? x 5]
## # Database: sqlite 3.37.0 [D:\ICA\Mandarel\chinook.db]
## InvoiceLineId InvoiceId TrackId UnitPrice Quantity
## <int> <int> <int> <dbl> <int>
## 1 1 1 2 0.99 1
## 2 2 1 4 0.99 1
## 3 3 2 6 0.99 1
## 4 4 2 8 0.99 1
## 5 5 2 10 0.99 1
## 6 6 2 12 0.99 1
## 7 7 3 16 0.99 1
## 8 8 3 20 0.99 1
## 9 9 3 24 0.99 1
## 10 10 3 28 0.99 1
## # ... with more rows
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data1 <- invoice_items %>%
select(InvoiceLineId, InvoiceId, TrackId, UnitPrice, Quantity) %>%
filter(TrackId>10)
data1
## # Source: lazy query [?? x 5]
## # Database: sqlite 3.37.0 [D:\ICA\Mandarel\chinook.db]
## InvoiceLineId InvoiceId TrackId UnitPrice Quantity
## <int> <int> <int> <dbl> <int>
## 1 6 2 12 0.99 1
## 2 582 108 13 0.99 1
## 3 1156 214 14 0.99 1
## 4 1730 319 15 0.99 1
## 5 7 3 16 0.99 1
## 6 583 109 19 0.99 1
## 7 8 3 20 0.99 1
## 8 1157 214 20 0.99 1
## 9 1731 319 21 0.99 1
## 10 9 3 24 0.99 1
## # ... with more rows
dplyr::show_query(data1)
## <SQL>
## SELECT *
## FROM (SELECT `InvoiceLineId`, `InvoiceId`, `TrackId`, `UnitPrice`, `Quantity`
## FROM `invoice_items`)
## WHERE (`TrackId` > 10.0)
B(Data Wrangling)
library(tidyverse)
library(datasets)
data(esoph)
esoph<-tibble::as_tibble(esoph)
class(esoph)
glimpse(esoph)
Summarise()
esoph %>% group_by(agegp) %>% summarise(avg_ncontrols=mean(ncontrols))
## # A tibble: 6 x 2
## agegp avg_ncontrols
## <ord> <dbl>
## 1 25-34 7.67
## 2 35-44 12.7
## 3 45-54 10.4
## 4 55-64 10.4
## 5 65-74 7.07
## 6 75+ 2.82
Arrange()
esoph %>% arrange(desc(agegp))
## agegp alcgp tobgp ncases ncontrols
## 1 75+ 0-39g/day 0-9g/day 1 17
## 2 75+ 0-39g/day 10-19 2 4
## 3 75+ 0-39g/day 30+ 1 2
## 4 75+ 40-79 0-9g/day 2 3
## 5 75+ 40-79 10-19 1 2
## 6 75+ 40-79 20-29 0 3
## 7 75+ 40-79 30+ 1 0
## 8 75+ 80-119 0-9g/day 1 0
## 9 75+ 80-119 10-19 1 0
## 10 75+ 120+ 0-9g/day 2 0
## 11 75+ 120+ 10-19 1 0
## 12 65-74 0-39g/day 0-9g/day 5 43
## 13 65-74 0-39g/day 10-19 4 10
## 14 65-74 0-39g/day 20-29 2 5
## 15 65-74 0-39g/day 30+ 0 2
## 16 65-74 40-79 0-9g/day 17 17
## 17 65-74 40-79 10-19 3 7
## 18 65-74 40-79 20-29 5 4
## 19 65-74 80-119 0-9g/day 6 7
## 20 65-74 80-119 10-19 4 8
## 21 65-74 80-119 20-29 2 1
## 22 65-74 80-119 30+ 1 0
## 23 65-74 120+ 0-9g/day 3 1
## 24 65-74 120+ 10-19 1 1
## 25 65-74 120+ 20-29 1 0
## 26 65-74 120+ 30+ 1 0
## 27 55-64 0-39g/day 0-9g/day 2 47
## 28 55-64 0-39g/day 10-19 3 19
## 29 55-64 0-39g/day 20-29 3 9
## 30 55-64 0-39g/day 30+ 4 2
## 31 55-64 40-79 0-9g/day 9 31
## 32 55-64 40-79 10-19 6 15
## 33 55-64 40-79 20-29 4 13
## 34 55-64 40-79 30+ 3 3
## 35 55-64 80-119 0-9g/day 9 9
## 36 55-64 80-119 10-19 8 7
## 37 55-64 80-119 20-29 3 3
## 38 55-64 80-119 30+ 4 0
## 39 55-64 120+ 0-9g/day 5 5
## 40 55-64 120+ 10-19 6 1
## 41 55-64 120+ 20-29 2 1
## 42 55-64 120+ 30+ 5 1
## 43 45-54 0-39g/day 0-9g/day 1 45
## 44 45-54 0-39g/day 10-19 0 18
## 45 45-54 0-39g/day 20-29 0 10
## 46 45-54 0-39g/day 30+ 0 4
## 47 45-54 40-79 0-9g/day 6 32
## 48 45-54 40-79 10-19 4 17
## 49 45-54 40-79 20-29 5 10
## 50 45-54 40-79 30+ 5 2
## 51 45-54 80-119 0-9g/day 3 13
## 52 45-54 80-119 10-19 6 8
## 53 45-54 80-119 20-29 1 4
## 54 45-54 80-119 30+ 2 2
## 55 45-54 120+ 0-9g/day 4 0
## 56 45-54 120+ 10-19 3 1
## 57 45-54 120+ 20-29 2 1
## 58 45-54 120+ 30+ 4 0
## 59 35-44 0-39g/day 0-9g/day 0 60
## 60 35-44 0-39g/day 10-19 1 13
## 61 35-44 0-39g/day 20-29 0 7
## 62 35-44 0-39g/day 30+ 0 8
## 63 35-44 40-79 0-9g/day 0 35
## 64 35-44 40-79 10-19 3 20
## 65 35-44 40-79 20-29 1 13
## 66 35-44 40-79 30+ 0 8
## 67 35-44 80-119 0-9g/day 0 11
## 68 35-44 80-119 10-19 0 6
## 69 35-44 80-119 20-29 0 2
## 70 35-44 80-119 30+ 0 1
## 71 35-44 120+ 0-9g/day 2 1
## 72 35-44 120+ 10-19 0 3
## 73 35-44 120+ 20-29 2 2
## 74 25-34 0-39g/day 0-9g/day 0 40
## 75 25-34 0-39g/day 10-19 0 10
## 76 25-34 0-39g/day 20-29 0 6
## 77 25-34 0-39g/day 30+ 0 5
## 78 25-34 40-79 0-9g/day 0 27
## 79 25-34 40-79 10-19 0 7
## 80 25-34 40-79 20-29 0 4
## 81 25-34 40-79 30+ 0 7
## 82 25-34 80-119 0-9g/day 0 2
## 83 25-34 80-119 10-19 0 1
## 84 25-34 80-119 30+ 0 2
## 85 25-34 120+ 0-9g/day 0 1
## 86 25-34 120+ 10-19 1 0
## 87 25-34 120+ 20-29 0 1
## 88 25-34 120+ 30+ 0 2
Filter()
esoph %>% filter(ncases==1)
## agegp alcgp tobgp ncases ncontrols
## 1 25-34 120+ 10-19 1 0
## 2 35-44 0-39g/day 10-19 1 13
## 3 35-44 40-79 20-29 1 13
## 4 45-54 0-39g/day 0-9g/day 1 45
## 5 45-54 80-119 20-29 1 4
## 6 65-74 80-119 30+ 1 0
## 7 65-74 120+ 10-19 1 1
## 8 65-74 120+ 20-29 1 0
## 9 65-74 120+ 30+ 1 0
## 10 75+ 0-39g/day 0-9g/day 1 17
## 11 75+ 0-39g/day 30+ 1 2
## 12 75+ 40-79 10-19 1 2
## 13 75+ 40-79 30+ 1 0
## 14 75+ 80-119 0-9g/day 1 0
## 15 75+ 80-119 10-19 1 0
## 16 75+ 120+ 10-19 1 0
Mutate()
esoph %>% mutate(ngp=ncases)
## agegp alcgp tobgp ncases ncontrols ngp
## 1 25-34 0-39g/day 0-9g/day 0 40 0
## 2 25-34 0-39g/day 10-19 0 10 0
## 3 25-34 0-39g/day 20-29 0 6 0
## 4 25-34 0-39g/day 30+ 0 5 0
## 5 25-34 40-79 0-9g/day 0 27 0
## 6 25-34 40-79 10-19 0 7 0
## 7 25-34 40-79 20-29 0 4 0
## 8 25-34 40-79 30+ 0 7 0
## 9 25-34 80-119 0-9g/day 0 2 0
## 10 25-34 80-119 10-19 0 1 0
## 11 25-34 80-119 30+ 0 2 0
## 12 25-34 120+ 0-9g/day 0 1 0
## 13 25-34 120+ 10-19 1 0 1
## 14 25-34 120+ 20-29 0 1 0
## 15 25-34 120+ 30+ 0 2 0
## 16 35-44 0-39g/day 0-9g/day 0 60 0
## 17 35-44 0-39g/day 10-19 1 13 1
## 18 35-44 0-39g/day 20-29 0 7 0
## 19 35-44 0-39g/day 30+ 0 8 0
## 20 35-44 40-79 0-9g/day 0 35 0
## 21 35-44 40-79 10-19 3 20 3
## 22 35-44 40-79 20-29 1 13 1
## 23 35-44 40-79 30+ 0 8 0
## 24 35-44 80-119 0-9g/day 0 11 0
## 25 35-44 80-119 10-19 0 6 0
## 26 35-44 80-119 20-29 0 2 0
## 27 35-44 80-119 30+ 0 1 0
## 28 35-44 120+ 0-9g/day 2 1 2
## 29 35-44 120+ 10-19 0 3 0
## 30 35-44 120+ 20-29 2 2 2
## 31 45-54 0-39g/day 0-9g/day 1 45 1
## 32 45-54 0-39g/day 10-19 0 18 0
## 33 45-54 0-39g/day 20-29 0 10 0
## 34 45-54 0-39g/day 30+ 0 4 0
## 35 45-54 40-79 0-9g/day 6 32 6
## 36 45-54 40-79 10-19 4 17 4
## 37 45-54 40-79 20-29 5 10 5
## 38 45-54 40-79 30+ 5 2 5
## 39 45-54 80-119 0-9g/day 3 13 3
## 40 45-54 80-119 10-19 6 8 6
## 41 45-54 80-119 20-29 1 4 1
## 42 45-54 80-119 30+ 2 2 2
## 43 45-54 120+ 0-9g/day 4 0 4
## 44 45-54 120+ 10-19 3 1 3
## 45 45-54 120+ 20-29 2 1 2
## 46 45-54 120+ 30+ 4 0 4
## 47 55-64 0-39g/day 0-9g/day 2 47 2
## 48 55-64 0-39g/day 10-19 3 19 3
## 49 55-64 0-39g/day 20-29 3 9 3
## 50 55-64 0-39g/day 30+ 4 2 4
## 51 55-64 40-79 0-9g/day 9 31 9
## 52 55-64 40-79 10-19 6 15 6
## 53 55-64 40-79 20-29 4 13 4
## 54 55-64 40-79 30+ 3 3 3
## 55 55-64 80-119 0-9g/day 9 9 9
## 56 55-64 80-119 10-19 8 7 8
## 57 55-64 80-119 20-29 3 3 3
## 58 55-64 80-119 30+ 4 0 4
## 59 55-64 120+ 0-9g/day 5 5 5
## 60 55-64 120+ 10-19 6 1 6
## 61 55-64 120+ 20-29 2 1 2
## 62 55-64 120+ 30+ 5 1 5
## 63 65-74 0-39g/day 0-9g/day 5 43 5
## 64 65-74 0-39g/day 10-19 4 10 4
## 65 65-74 0-39g/day 20-29 2 5 2
## 66 65-74 0-39g/day 30+ 0 2 0
## 67 65-74 40-79 0-9g/day 17 17 17
## 68 65-74 40-79 10-19 3 7 3
## 69 65-74 40-79 20-29 5 4 5
## 70 65-74 80-119 0-9g/day 6 7 6
## 71 65-74 80-119 10-19 4 8 4
## 72 65-74 80-119 20-29 2 1 2
## 73 65-74 80-119 30+ 1 0 1
## 74 65-74 120+ 0-9g/day 3 1 3
## 75 65-74 120+ 10-19 1 1 1
## 76 65-74 120+ 20-29 1 0 1
## 77 65-74 120+ 30+ 1 0 1
## 78 75+ 0-39g/day 0-9g/day 1 17 1
## 79 75+ 0-39g/day 10-19 2 4 2
## 80 75+ 0-39g/day 30+ 1 2 1
## 81 75+ 40-79 0-9g/day 2 3 2
## 82 75+ 40-79 10-19 1 2 1
## 83 75+ 40-79 20-29 0 3 0
## 84 75+ 40-79 30+ 1 0 1
## 85 75+ 80-119 0-9g/day 1 0 1
## 86 75+ 80-119 10-19 1 0 1
## 87 75+ 120+ 0-9g/day 2 0 2
## 88 75+ 120+ 10-19 1 0 1
Select()
esoph %>% select(agegp, tobgp, ncases, ncontrols)
## agegp tobgp ncases ncontrols
## 1 25-34 0-9g/day 0 40
## 2 25-34 10-19 0 10
## 3 25-34 20-29 0 6
## 4 25-34 30+ 0 5
## 5 25-34 0-9g/day 0 27
## 6 25-34 10-19 0 7
## 7 25-34 20-29 0 4
## 8 25-34 30+ 0 7
## 9 25-34 0-9g/day 0 2
## 10 25-34 10-19 0 1
## 11 25-34 30+ 0 2
## 12 25-34 0-9g/day 0 1
## 13 25-34 10-19 1 0
## 14 25-34 20-29 0 1
## 15 25-34 30+ 0 2
## 16 35-44 0-9g/day 0 60
## 17 35-44 10-19 1 13
## 18 35-44 20-29 0 7
## 19 35-44 30+ 0 8
## 20 35-44 0-9g/day 0 35
## 21 35-44 10-19 3 20
## 22 35-44 20-29 1 13
## 23 35-44 30+ 0 8
## 24 35-44 0-9g/day 0 11
## 25 35-44 10-19 0 6
## 26 35-44 20-29 0 2
## 27 35-44 30+ 0 1
## 28 35-44 0-9g/day 2 1
## 29 35-44 10-19 0 3
## 30 35-44 20-29 2 2
## 31 45-54 0-9g/day 1 45
## 32 45-54 10-19 0 18
## 33 45-54 20-29 0 10
## 34 45-54 30+ 0 4
## 35 45-54 0-9g/day 6 32
## 36 45-54 10-19 4 17
## 37 45-54 20-29 5 10
## 38 45-54 30+ 5 2
## 39 45-54 0-9g/day 3 13
## 40 45-54 10-19 6 8
## 41 45-54 20-29 1 4
## 42 45-54 30+ 2 2
## 43 45-54 0-9g/day 4 0
## 44 45-54 10-19 3 1
## 45 45-54 20-29 2 1
## 46 45-54 30+ 4 0
## 47 55-64 0-9g/day 2 47
## 48 55-64 10-19 3 19
## 49 55-64 20-29 3 9
## 50 55-64 30+ 4 2
## 51 55-64 0-9g/day 9 31
## 52 55-64 10-19 6 15
## 53 55-64 20-29 4 13
## 54 55-64 30+ 3 3
## 55 55-64 0-9g/day 9 9
## 56 55-64 10-19 8 7
## 57 55-64 20-29 3 3
## 58 55-64 30+ 4 0
## 59 55-64 0-9g/day 5 5
## 60 55-64 10-19 6 1
## 61 55-64 20-29 2 1
## 62 55-64 30+ 5 1
## 63 65-74 0-9g/day 5 43
## 64 65-74 10-19 4 10
## 65 65-74 20-29 2 5
## 66 65-74 30+ 0 2
## 67 65-74 0-9g/day 17 17
## 68 65-74 10-19 3 7
## 69 65-74 20-29 5 4
## 70 65-74 0-9g/day 6 7
## 71 65-74 10-19 4 8
## 72 65-74 20-29 2 1
## 73 65-74 30+ 1 0
## 74 65-74 0-9g/day 3 1
## 75 65-74 10-19 1 1
## 76 65-74 20-29 1 0
## 77 65-74 30+ 1 0
## 78 75+ 0-9g/day 1 17
## 79 75+ 10-19 2 4
## 80 75+ 30+ 1 2
## 81 75+ 0-9g/day 2 3
## 82 75+ 10-19 1 2
## 83 75+ 20-29 0 3
## 84 75+ 30+ 1 0
## 85 75+ 0-9g/day 1 0
## 86 75+ 10-19 1 0
## 87 75+ 0-9g/day 2 0
## 88 75+ 10-19 1 0
Gabung()
esoph %>% select(agegp, tobgp, ncases, ncontrols) %>% arrange(desc(agegp)) %>% filter(ncases==1) %>% mutate(ngp=ncases) %>% summarise(avg_ncontrols=mean(ncontrols))
## avg_ncontrols
## 1 6.0625