A. Contoh akses database dari R

1. Engine SQL

library(RSQLite)
## Warning: package 'RSQLite' was built under R version 4.1.2
datach <- DBI::dbConnect(RSQLite::SQLite(), "C:/Users/ZHILLAN/Documents/Materi Kuliah/Semester 3/Manajemen Data Relasional/chinook.db")
SELECT
  *
FROM
albums;
Displaying records 1 - 10
AlbumId Title ArtistId
1 For Those About To Rock We Salute You 1
2 Balls to the Wall 2
3 Restless and Wild 2
4 Let There Be Rock 1
5 Big Ones 3
6 Jagged Little Pill 4
7 Facelift 5
8 Warner 25 Anos 6
9 Plays Metallica By Four Cellos 7
10 Audioslave 8

2. Package DPLYR

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
chinook <- DBI::dbConnect(RSQLite::SQLite(), "C:/Users/ZHILLAN/Documents/Materi Kuliah/Semester 3/Manajemen Data Relasional/chinook.db")

class(chinook)
## [1] "SQLiteConnection"
## attr(,"package")
## [1] "RSQLite"
RSQLite::dbListTables(chinook)
##  [1] "albums"          "artists"         "customers"       "employees"      
##  [5] "genres"          "invoice_items"   "invoices"        "media_types"    
##  [9] "playlist_track"  "playlists"       "sqlite_sequence" "sqlite_stat1"   
## [13] "tracks"
dplyr::tbl(chinook, "tracks")
## # Source:   table<tracks> [?? x 9]
## # Database: sqlite 3.36.0 [C:\Users\ZHILLAN\Documents\Materi Kuliah\Semester
## #   3\Manajemen Data Relasional\chinook.db]
##    TrackId Name     AlbumId MediaTypeId GenreId Composer     Milliseconds  Bytes
##      <int> <chr>      <int>       <int>   <int> <chr>               <int>  <int>
##  1       1 For Tho~       1           1       1 Angus Young~       343719 1.12e7
##  2       2 Balls t~       2           2       1 <NA>               342562 5.51e6
##  3       3 Fast As~       3           2       1 F. Baltes, ~       230619 3.99e6
##  4       4 Restles~       3           2       1 F. Baltes, ~       252051 4.33e6
##  5       5 Princes~       3           2       1 Deaffy & R.~       375418 6.29e6
##  6       6 Put The~       1           1       1 Angus Young~       205662 6.71e6
##  7       7 Let's G~       1           1       1 Angus Young~       233926 7.64e6
##  8       8 Inject ~       1           1       1 Angus Young~       210834 6.85e6
##  9       9 Snowbal~       1           1       1 Angus Young~       203102 6.60e6
## 10      10 Evil Wa~       1           1       1 Angus Young~       263497 8.61e6
## # ... with more rows, and 1 more variable: UnitPrice <dbl>
tracks <- dplyr::tbl(chinook, "tracks")
class(tracks)
## [1] "tbl_SQLiteConnection" "tbl_dbi"              "tbl_sql"             
## [4] "tbl_lazy"             "tbl"
tracks
## # Source:   table<tracks> [?? x 9]
## # Database: sqlite 3.36.0 [C:\Users\ZHILLAN\Documents\Materi Kuliah\Semester
## #   3\Manajemen Data Relasional\chinook.db]
##    TrackId Name     AlbumId MediaTypeId GenreId Composer     Milliseconds  Bytes
##      <int> <chr>      <int>       <int>   <int> <chr>               <int>  <int>
##  1       1 For Tho~       1           1       1 Angus Young~       343719 1.12e7
##  2       2 Balls t~       2           2       1 <NA>               342562 5.51e6
##  3       3 Fast As~       3           2       1 F. Baltes, ~       230619 3.99e6
##  4       4 Restles~       3           2       1 F. Baltes, ~       252051 4.33e6
##  5       5 Princes~       3           2       1 Deaffy & R.~       375418 6.29e6
##  6       6 Put The~       1           1       1 Angus Young~       205662 6.71e6
##  7       7 Let's G~       1           1       1 Angus Young~       233926 7.64e6
##  8       8 Inject ~       1           1       1 Angus Young~       210834 6.85e6
##  9       9 Snowbal~       1           1       1 Angus Young~       203102 6.60e6
## 10      10 Evil Wa~       1           1       1 Angus Young~       263497 8.61e6
## # ... with more rows, and 1 more variable: UnitPrice <dbl>
xtr <- tracks %>% select(-Bytes) %>% filter(Milliseconds>=300000)
xtr
## # Source:   lazy query [?? x 8]
## # Database: sqlite 3.36.0 [C:\Users\ZHILLAN\Documents\Materi Kuliah\Semester
## #   3\Manajemen Data Relasional\chinook.db]
##    TrackId Name     AlbumId MediaTypeId GenreId Composer  Milliseconds UnitPrice
##      <int> <chr>      <int>       <int>   <int> <chr>            <int>     <dbl>
##  1       1 For Tho~       1           1       1 Angus Yo~       343719      0.99
##  2       2 Balls t~       2           2       1 <NA>            342562      0.99
##  3       5 Princes~       3           2       1 Deaffy &~       375418      0.99
##  4      15 Go Down        4           1       1 AC/DC           331180      0.99
##  5      17 Let The~       4           1       1 AC/DC           366654      0.99
##  6      19 Problem~       4           1       1 AC/DC           325041      0.99
##  7      20 Overdose       4           1       1 AC/DC           369319      0.99
##  8      22 Whole L~       4           1       1 AC/DC           323761      0.99
##  9      24 Love In~       5           1       1 Steven T~       321828      0.99
## 10      26 What It~       5           1       1 Steven T~       310622      0.99
## # ... with more rows
dplyr::show_query(xtr)
## <SQL>
## SELECT *
## FROM (SELECT `TrackId`, `Name`, `AlbumId`, `MediaTypeId`, `GenreId`, `Composer`, `Milliseconds`, `UnitPrice`
## FROM `tracks`)
## WHERE (`Milliseconds` >= 300000.0)

B. Data Wrangling dari R

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.5     v stringr 1.4.0
## v tidyr   1.1.4     v forcats 0.5.1
## v readr   2.0.2
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'stringr' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(datasets)
## 1. Datasets
data("ChickWeight")
ChickWeight <- tibble::as_tibble(ChickWeight)
class(ChickWeight)
## [1] "tbl_df"     "tbl"        "data.frame"
glimpse(ChickWeight)
## Rows: 578
## Columns: 4
## $ weight <dbl> 42, 51, 59, 64, 76, 93, 106, 125, 149, 171, 199, 205, 40, 49, 5~
## $ Time   <dbl> 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 21, 0, 2, 4, 6, 8, 10, 1~
## $ Chick  <ord> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, ~
## $ Diet   <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
mean(ChickWeight$weight)
## [1] 121.8183
data("CO2")
CO2 <- tibble::as_tibble(CO2)
class(CO2)
## [1] "tbl_df"     "tbl"        "data.frame"
glimpse(CO2)
## Rows: 84
## Columns: 5
## $ Plant     <ord> Qn1, Qn1, Qn1, Qn1, Qn1, Qn1, Qn1, Qn2, Qn2, Qn2, Qn2, Qn2, ~
## $ Type      <fct> Quebec, Quebec, Quebec, Quebec, Quebec, Quebec, Quebec, Queb~
## $ Treatment <fct> nonchilled, nonchilled, nonchilled, nonchilled, nonchilled, ~
## $ conc      <dbl> 95, 175, 250, 350, 500, 675, 1000, 95, 175, 250, 350, 500, 6~
## $ uptake    <dbl> 16.0, 30.4, 34.8, 37.2, 35.3, 39.2, 39.7, 13.6, 27.3, 37.1, ~
CO2$uptake %>% mean()
## [1] 27.2131
## 2. Summarise
ChickWeight %>% group_by(Diet) %>% summarise(mean=mean(weight), .groups='drop')
## # A tibble: 4 x 2
##   Diet   mean
##   <fct> <dbl>
## 1 1      103.
## 2 2      123.
## 3 3      143.
## 4 4      135.
CO2 %>% group_by(Type) %>% summarise(mean=mean(uptake), .groups='drop')
## # A tibble: 2 x 2
##   Type         mean
##   <fct>       <dbl>
## 1 Quebec       33.5
## 2 Mississippi  20.9
## 3. Arrange
ChickWeight %>% arrange(Time, weight)
## # A tibble: 578 x 4
##    weight  Time Chick Diet 
##     <dbl> <dbl> <ord> <fct>
##  1     39     0 18    1    
##  2     39     0 27    2    
##  3     39     0 28    2    
##  4     39     0 29    2    
##  5     39     0 33    3    
##  6     39     0 36    3    
##  7     39     0 48    4    
##  8     40     0 2     1    
##  9     40     0 21    2    
## 10     40     0 25    2    
## # ... with 568 more rows
CO2 %>% arrange(desc(conc), desc(uptake))
## # A tibble: 84 x 5
##    Plant Type        Treatment   conc uptake
##    <ord> <fct>       <fct>      <dbl>  <dbl>
##  1 Qn3   Quebec      nonchilled  1000   45.5
##  2 Qn2   Quebec      nonchilled  1000   44.3
##  3 Qc2   Quebec      chilled     1000   42.4
##  4 Qc3   Quebec      chilled     1000   41.4
##  5 Qn1   Quebec      nonchilled  1000   39.7
##  6 Qc1   Quebec      chilled     1000   38.7
##  7 Mn1   Mississippi nonchilled  1000   35.5
##  8 Mn2   Mississippi nonchilled  1000   31.5
##  9 Mn3   Mississippi nonchilled  1000   27.8
## 10 Mc1   Mississippi chilled     1000   21.9
## # ... with 74 more rows
## 4. Filter
ChickWeight %>% filter(Time>=20, weight>=200)
## # A tibble: 51 x 4
##    weight  Time Chick Diet 
##     <dbl> <dbl> <ord> <fct>
##  1    205    21 1     1    
##  2    209    20 2     1    
##  3    215    21 2     1    
##  4    202    21 3     1    
##  5    220    20 5     1    
##  6    223    21 5     1    
##  7    288    20 7     1    
##  8    305    21 7     1    
##  9    205    21 12    1    
## 10    259    20 14    1    
## # ... with 41 more rows
CO2 %>% filter(Treatment=="nonchilled", conc<=500)
## # A tibble: 30 x 5
##    Plant Type   Treatment   conc uptake
##    <ord> <fct>  <fct>      <dbl>  <dbl>
##  1 Qn1   Quebec nonchilled    95   16  
##  2 Qn1   Quebec nonchilled   175   30.4
##  3 Qn1   Quebec nonchilled   250   34.8
##  4 Qn1   Quebec nonchilled   350   37.2
##  5 Qn1   Quebec nonchilled   500   35.3
##  6 Qn2   Quebec nonchilled    95   13.6
##  7 Qn2   Quebec nonchilled   175   27.3
##  8 Qn2   Quebec nonchilled   250   37.1
##  9 Qn2   Quebec nonchilled   350   41.8
## 10 Qn2   Quebec nonchilled   500   40.6
## # ... with 20 more rows
## 5. Mutate
ChickWeight %>% mutate(H=Time/weight)
## # A tibble: 578 x 5
##    weight  Time Chick Diet       H
##     <dbl> <dbl> <ord> <fct>  <dbl>
##  1     42     0 1     1     0     
##  2     51     2 1     1     0.0392
##  3     59     4 1     1     0.0678
##  4     64     6 1     1     0.0938
##  5     76     8 1     1     0.105 
##  6     93    10 1     1     0.108 
##  7    106    12 1     1     0.113 
##  8    125    14 1     1     0.112 
##  9    149    16 1     1     0.107 
## 10    171    18 1     1     0.105 
## # ... with 568 more rows
CO2 %>% mutate(ct=conc/uptake)
## # A tibble: 84 x 6
##    Plant Type   Treatment   conc uptake    ct
##    <ord> <fct>  <fct>      <dbl>  <dbl> <dbl>
##  1 Qn1   Quebec nonchilled    95   16    5.94
##  2 Qn1   Quebec nonchilled   175   30.4  5.76
##  3 Qn1   Quebec nonchilled   250   34.8  7.18
##  4 Qn1   Quebec nonchilled   350   37.2  9.41
##  5 Qn1   Quebec nonchilled   500   35.3 14.2 
##  6 Qn1   Quebec nonchilled   675   39.2 17.2 
##  7 Qn1   Quebec nonchilled  1000   39.7 25.2 
##  8 Qn2   Quebec nonchilled    95   13.6  6.99
##  9 Qn2   Quebec nonchilled   175   27.3  6.41
## 10 Qn2   Quebec nonchilled   250   37.1  6.74
## # ... with 74 more rows
## 6. Select
ChickWeight %>% select(-Chick)
## # A tibble: 578 x 3
##    weight  Time Diet 
##     <dbl> <dbl> <fct>
##  1     42     0 1    
##  2     51     2 1    
##  3     59     4 1    
##  4     64     6 1    
##  5     76     8 1    
##  6     93    10 1    
##  7    106    12 1    
##  8    125    14 1    
##  9    149    16 1    
## 10    171    18 1    
## # ... with 568 more rows
CO2 %>% select(Plant, Treatment, conc)
## # A tibble: 84 x 3
##    Plant Treatment   conc
##    <ord> <fct>      <dbl>
##  1 Qn1   nonchilled    95
##  2 Qn1   nonchilled   175
##  3 Qn1   nonchilled   250
##  4 Qn1   nonchilled   350
##  5 Qn1   nonchilled   500
##  6 Qn1   nonchilled   675
##  7 Qn1   nonchilled  1000
##  8 Qn2   nonchilled    95
##  9 Qn2   nonchilled   175
## 10 Qn2   nonchilled   250
## # ... with 74 more rows
## 7. Pengguanaan fungsi secara bersamaan di R
ChickWeight %>% select(-Diet) %>% arrange(weight, Time) %>% filter(weight>=100)
## # A tibble: 304 x 3
##    weight  Time Chick
##     <dbl> <dbl> <ord>
##  1    100     8 47   
##  2    100    10 27   
##  3    100    18 9    
##  4    100    20 9    
##  5    101     8 14   
##  6    101     8 40   
##  7    101     8 46   
##  8    101    16 10   
##  9    102     8 25   
## 10    102    10 31   
## # ... with 294 more rows