Akses Database menggunakan R

Memanggil library yang dibutuhkan untuk akses database

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(RSQLite)
## Warning: package 'RSQLite' was built under R version 4.1.2
library(DBI)
## Warning: package 'DBI' was built under R version 4.1.2

Menggunakan Engine SQL

db <- dbConnect(SQLite(), "C:/Users/VAIO/Documents/Materi/Manajemen Data Relasional/chinook.db")

Menggunakan Syntax SQl untuk mengakses database

SELECT
  *
FROM
  ALBUMS
Displaying records 1 - 10
AlbumId Title ArtistId
1 For Those About To Rock We Salute You 1
2 Balls to the Wall 2
3 Restless and Wild 2
4 Let There Be Rock 1
5 Big Ones 3
6 Jagged Little Pill 4
7 Facelift 5
8 Warner 25 Anos 6
9 Plays Metallica By Four Cellos 7
10 Audioslave 8

Menggunakan dplyr

chinook <- dbConnect(SQLite(), "C:/Users/VAIO/Documents/Materi/Manajemen Data Relasional/chinook.db")
class(chinook)
## [1] "SQLiteConnection"
## attr(,"package")
## [1] "RSQLite"
dbListTables(chinook)
##  [1] "albums"          "artists"         "customers"       "employees"      
##  [5] "genres"          "invoice_items"   "invoices"        "media_types"    
##  [9] "playlist_track"  "playlists"       "sqlite_sequence" "sqlite_stat1"   
## [13] "tracks"
costumers <- tbl(chinook,"customers")
class(costumers)
## [1] "tbl_SQLiteConnection" "tbl_dbi"              "tbl_sql"             
## [4] "tbl_lazy"             "tbl"
costumers
## # Source:   table<customers> [?? x 13]
## # Database: sqlite 3.37.0 [C:\Users\VAIO\Documents\Materi\Manajemen Data
## #   Relasional\chinook.db]
##    CustomerId FirstName LastName  Company Address City  State Country PostalCode
##         <int> <chr>     <chr>     <chr>   <chr>   <chr> <chr> <chr>   <chr>     
##  1          1 Luís      Gonçalves Embrae~ Av. Br~ São ~ SP    Brazil  12227-000 
##  2          2 Leonie    Köhler    <NA>    Theodo~ Stut~ <NA>  Germany 70174     
##  3          3 François  Tremblay  <NA>    1498 r~ Mont~ QC    Canada  H2G 1A7   
##  4          4 Bjørn     Hansen    <NA>    Ullevå~ Oslo  <NA>  Norway  0171      
##  5          5 František Wichterl~ JetBra~ Klanov~ Prag~ <NA>  Czech ~ 14700     
##  6          6 Helena    Holý      <NA>    Rilská~ Prag~ <NA>  Czech ~ 14300     
##  7          7 Astrid    Gruber    <NA>    Rotent~ Vien~ <NA>  Austria 1010      
##  8          8 Daan      Peeters   <NA>    Grétry~ Brus~ <NA>  Belgium 1000      
##  9          9 Kara      Nielsen   <NA>    Sønder~ Cope~ <NA>  Denmark 1720      
## 10         10 Eduardo   Martins   Woodst~ Rua Dr~ São ~ SP    Brazil  01007-010 
## # ... with more rows, and 4 more variables: Phone <chr>, Fax <chr>,
## #   Email <chr>, SupportRepId <int>
filtered <- costumers %>% 
  select(CustomerId, FirstName, Country) %>%
  filter(Country != "USA")
filtered
## # Source:   lazy query [?? x 3]
## # Database: sqlite 3.37.0 [C:\Users\VAIO\Documents\Materi\Manajemen Data
## #   Relasional\chinook.db]
##    CustomerId FirstName Country       
##         <int> <chr>     <chr>         
##  1          1 Luís      Brazil        
##  2          2 Leonie    Germany       
##  3          3 François  Canada        
##  4          4 Bjørn     Norway        
##  5          5 František Czech Republic
##  6          6 Helena    Czech Republic
##  7          7 Astrid    Austria       
##  8          8 Daan      Belgium       
##  9          9 Kara      Denmark       
## 10         10 Eduardo   Brazil        
## # ... with more rows
show_query(filtered)
## <SQL>
## SELECT *
## FROM (SELECT `CustomerId`, `FirstName`, `Country`
## FROM `customers`)
## WHERE (`Country` != 'USA')

Data Wrangling dengan tidyverse

Memanggil library tidyverse dan dataset yang akan digunakan

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.5     v stringr 1.4.0
## v tidyr   1.1.4     v forcats 0.5.1
## v readr   2.0.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(datasets)
data(Orange)
orange <- as_tibble(Orange)

Fungsi summarise

Mengelompokkan data orange berdasarkan umur dan menghitung rata-rata lingkarnya dengan menghilangkan variable lain

orange %>% group_by(age) %>% summarise(mean=mean(circumference), .groups = 'drop')
## # A tibble: 7 x 2
##     age  mean
##   <dbl> <dbl>
## 1   118  31  
## 2   484  57.8
## 3   664  93.2
## 4  1004 134. 
## 5  1231 146. 
## 6  1372 173. 
## 7  1582 176.

Fungsi arrange

Mengurutkan orange berdasarkan lingkarnya dari yang terbesar ke yang terkecil dan sebaliknya

orange %>% arrange(desc(circumference))
## # A tibble: 35 x 3
##    Tree    age circumference
##    <ord> <dbl>         <dbl>
##  1 4      1582           214
##  2 4      1372           209
##  3 2      1372           203
##  4 2      1582           203
##  5 4      1231           179
##  6 5      1582           177
##  7 5      1372           174
##  8 2      1231           172
##  9 4      1004           167
## 10 2      1004           156
## # ... with 25 more rows
orange %>% arrange(circumference)
## # A tibble: 35 x 3
##    Tree    age circumference
##    <ord> <dbl>         <dbl>
##  1 1       118            30
##  2 3       118            30
##  3 5       118            30
##  4 4       118            32
##  5 2       118            33
##  6 5       484            49
##  7 3       484            51
##  8 1       484            58
##  9 4       484            62
## 10 2       484            69
## # ... with 25 more rows

Fungsi filter

Menampilkan data orange dengan lingkar lebi besar dari 150

orange %>% filter(circumference > 150)
## # A tibble: 10 x 3
##    Tree    age circumference
##    <ord> <dbl>         <dbl>
##  1 2      1004           156
##  2 2      1231           172
##  3 2      1372           203
##  4 2      1582           203
##  5 4      1004           167
##  6 4      1231           179
##  7 4      1372           209
##  8 4      1582           214
##  9 5      1372           174
## 10 5      1582           177

Fungsi select

Menampilkan data orange selain variabel age

orange %>% select(-age)
## # A tibble: 35 x 2
##    Tree  circumference
##    <ord>         <dbl>
##  1 1                30
##  2 1                58
##  3 1                87
##  4 1               115
##  5 1               120
##  6 1               142
##  7 1               145
##  8 2                33
##  9 2                69
## 10 2               111
## # ... with 25 more rows

Fungsi mutate

Menamabahkan peubah baru yaitu growth (pertumbuhan) yang merupakan hasil bagi lingkar per umur

orange %>% mutate(growth = circumference / age)
## # A tibble: 35 x 4
##    Tree    age circumference growth
##    <ord> <dbl>         <dbl>  <dbl>
##  1 1       118            30 0.254 
##  2 1       484            58 0.120 
##  3 1       664            87 0.131 
##  4 1      1004           115 0.115 
##  5 1      1231           120 0.0975
##  6 1      1372           142 0.103 
##  7 1      1582           145 0.0917
##  8 2       118            33 0.280 
##  9 2       484            69 0.143 
## 10 2       664           111 0.167 
## # ... with 25 more rows

Menggabungkan fungsi

Melihat rata-rata pertumbuhan orange berdasarkan pohonnya

orange %>% mutate(growth = circumference / age) %>%
  group_by(Tree) %>%
  summarise(mean=mean(growth), .groups = 'drop')
## # A tibble: 5 x 2
##   Tree   mean
##   <ord> <dbl>
## 1 3     0.123
## 2 1     0.130
## 3 5     0.137
## 4 2     0.166
## 5 4     0.167

Menampilkan orange dengan lingkar lebih dari 100 dan diurutkan dari terbesar

orange %>% select(Tree, circumference) %>%
  filter(circumference > 100) %>%
  arrange(desc(circumference))
## # A tibble: 22 x 2
##    Tree  circumference
##    <ord>         <dbl>
##  1 4               214
##  2 4               209
##  3 2               203
##  4 2               203
##  5 4               179
##  6 5               177
##  7 5               174
##  8 2               172
##  9 4               167
## 10 2               156
## # ... with 12 more rows