Tugas Akhir Manajemen Data Relasional

A.Contoh Akses Database

1. Engine SQL

db<-DBI::dbConnect(RSQLite::SQLite(), "C:/SMT 3/Manajemen Data R/chinook (1)/chinook.db") 
SELECT
  *
FROM
  employees;
8 records
EmployeeId LastName FirstName Title ReportsTo BirthDate HireDate Address City State Country PostalCode Phone Fax Email
1 Adams Andrew General Manager NA 1962-02-18 00:00:00 2002-08-14 00:00:00 11120 Jasper Ave NW Edmonton AB Canada T5K 2N1 +1 (780) 428-9482 +1 (780) 428-3457
2 Edwards Nancy Sales Manager 1 1958-12-08 00:00:00 2002-05-01 00:00:00 825 8 Ave SW Calgary AB Canada T2P 2T3 +1 (403) 262-3443 +1 (403) 262-3322
3 Peacock Jane Sales Support Agent 2 1973-08-29 00:00:00 2002-04-01 00:00:00 1111 6 Ave SW Calgary AB Canada T2P 5M5 +1 (403) 262-3443 +1 (403) 262-6712
4 Park Margaret Sales Support Agent 2 1947-09-19 00:00:00 2003-05-03 00:00:00 683 10 Street SW Calgary AB Canada T2P 5G3 +1 (403) 263-4423 +1 (403) 263-4289
5 Johnson Steve Sales Support Agent 2 1965-03-03 00:00:00 2003-10-17 00:00:00 7727B 41 Ave Calgary AB Canada T3B 1Y7 1 (780) 836-9987 1 (780) 836-9543
6 Mitchell Michael IT Manager 1 1973-07-01 00:00:00 2003-10-17 00:00:00 5827 Bowness Road NW Calgary AB Canada T3B 0C5 +1 (403) 246-9887 +1 (403) 246-9899
7 King Robert IT Staff 6 1970-05-29 00:00:00 2004-01-02 00:00:00 590 Columbia Boulevard West Lethbridge AB Canada T1K 5N8 +1 (403) 456-9986 +1 (403) 456-8485
8 Callahan Laura IT Staff 6 1968-01-09 00:00:00 2004-03-04 00:00:00 923 7 ST NW Lethbridge AB Canada T1H 1Y8 +1 (403) 467-3351 +1 (403) 467-8772

2. Menggunakan DPLYR

chinook<-DBI::dbConnect(RSQLite::SQLite(), "C:/SMT 3/Manajemen Data R/chinook (1)/chinook.db")
class(chinook)
## [1] "SQLiteConnection"
## attr(,"package")
## [1] "RSQLite"
RSQLite::dbListTables(chinook)
##  [1] "albums"          "artists"         "customers"       "employees"      
##  [5] "genres"          "invoice_items"   "invoices"        "media_types"    
##  [9] "playlist_track"  "playlists"       "sqlite_sequence" "sqlite_stat1"   
## [13] "tracks"
dplyr::tbl(chinook, "customers")
## # Source:   table<customers> [?? x 13]
## # Database: sqlite 3.37.0 [C:\SMT 3\Manajemen Data R\chinook (1)\chinook.db]
##    CustomerId FirstName LastName  Company Address City  State Country PostalCode
##         <int> <chr>     <chr>     <chr>   <chr>   <chr> <chr> <chr>   <chr>     
##  1          1 Luís      Gonçalves Embrae~ Av. Br~ São ~ SP    Brazil  12227-000 
##  2          2 Leonie    Köhler    <NA>    Theodo~ Stut~ <NA>  Germany 70174     
##  3          3 François  Tremblay  <NA>    1498 r~ Mont~ QC    Canada  H2G 1A7   
##  4          4 Bjørn     Hansen    <NA>    Ullevå~ Oslo  <NA>  Norway  0171      
##  5          5 František Wichterl~ JetBra~ Klanov~ Prag~ <NA>  Czech ~ 14700     
##  6          6 Helena    Holý      <NA>    Rilská~ Prag~ <NA>  Czech ~ 14300     
##  7          7 Astrid    Gruber    <NA>    Rotent~ Vien~ <NA>  Austria 1010      
##  8          8 Daan      Peeters   <NA>    Grétry~ Brus~ <NA>  Belgium 1000      
##  9          9 Kara      Nielsen   <NA>    Sønder~ Cope~ <NA>  Denmark 1720      
## 10         10 Eduardo   Martins   Woodst~ Rua Dr~ São ~ SP    Brazil  01007-010 
## # ... with more rows, and 4 more variables: Phone <chr>, Fax <chr>,
## #   Email <chr>, SupportRepId <int>
customers<-dplyr::tbl(chinook, "customers")
class(customers)
## [1] "tbl_SQLiteConnection" "tbl_dbi"              "tbl_sql"             
## [4] "tbl_lazy"             "tbl"
customers
## # Source:   table<customers> [?? x 13]
## # Database: sqlite 3.37.0 [C:\SMT 3\Manajemen Data R\chinook (1)\chinook.db]
##    CustomerId FirstName LastName  Company Address City  State Country PostalCode
##         <int> <chr>     <chr>     <chr>   <chr>   <chr> <chr> <chr>   <chr>     
##  1          1 Luís      Gonçalves Embrae~ Av. Br~ São ~ SP    Brazil  12227-000 
##  2          2 Leonie    Köhler    <NA>    Theodo~ Stut~ <NA>  Germany 70174     
##  3          3 François  Tremblay  <NA>    1498 r~ Mont~ QC    Canada  H2G 1A7   
##  4          4 Bjørn     Hansen    <NA>    Ullevå~ Oslo  <NA>  Norway  0171      
##  5          5 František Wichterl~ JetBra~ Klanov~ Prag~ <NA>  Czech ~ 14700     
##  6          6 Helena    Holý      <NA>    Rilská~ Prag~ <NA>  Czech ~ 14300     
##  7          7 Astrid    Gruber    <NA>    Rotent~ Vien~ <NA>  Austria 1010      
##  8          8 Daan      Peeters   <NA>    Grétry~ Brus~ <NA>  Belgium 1000      
##  9          9 Kara      Nielsen   <NA>    Sønder~ Cope~ <NA>  Denmark 1720      
## 10         10 Eduardo   Martins   Woodst~ Rua Dr~ São ~ SP    Brazil  01007-010 
## # ... with more rows, and 4 more variables: Phone <chr>, Fax <chr>,
## #   Email <chr>, SupportRepId <int>
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Untuk mengetahui customer dengan SupportRepId sama dengn 4 dapat menggunakan syntax berikut ini.

q<-customers %>%
  select(CustomerId, FirstName, LastName, SupportRepId) %>%
  filter(SupportRepId==4)
q
## # Source:   lazy query [?? x 4]
## # Database: sqlite 3.37.0 [C:\SMT 3\Manajemen Data R\chinook (1)\chinook.db]
##    CustomerId FirstName LastName    SupportRepId
##         <int> <chr>     <chr>              <int>
##  1          4 Bjørn     Hansen                 4
##  2          5 František Wichterlová            4
##  3          8 Daan      Peeters                4
##  4          9 Kara      Nielsen                4
##  5         10 Eduardo   Martins                4
##  6         13 Fernanda  Ramos                  4
##  7         16 Frank     Harris                 4
##  8         20 Dan       Miller                 4
##  9         22 Heather   Leacock                4
## 10         23 John      Gordon                 4
## # ... with more rows
dplyr::show_query(q)
## <SQL>
## SELECT *
## FROM (SELECT `CustomerId`, `FirstName`, `LastName`, `SupportRepId`
## FROM `customers`)
## WHERE (`SupportRepId` = 4.0)

B. Data Wrangling

Digunakan data Loblolly dari package dataset.

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v stringr 1.4.0
## v tidyr   1.1.4     v forcats 0.5.1
## v readr   2.1.0
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'purrr' was built under R version 4.1.2
## Warning: package 'stringr' was built under R version 4.1.2
## Warning: package 'forcats' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(datasets)
data("Loblolly")
Loblolly<-tibble::as_tibble(Loblolly)
class(Loblolly)
## [1] "tbl_df"     "tbl"        "data.frame"
view(Loblolly)
glimpse(Loblolly)
## Rows: 84
## Columns: 3
## $ height <dbl> 4.51, 10.89, 28.72, 41.74, 52.70, 60.92, 4.55, 10.92, 29.07, 42~
## $ age    <dbl> 3, 5, 10, 15, 20, 25, 3, 5, 10, 15, 20, 25, 3, 5, 10, 15, 20, 2~
## $ Seed   <ord> 301, 301, 301, 301, 301, 301, 303, 303, 303, 303, 303, 303, 305~

Contoh penggunaan fungsi dari operator rata-rata.

mean(Loblolly$height)
## [1] 32.3644

1. Fungsi summarise()

Menghitung rata-rata height berdasarkan peubah age dan Seed.

#Berdasarkan Peubah age
Loblolly %>% group_by(age) %>% summarise(mean=mean(height), .groups ='drop')
## # A tibble: 6 x 2
##     age  mean
##   <dbl> <dbl>
## 1     3  4.24
## 2     5 10.2 
## 3    10 27.4 
## 4    15 40.5 
## 5    20 51.5 
## 6    25 60.3
#Berdasarkan Peubah Seed
Loblolly %>% group_by(Seed) %>% summarise(mean=mean(height), .groups ='drop')
## # A tibble: 14 x 2
##    Seed   mean
##    <ord> <dbl>
##  1 329    30.3
##  2 327    30.6
##  3 325    31.9
##  4 307    31.3
##  5 331    31.0
##  6 311    31.7
##  7 315    32.4
##  8 321    31.2
##  9 319    32.9
## 10 301    33.2
## 11 323    33.6
## 12 309    33.8
## 13 303    34.1
## 14 305    35.1

2. Fungsi Arrange()

Mengurutkan data berdasarkan peubah height dari nilai terkecil dan nilai terbesar

#Mengurutkan dari nilai terkecil
Loblolly %>% arrange(height)
## # A tibble: 84 x 3
##    height   age Seed 
##     <dbl> <dbl> <ord>
##  1   3.46     3 331  
##  2   3.77     3 321  
##  3   3.88     3 311  
##  4   3.91     3 307  
##  5   3.93     3 329  
##  6   4.12     3 327  
##  7   4.32     3 315  
##  8   4.33     3 323  
##  9   4.38     3 325  
## 10   4.51     3 301  
## # ... with 74 more rows
#Mengurutkan dari nilai terbesar
Loblolly %>% arrange(desc(height))
## # A tibble: 84 x 3
##    height   age Seed 
##     <dbl> <dbl> <ord>
##  1   64.1    25 305  
##  2   63.4    25 303  
##  3   63.0    25 309  
##  4   61.6    25 323  
##  5   60.9    25 301  
##  6   60.7    25 319  
##  7   60.3    25 321  
##  8   60.1    25 315  
##  9   59.6    25 311  
## 10   59.5    25 331  
## # ... with 74 more rows

3. Fungsi filter()

Memilih sebagian data berdasarkan age sama dengan 10

Loblolly %>% filter(age==10)
## # A tibble: 14 x 3
##    height   age Seed 
##     <dbl> <dbl> <ord>
##  1   28.7    10 301  
##  2   29.1    10 303  
##  3   30.2    10 305  
##  4   25.7    10 307  
##  5   28.7    10 309  
##  6   26.0    10 311  
##  7   27.2    10 315  
##  8   27.9    10 319  
##  9   25.4    10 321  
## 10   29.0    10 323  
## 11   27.9    10 325  
## 12   26.5    10 327  
## 13   26.1    10 329  
## 14   25.8    10 331

4. Fungsi select()

Memilih subset data berdasarkan peubah age dan height.

Loblolly %>% select(age, height)
## # A tibble: 84 x 2
##      age height
##    <dbl>  <dbl>
##  1     3   4.51
##  2     5  10.9 
##  3    10  28.7 
##  4    15  41.7 
##  5    20  52.7 
##  6    25  60.9 
##  7     3   4.55
##  8     5  10.9 
##  9    10  29.1 
## 10    15  42.8 
## # ... with 74 more rows

5. Fungsi mutate ()

Menambahkan peubah tinggi pada data.

Loblolly %>% mutate(tinggi=height)
## # A tibble: 84 x 4
##    height   age Seed  tinggi
##     <dbl> <dbl> <ord>  <dbl>
##  1   4.51     3 301     4.51
##  2  10.9      5 301    10.9 
##  3  28.7     10 301    28.7 
##  4  41.7     15 301    41.7 
##  5  52.7     20 301    52.7 
##  6  60.9     25 301    60.9 
##  7   4.55     3 303     4.55
##  8  10.9      5 303    10.9 
##  9  29.1     10 303    29.1 
## 10  42.8     15 303    42.8 
## # ... with 74 more rows

6. Kombinasi Semua Fungsi

Mengkombinasikan fungsi select(), filter(), arrange(), dan mutate(). Dalam syntax tersebut dipilih subset data berupa Seed dan height lalu memilih data dengan Seed sama dengan 329 dan mengurutkan height mulai dari yang terbesar. Setelah itu menambahkan peubah baru berupa tinggi yang merupakan height.

Loblolly %>% select(Seed, height) %>% filter(Seed==329) %>% arrange(desc(height)) %>% mutate(tinggi=height)
## # A tibble: 6 x 3
##   Seed  height tinggi
##   <ord>  <dbl>  <dbl>
## 1 329    56.4   56.4 
## 2 329    48.3   48.3 
## 3 329    37.8   37.8 
## 4 329    26.1   26.1 
## 5 329     9.34   9.34
## 6 329     3.93   3.93