Memanggil library yang dibutuhkan untuk akses database
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(RSQLite)
## Warning: package 'RSQLite' was built under R version 4.1.2
library(DBI)
## Warning: package 'DBI' was built under R version 4.1.2
db <- dbConnect(SQLite(), "C:/Users/VAIO/Documents/Materi/Manajemen Data Relasional/chinook.db")
Menggunakan Syntax SQl untuk mengakses database
SELECT
*
FROM
ALBUMS
| AlbumId | Title | ArtistId |
|---|---|---|
| 1 | For Those About To Rock We Salute You | 1 |
| 2 | Balls to the Wall | 2 |
| 3 | Restless and Wild | 2 |
| 4 | Let There Be Rock | 1 |
| 5 | Big Ones | 3 |
| 6 | Jagged Little Pill | 4 |
| 7 | Facelift | 5 |
| 8 | Warner 25 Anos | 6 |
| 9 | Plays Metallica By Four Cellos | 7 |
| 10 | Audioslave | 8 |
chinook <- dbConnect(SQLite(), "C:/Users/VAIO/Documents/Materi/Manajemen Data Relasional/chinook.db")
class(chinook)
## [1] "SQLiteConnection"
## attr(,"package")
## [1] "RSQLite"
dbListTables(chinook)
## [1] "albums" "artists" "customers" "employees"
## [5] "genres" "invoice_items" "invoices" "media_types"
## [9] "playlist_track" "playlists" "sqlite_sequence" "sqlite_stat1"
## [13] "tracks"
costumers <- tbl(chinook,"customers")
class(costumers)
## [1] "tbl_SQLiteConnection" "tbl_dbi" "tbl_sql"
## [4] "tbl_lazy" "tbl"
costumers
## # Source: table<customers> [?? x 13]
## # Database: sqlite 3.37.0 [C:\Users\VAIO\Documents\Materi\Manajemen Data
## # Relasional\chinook.db]
## CustomerId FirstName LastName Company Address City State Country PostalCode
## <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 Luís Gonçalves Embrae~ Av. Br~ São ~ SP Brazil 12227-000
## 2 2 Leonie Köhler <NA> Theodo~ Stut~ <NA> Germany 70174
## 3 3 François Tremblay <NA> 1498 r~ Mont~ QC Canada H2G 1A7
## 4 4 Bjørn Hansen <NA> Ullevå~ Oslo <NA> Norway 0171
## 5 5 František Wichterl~ JetBra~ Klanov~ Prag~ <NA> Czech ~ 14700
## 6 6 Helena Holý <NA> Rilská~ Prag~ <NA> Czech ~ 14300
## 7 7 Astrid Gruber <NA> Rotent~ Vien~ <NA> Austria 1010
## 8 8 Daan Peeters <NA> Grétry~ Brus~ <NA> Belgium 1000
## 9 9 Kara Nielsen <NA> Sønder~ Cope~ <NA> Denmark 1720
## 10 10 Eduardo Martins Woodst~ Rua Dr~ São ~ SP Brazil 01007-010
## # ... with more rows, and 4 more variables: Phone <chr>, Fax <chr>,
## # Email <chr>, SupportRepId <int>
filtered <- costumers %>%
select(CustomerId, FirstName, Country) %>%
filter(Country != "USA")
filtered
## # Source: lazy query [?? x 3]
## # Database: sqlite 3.37.0 [C:\Users\VAIO\Documents\Materi\Manajemen Data
## # Relasional\chinook.db]
## CustomerId FirstName Country
## <int> <chr> <chr>
## 1 1 Luís Brazil
## 2 2 Leonie Germany
## 3 3 François Canada
## 4 4 Bjørn Norway
## 5 5 František Czech Republic
## 6 6 Helena Czech Republic
## 7 7 Astrid Austria
## 8 8 Daan Belgium
## 9 9 Kara Denmark
## 10 10 Eduardo Brazil
## # ... with more rows
show_query(filtered)
## <SQL>
## SELECT *
## FROM (SELECT `CustomerId`, `FirstName`, `Country`
## FROM `customers`)
## WHERE (`Country` != 'USA')
Memanggil library tidyverse dan dataset yang akan digunakan
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.5 v stringr 1.4.0
## v tidyr 1.1.4 v forcats 0.5.1
## v readr 2.0.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(datasets)
data(Orange)
orange <- as_tibble(Orange)
Mengelompokkan data orange berdasarkan umur dan menghitung rata-rata lingkarnya dengan menghilangkan variable lain
orange %>% group_by(age) %>% summarise(mean=mean(circumference), .groups = 'drop')
## # A tibble: 7 x 2
## age mean
## <dbl> <dbl>
## 1 118 31
## 2 484 57.8
## 3 664 93.2
## 4 1004 134.
## 5 1231 146.
## 6 1372 173.
## 7 1582 176.
Mengurutkan orange berdasarkan lingkarnya dari yang terbesar ke yang terkecil dan sebaliknya
orange %>% arrange(desc(circumference))
## # A tibble: 35 x 3
## Tree age circumference
## <ord> <dbl> <dbl>
## 1 4 1582 214
## 2 4 1372 209
## 3 2 1372 203
## 4 2 1582 203
## 5 4 1231 179
## 6 5 1582 177
## 7 5 1372 174
## 8 2 1231 172
## 9 4 1004 167
## 10 2 1004 156
## # ... with 25 more rows
orange %>% arrange(circumference)
## # A tibble: 35 x 3
## Tree age circumference
## <ord> <dbl> <dbl>
## 1 1 118 30
## 2 3 118 30
## 3 5 118 30
## 4 4 118 32
## 5 2 118 33
## 6 5 484 49
## 7 3 484 51
## 8 1 484 58
## 9 4 484 62
## 10 2 484 69
## # ... with 25 more rows
Menampilkan data orange dengan lingkar lebi besar dari 150
orange %>% filter(circumference > 150)
## # A tibble: 10 x 3
## Tree age circumference
## <ord> <dbl> <dbl>
## 1 2 1004 156
## 2 2 1231 172
## 3 2 1372 203
## 4 2 1582 203
## 5 4 1004 167
## 6 4 1231 179
## 7 4 1372 209
## 8 4 1582 214
## 9 5 1372 174
## 10 5 1582 177
Menampilkan data orange selain variabel age
orange %>% select(-age)
## # A tibble: 35 x 2
## Tree circumference
## <ord> <dbl>
## 1 1 30
## 2 1 58
## 3 1 87
## 4 1 115
## 5 1 120
## 6 1 142
## 7 1 145
## 8 2 33
## 9 2 69
## 10 2 111
## # ... with 25 more rows
Menamabahkan peubah baru yaitu growth (pertumbuhan) yang merupakan hasil bagi lingkar per umur
orange %>% mutate(growth = circumference / age)
## # A tibble: 35 x 4
## Tree age circumference growth
## <ord> <dbl> <dbl> <dbl>
## 1 1 118 30 0.254
## 2 1 484 58 0.120
## 3 1 664 87 0.131
## 4 1 1004 115 0.115
## 5 1 1231 120 0.0975
## 6 1 1372 142 0.103
## 7 1 1582 145 0.0917
## 8 2 118 33 0.280
## 9 2 484 69 0.143
## 10 2 664 111 0.167
## # ... with 25 more rows
Melihat rata-rata pertumbuhan orange berdasarkan pohonnya
orange %>% mutate(growth = circumference / age) %>%
group_by(Tree) %>%
summarise(mean=mean(growth), .groups = 'drop')
## # A tibble: 5 x 2
## Tree mean
## <ord> <dbl>
## 1 3 0.123
## 2 1 0.130
## 3 5 0.137
## 4 2 0.166
## 5 4 0.167
Menampilkan orange dengan lingkar lebih dari 100 dan diurutkan dari terbesar
orange %>% select(Tree, circumference) %>%
filter(circumference > 100) %>%
arrange(desc(circumference))
## # A tibble: 22 x 2
## Tree circumference
## <ord> <dbl>
## 1 4 214
## 2 4 209
## 3 2 203
## 4 2 203
## 5 4 179
## 6 5 177
## 7 5 174
## 8 2 172
## 9 4 167
## 10 2 156
## # ... with 12 more rows