#memanggil library yang dibutuhkan untuk akses database
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(RSQLite)
## Warning: package 'RSQLite' was built under R version 4.1.2
library(DBI)
## Warning: package 'DBI' was built under R version 4.1.1
#Menggunakan Engine SQL
db <- dbConnect(SQLite(), "D:/chinook.db")
#Menggunakan dplyr
chinook <- dbConnect(SQLite(), "D:/chinook.db")
class(chinook)
## [1] "SQLiteConnection"
## attr(,"package")
## [1] "RSQLite"
dbListTables(chinook)
## [1] "albums" "artists" "customers" "employees"
## [5] "genres" "invoice_items" "invoices" "media_types"
## [9] "playlist_track" "playlists" "sqlite_sequence" "sqlite_stat1"
## [13] "tracks"
costumers <- tbl(chinook,"customers")
class(costumers)
## [1] "tbl_SQLiteConnection" "tbl_dbi" "tbl_sql"
## [4] "tbl_lazy" "tbl"
filtered <- costumers %>%
select(CustomerId, FirstName, Country) %>%
filter(Country != "USA")
show_query(filtered)
## <SQL>
## SELECT *
## FROM (SELECT `CustomerId`, `FirstName`, `Country`
## FROM `customers`)
## WHERE (`Country` != 'USA')
#Data Wrangling dengan tidyverse
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.3 v stringr 1.4.0
## v tidyr 1.1.4 v forcats 0.5.1
## v readr 2.0.2
## Warning: package 'ggplot2' was built under R version 4.1.1
## Warning: package 'tidyr' was built under R version 4.1.1
## Warning: package 'readr' was built under R version 4.1.1
## Warning: package 'purrr' was built under R version 4.1.1
## Warning: package 'stringr' was built under R version 4.1.1
## Warning: package 'forcats' was built under R version 4.1.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(datasets)
data(Orange)
orange <- as_tibble(Orange)
#Fungsi arrange
orange %>% arrange(desc(circumference))
## # A tibble: 35 x 3
## Tree age circumference
## <ord> <dbl> <dbl>
## 1 4 1582 214
## 2 4 1372 209
## 3 2 1372 203
## 4 2 1582 203
## 5 4 1231 179
## 6 5 1582 177
## 7 5 1372 174
## 8 2 1231 172
## 9 4 1004 167
## 10 2 1004 156
## # ... with 25 more rows
orange %>% arrange(circumference)
## # A tibble: 35 x 3
## Tree age circumference
## <ord> <dbl> <dbl>
## 1 1 118 30
## 2 3 118 30
## 3 5 118 30
## 4 4 118 32
## 5 2 118 33
## 6 5 484 49
## 7 3 484 51
## 8 1 484 58
## 9 4 484 62
## 10 2 484 69
## # ... with 25 more rows
#Fungsi Filter
orange %>% filter(circumference > 150)
## # A tibble: 10 x 3
## Tree age circumference
## <ord> <dbl> <dbl>
## 1 2 1004 156
## 2 2 1231 172
## 3 2 1372 203
## 4 2 1582 203
## 5 4 1004 167
## 6 4 1231 179
## 7 4 1372 209
## 8 4 1582 214
## 9 5 1372 174
## 10 5 1582 177
#Menggabungkan fungsi
orange %>% mutate(growth = circumference / age) %>%
group_by(Tree) %>%
summarise(mean=mean(growth), .groups = 'drop')
## # A tibble: 5 x 2
## Tree mean
## <ord> <dbl>
## 1 3 0.123
## 2 1 0.130
## 3 5 0.137
## 4 2 0.166
## 5 4 0.167
#menampilkan orange dengan lingkar lebih dari 100 dan diurutkan dari terbesar
orange %>% select(Tree, circumference) %>%
filter(circumference > 100) %>%
arrange(desc(circumference))
## # A tibble: 22 x 2
## Tree circumference
## <ord> <dbl>
## 1 4 214
## 2 4 209
## 3 2 203
## 4 2 203
## 5 4 179
## 6 5 177
## 7 5 174
## 8 2 172
## 9 4 167
## 10 2 156
## # ... with 12 more rows
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.