A. Contoh Akses Database
1. Engine SQL
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(RSQLite)
library(DBI)
chinook <- DBI::dbConnect(RSQLite::SQLite(), "C:/sqlite/chinook.db")
chinook
## <SQLiteConnection>
## Path: C:\sqlite\chinook.db
## Extensions: TRUE
class(chinook)
## [1] "SQLiteConnection"
## attr(,"package")
## [1] "RSQLite"
dbListTables(chinook)
## [1] "albums" "artists" "customers" "employees"
## [5] "genres" "invoice_items" "invoices" "media_types"
## [9] "playlist_track" "playlists" "sqlite_sequence" "sqlite_stat1"
## [13] "tracks"
SELECT
InvoiceId, TrackId, UnitPrice
FROM
INVOICE_ITEMS;
Displaying records 1 - 10
| 1 |
2 |
0.99 |
| 1 |
4 |
0.99 |
| 2 |
6 |
0.99 |
| 2 |
8 |
0.99 |
| 2 |
10 |
0.99 |
| 2 |
12 |
0.99 |
| 3 |
16 |
0.99 |
| 3 |
20 |
0.99 |
| 3 |
24 |
0.99 |
| 3 |
28 |
0.99 |
2. Package dplyr
library(dplyr)
chinook <- DBI::dbConnect(RSQLite::SQLite(), "C:/sqlite/chinook.db")
dbListTables(chinook)
## [1] "albums" "artists" "customers" "employees"
## [5] "genres" "invoice_items" "invoices" "media_types"
## [9] "playlist_track" "playlists" "sqlite_sequence" "sqlite_stat1"
## [13] "tracks"
invoices <- tbl(chinook, "invoices")
class(invoices)
## [1] "tbl_SQLiteConnection" "tbl_dbi" "tbl_sql"
## [4] "tbl_lazy" "tbl"
query <- invoices %>%
select(-BillingState)
query
## # Source: lazy query [?? x 8]
## # Database: sqlite 3.37.0 [C:\sqlite\chinook.db]
## InvoiceId CustomerId InvoiceDate BillingAddress BillingCity BillingCountry
## <int> <int> <chr> <chr> <chr> <chr>
## 1 1 2 2009-01-01 0~ Theodor-Heuss-~ Stuttgart Germany
## 2 2 4 2009-01-02 0~ Ullevålsveien ~ Oslo Norway
## 3 3 8 2009-01-03 0~ Grétrystraat 63 Brussels Belgium
## 4 4 14 2009-01-06 0~ 8210 111 ST NW Edmonton Canada
## 5 5 23 2009-01-11 0~ 69 Salem Street Boston USA
## 6 6 37 2009-01-19 0~ Berger Straße ~ Frankfurt Germany
## 7 7 38 2009-02-01 0~ Barbarossastra~ Berlin Germany
## 8 8 40 2009-02-01 0~ 8, Rue Hanovre Paris France
## 9 9 42 2009-02-02 0~ 9, Place Louis~ Bordeaux France
## 10 10 46 2009-02-03 0~ 3 Chatham Stre~ Dublin Ireland
## # ... with more rows, and 2 more variables: BillingPostalCode <chr>,
## # Total <dbl>
show_query(query)
## <SQL>
## SELECT `InvoiceId`, `CustomerId`, `InvoiceDate`, `BillingAddress`, `BillingCity`, `BillingCountry`, `BillingPostalCode`, `Total`
## FROM `invoices`
B. Data Wrangling
library(datasets)
trees
## Girth Height Volume
## 1 8.3 70 10.3
## 2 8.6 65 10.3
## 3 8.8 63 10.2
## 4 10.5 72 16.4
## 5 10.7 81 18.8
## 6 10.8 83 19.7
## 7 11.0 66 15.6
## 8 11.0 75 18.2
## 9 11.1 80 22.6
## 10 11.2 75 19.9
## 11 11.3 79 24.2
## 12 11.4 76 21.0
## 13 11.4 76 21.4
## 14 11.7 69 21.3
## 15 12.0 75 19.1
## 16 12.9 74 22.2
## 17 12.9 85 33.8
## 18 13.3 86 27.4
## 19 13.7 71 25.7
## 20 13.8 64 24.9
## 21 14.0 78 34.5
## 22 14.2 80 31.7
## 23 14.5 74 36.3
## 24 16.0 72 38.3
## 25 16.3 77 42.6
## 26 17.3 81 55.4
## 27 17.5 82 55.7
## 28 17.9 80 58.3
## 29 18.0 80 51.5
## 30 18.0 80 51.0
## 31 20.6 87 77.0
colnames(trees)[1] <- "diameter"
colnames(trees)[2] <- "height"
colnames(trees)[3] <- "volume"
trees
## diameter height volume
## 1 8.3 70 10.3
## 2 8.6 65 10.3
## 3 8.8 63 10.2
## 4 10.5 72 16.4
## 5 10.7 81 18.8
## 6 10.8 83 19.7
## 7 11.0 66 15.6
## 8 11.0 75 18.2
## 9 11.1 80 22.6
## 10 11.2 75 19.9
## 11 11.3 79 24.2
## 12 11.4 76 21.0
## 13 11.4 76 21.4
## 14 11.7 69 21.3
## 15 12.0 75 19.1
## 16 12.9 74 22.2
## 17 12.9 85 33.8
## 18 13.3 86 27.4
## 19 13.7 71 25.7
## 20 13.8 64 24.9
## 21 14.0 78 34.5
## 22 14.2 80 31.7
## 23 14.5 74 36.3
## 24 16.0 72 38.3
## 25 16.3 77 42.6
## 26 17.3 81 55.4
## 27 17.5 82 55.7
## 28 17.9 80 58.3
## 29 18.0 80 51.5
## 30 18.0 80 51.0
## 31 20.6 87 77.0
trees2 <- trees %>%
mutate(Diameter=diameter*0.0254) %>%
mutate(Height=height*0.305) %>%
mutate(Volume=3.14*(Diameter/2)**2*Height)
trees2
## diameter height volume Diameter Height Volume
## 1 8.3 70 10.3 0.21082 21.350 0.7448883
## 2 8.6 65 10.3 0.21844 19.825 0.7425867
## 3 8.8 63 10.2 0.22352 19.215 0.7536034
## 4 10.5 72 16.4 0.26670 21.960 1.2261625
## 5 10.7 81 18.8 0.27178 24.705 1.4324831
## 6 10.8 83 19.7 0.27432 25.315 1.4954178
## 7 11.0 66 15.6 0.27940 20.130 1.2335769
## 8 11.0 75 18.2 0.27940 22.875 1.4017920
## 9 11.1 80 22.6 0.28194 24.400 1.5225546
## 10 11.2 75 19.9 0.28448 22.875 1.4532296
## 11 11.3 79 24.2 0.28702 24.095 1.5581918
## 12 11.4 76 21.0 0.28956 23.180 1.5256687
## 13 11.4 76 21.4 0.28956 23.180 1.5256687
## 14 11.7 69 21.3 0.29718 21.045 1.4590082
## 15 12.0 75 19.1 0.30480 22.875 1.6682483
## 16 12.9 74 22.2 0.32766 22.570 1.9021645
## 17 12.9 85 33.8 0.32766 25.925 2.1849187
## 18 13.3 86 27.4 0.33782 26.230 2.3498420
## 19 13.7 71 25.7 0.34798 21.655 2.0584315
## 20 13.8 64 24.9 0.35052 19.520 1.8826738
## 21 14.0 78 34.5 0.35560 23.790 2.3614981
## 22 14.2 80 31.7 0.36068 24.400 2.4917451
## 23 14.5 74 36.3 0.36830 22.570 2.4032816
## 24 16.0 72 38.3 0.40640 21.960 2.8471437
## 25 16.3 77 42.6 0.41402 23.485 3.1601148
## 26 17.3 81 55.4 0.43942 24.705 3.7446752
## 27 17.5 82 55.7 0.44450 25.010 3.8790634
## 28 17.9 80 58.3 0.45466 24.400 3.9594328
## 29 18.0 80 51.5 0.45720 24.400 4.0037959
## 30 18.0 80 51.0 0.45720 24.400 4.0037959
## 31 20.6 87 77.0 0.52324 26.535 5.7028326
trees3 <- trees2 %>% select(Diameter, Height, Volume)
trees3
## Diameter Height Volume
## 1 0.21082 21.350 0.7448883
## 2 0.21844 19.825 0.7425867
## 3 0.22352 19.215 0.7536034
## 4 0.26670 21.960 1.2261625
## 5 0.27178 24.705 1.4324831
## 6 0.27432 25.315 1.4954178
## 7 0.27940 20.130 1.2335769
## 8 0.27940 22.875 1.4017920
## 9 0.28194 24.400 1.5225546
## 10 0.28448 22.875 1.4532296
## 11 0.28702 24.095 1.5581918
## 12 0.28956 23.180 1.5256687
## 13 0.28956 23.180 1.5256687
## 14 0.29718 21.045 1.4590082
## 15 0.30480 22.875 1.6682483
## 16 0.32766 22.570 1.9021645
## 17 0.32766 25.925 2.1849187
## 18 0.33782 26.230 2.3498420
## 19 0.34798 21.655 2.0584315
## 20 0.35052 19.520 1.8826738
## 21 0.35560 23.790 2.3614981
## 22 0.36068 24.400 2.4917451
## 23 0.36830 22.570 2.4032816
## 24 0.40640 21.960 2.8471437
## 25 0.41402 23.485 3.1601148
## 26 0.43942 24.705 3.7446752
## 27 0.44450 25.010 3.8790634
## 28 0.45466 24.400 3.9594328
## 29 0.45720 24.400 4.0037959
## 30 0.45720 24.400 4.0037959
## 31 0.52324 26.535 5.7028326
trees4 <- trees3 %>% filter(Volume>1.5)
trees4
## Diameter Height Volume
## 1 0.28194 24.400 1.522555
## 2 0.28702 24.095 1.558192
## 3 0.28956 23.180 1.525669
## 4 0.28956 23.180 1.525669
## 5 0.30480 22.875 1.668248
## 6 0.32766 22.570 1.902164
## 7 0.32766 25.925 2.184919
## 8 0.33782 26.230 2.349842
## 9 0.34798 21.655 2.058431
## 10 0.35052 19.520 1.882674
## 11 0.35560 23.790 2.361498
## 12 0.36068 24.400 2.491745
## 13 0.36830 22.570 2.403282
## 14 0.40640 21.960 2.847144
## 15 0.41402 23.485 3.160115
## 16 0.43942 24.705 3.744675
## 17 0.44450 25.010 3.879063
## 18 0.45466 24.400 3.959433
## 19 0.45720 24.400 4.003796
## 20 0.45720 24.400 4.003796
## 21 0.52324 26.535 5.702833
trees4 %>% arrange(Volume)
## Diameter Height Volume
## 1 0.28194 24.400 1.522555
## 2 0.28956 23.180 1.525669
## 3 0.28956 23.180 1.525669
## 4 0.28702 24.095 1.558192
## 5 0.30480 22.875 1.668248
## 6 0.35052 19.520 1.882674
## 7 0.32766 22.570 1.902164
## 8 0.34798 21.655 2.058431
## 9 0.32766 25.925 2.184919
## 10 0.33782 26.230 2.349842
## 11 0.35560 23.790 2.361498
## 12 0.36830 22.570 2.403282
## 13 0.36068 24.400 2.491745
## 14 0.40640 21.960 2.847144
## 15 0.41402 23.485 3.160115
## 16 0.43942 24.705 3.744675
## 17 0.44450 25.010 3.879063
## 18 0.45466 24.400 3.959433
## 19 0.45720 24.400 4.003796
## 20 0.45720 24.400 4.003796
## 21 0.52324 26.535 5.702833
trees4 %>% summarise(qrt=quantile(Volume,c(0.25,0.5,0.75)))
## qrt
## 1 1.882674
## 2 2.361498
## 3 3.744675
Trees <- trees %>%
mutate(Diameter=diameter*0.0254) %>%
mutate(Height=height*0.305) %>%
mutate(Volume=3.14*(Diameter/2)**2*Height) %>%
select(Diameter, Height, Volume) %>%
filter(Volume>1.5) %>%
arrange(Volume)
Trees
## Diameter Height Volume
## 1 0.28194 24.400 1.522555
## 2 0.28956 23.180 1.525669
## 3 0.28956 23.180 1.525669
## 4 0.28702 24.095 1.558192
## 5 0.30480 22.875 1.668248
## 6 0.35052 19.520 1.882674
## 7 0.32766 22.570 1.902164
## 8 0.34798 21.655 2.058431
## 9 0.32766 25.925 2.184919
## 10 0.33782 26.230 2.349842
## 11 0.35560 23.790 2.361498
## 12 0.36830 22.570 2.403282
## 13 0.36068 24.400 2.491745
## 14 0.40640 21.960 2.847144
## 15 0.41402 23.485 3.160115
## 16 0.43942 24.705 3.744675
## 17 0.44450 25.010 3.879063
## 18 0.45466 24.400 3.959433
## 19 0.45720 24.400 4.003796
## 20 0.45720 24.400 4.003796
## 21 0.52324 26.535 5.702833