Tugas 3 Praktikum STA581
Soal 1
Penggunaan inner_join(), left_join(), right_join(), full_join(), semi_join(), anti_join() untuk menggabungkan 2 table atau lebih dari database chinook.db atau Northwind_large.sqlite.
library(RSQLite)
library(DBI)
library(tidyverse)## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Chinook <- DBI::dbConnect(RSQLite::SQLite(), "C:/sqlite/db/chinook.db")class(Chinook)## [1] "SQLiteConnection"
## attr(,"package")
## [1] "RSQLite"
RSQLite::dbListTables(Chinook)## [1] "albums" "artists" "customers" "employees"
## [5] "genres" "invoice_items" "invoices" "media_types"
## [9] "playlist_track" "playlists" "sqlite_sequence" "sqlite_stat1"
## [13] "tracks"
Inner Join
customers<-dplyr::tbl(Chinook,"customers")
customers %>% head(n=5)## # Source: lazy query [?? x 13]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## CustomerId FirstName LastName Company Address City State Country PostalCode
## <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 LuÃs Gonçalves Embrae~ Av. Br~ São ~ SP Brazil 12227-000
## 2 2 Leonie Köhler <NA> Theodo~ Stut~ <NA> Germany 70174
## 3 3 François Tremblay <NA> 1498 r~ Mont~ QC Canada H2G 1A7
## 4 4 Bjørn Hansen <NA> Ullevå~ Oslo <NA> Norway 0171
## 5 5 František Wichterlová JetBra~ Klanov~ Prag~ <NA> Czech ~ 14700
## # ... with 4 more variables: Phone <chr>, Fax <chr>, Email <chr>,
## # SupportRepId <int>
invoices<-dplyr::tbl(Chinook,"invoices")
invoices %>% head(n=5)## # Source: lazy query [?? x 9]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## InvoiceId CustomerId InvoiceDate BillingAddress BillingCity BillingState
## <int> <int> <chr> <chr> <chr> <chr>
## 1 1 2 2009-01-01 00~ Theodor-Heuss-St~ Stuttgart <NA>
## 2 2 4 2009-01-02 00~ Ullevålsveien 14 Oslo <NA>
## 3 3 8 2009-01-03 00~ Grétrystraat 63 Brussels <NA>
## 4 4 14 2009-01-06 00~ 8210 111 ST NW Edmonton AB
## 5 5 23 2009-01-11 00~ 69 Salem Street Boston MA
## # ... with 3 more variables: BillingCountry <chr>, BillingPostalCode <chr>,
## # Total <dbl>
Melihat nama pelanggan dan tanggal pernah melakukan transaksi.
inner_join(customers, invoices) %>% select(FirstName, LastName, InvoiceDate)## Joining, by = "CustomerId"
## # Source: lazy query [?? x 3]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## FirstName LastName InvoiceDate
## <chr> <chr> <chr>
## 1 LuÃs Gonçalves 2010-03-11 00:00:00
## 2 LuÃs Gonçalves 2010-06-13 00:00:00
## 3 LuÃs Gonçalves 2010-09-15 00:00:00
## 4 LuÃs Gonçalves 2011-05-06 00:00:00
## 5 LuÃs Gonçalves 2012-10-27 00:00:00
## 6 LuÃs Gonçalves 2012-12-07 00:00:00
## 7 LuÃs Gonçalves 2013-08-07 00:00:00
## 8 Leonie Köhler 2009-01-01 00:00:00
## 9 Leonie Köhler 2009-02-11 00:00:00
## 10 Leonie Köhler 2009-10-12 00:00:00
## # ... with more rows
Melihat nama pelanggan dan banyak pembelian yang pernah dilakukan, diurutkan dari yang paling sedikit
inner_join(customers, invoices) %>% select(FirstName, LastName, InvoiceId) %>% count(FirstName, LastName) %>% arrange(n)## Joining, by = "CustomerId"
## # Source: lazy query [?? x 3]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## # Groups: FirstName
## # Ordered by: n
## FirstName LastName n
## <chr> <chr> <int>
## 1 Puja Srivastava 6
## 2 Aaron Mitchell 7
## 3 Alexandre Rocha 7
## 4 Astrid Gruber 7
## 5 Bjørn Hansen 7
## 6 Camille Bernard 7
## 7 Daan Peeters 7
## 8 Dan Miller 7
## 9 Diego Gutiérrez 7
## 10 Dominique Lefebvre 7
## # ... with more rows
Left Join
albums<-dplyr::tbl(Chinook,"albums")
albums %>% head(n=5)## # Source: lazy query [?? x 3]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## AlbumId Title ArtistId
## <int> <chr> <int>
## 1 1 For Those About To Rock We Salute You 1
## 2 2 Balls to the Wall 2
## 3 3 Restless and Wild 2
## 4 4 Let There Be Rock 1
## 5 5 Big Ones 3
invoice_items<-dplyr::tbl(Chinook,"invoice_items")
invoice_items %>% head(n=5)## # Source: lazy query [?? x 5]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## InvoiceLineId InvoiceId TrackId UnitPrice Quantity
## <int> <int> <int> <dbl> <int>
## 1 1 1 2 0.99 1
## 2 2 1 4 0.99 1
## 3 3 2 6 0.99 1
## 4 4 2 8 0.99 1
## 5 5 2 10 0.99 1
tracks<-dplyr::tbl(Chinook,"tracks")
tracks %>% head(n=5)## # Source: lazy query [?? x 9]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## TrackId Name AlbumId MediaTypeId GenreId Composer Milliseconds Bytes
## <int> <chr> <int> <int> <int> <chr> <int> <int>
## 1 1 For Tho~ 1 1 1 Angus Young,~ 343719 1.12e7
## 2 2 Balls t~ 2 2 1 <NA> 342562 5.51e6
## 3 3 Fast As~ 3 2 1 F. Baltes, S~ 230619 3.99e6
## 4 4 Restles~ 3 2 1 F. Baltes, R~ 252051 4.33e6
## 5 5 Princes~ 3 2 1 Deaffy & R.A~ 375418 6.29e6
## # ... with 1 more variable: UnitPrice <dbl>
Melihat 5 album teratas dengan penjualan terbanyak
left_join(left_join(invoice_items,tracks),albums) %>% select(Title) %>% count(Title) %>% arrange(desc(n)) %>% head(5)## Joining, by = c("TrackId", "UnitPrice")
## Joining, by = "AlbumId"
## # Source: lazy query [?? x 2]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## # Ordered by: desc(n)
## Title n
## <chr> <int>
## 1 Minha Historia 27
## 2 Greatest Hits 26
## 3 Unplugged 25
## 4 Acústico 22
## 5 Greatest Kiss 20
Right Join
playlist_track<-dplyr::tbl(Chinook,"playlist_track")
playlist_track %>% head(n=5)## # Source: lazy query [?? x 2]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## PlaylistId TrackId
## <int> <int>
## 1 1 3402
## 2 1 3389
## 3 1 3390
## 4 1 3391
## 5 1 3392
tracks<-dplyr::tbl(Chinook,"tracks")
tracks %>% head(n=5)## # Source: lazy query [?? x 9]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## TrackId Name AlbumId MediaTypeId GenreId Composer Milliseconds Bytes
## <int> <chr> <int> <int> <int> <chr> <int> <int>
## 1 1 For Tho~ 1 1 1 Angus Young,~ 343719 1.12e7
## 2 2 Balls t~ 2 2 1 <NA> 342562 5.51e6
## 3 3 Fast As~ 3 2 1 F. Baltes, S~ 230619 3.99e6
## 4 4 Restles~ 3 2 1 F. Baltes, R~ 252051 4.33e6
## 5 5 Princes~ 3 2 1 Deaffy & R.A~ 375418 6.29e6
## # ... with 1 more variable: UnitPrice <dbl>
Melihat daftar lagu yang terdapat pada playlist Classical
playlist_track %>%
right_join(tracks) %>%
filter(PlaylistID == 12) %>%
select(Name)## Joining, by = "TrackId"
## # Source: lazy query [?? x 1]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## Name
## <chr>
## 1 "Intoitus: Adorate Deum"
## 2 "Miserere mei, Deus"
## 3 "Canon and Gigue in D Major: I. Canon"
## 4 "Concerto No. 1 in E Major, RV 269 \"Spring\": I. Allegro"
## 5 "Concerto for 2 Violins in D Minor, BWV 1043: I. Vivace"
## 6 "Aria Mit 30 Veränderungen, BWV 988 \"Goldberg Variations\": Aria"
## 7 "Suite for Solo Cello No. 1 in G Major, BWV 1007: I. Prélude"
## 8 "The Messiah: Behold, I Tell You a Mystery... The Trumpet Shall Sound"
## 9 "Solomon HWV 67: The Arrival of the Queen of Sheba"
## 10 "\"Eine Kleine Nachtmusik\" Serenade In G, K. 525: I. Allegro"
## # ... with more rows
Full Join
customers<-dplyr::tbl(Chinook,"customers")
customers %>% head(n=5)## # Source: lazy query [?? x 13]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## CustomerId FirstName LastName Company Address City State Country PostalCode
## <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 LuÃs Gonçalves Embrae~ Av. Br~ São ~ SP Brazil 12227-000
## 2 2 Leonie Köhler <NA> Theodo~ Stut~ <NA> Germany 70174
## 3 3 François Tremblay <NA> 1498 r~ Mont~ QC Canada H2G 1A7
## 4 4 Bjørn Hansen <NA> Ullevå~ Oslo <NA> Norway 0171
## 5 5 František Wichterlová JetBra~ Klanov~ Prag~ <NA> Czech ~ 14700
## # ... with 4 more variables: Phone <chr>, Fax <chr>, Email <chr>,
## # SupportRepId <int>
invoices<-dplyr::tbl(Chinook,"invoices")
invoices %>% head(n=5)## # Source: lazy query [?? x 9]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## InvoiceId CustomerId InvoiceDate BillingAddress BillingCity BillingState
## <int> <int> <chr> <chr> <chr> <chr>
## 1 1 2 2009-01-01 00~ Theodor-Heuss-St~ Stuttgart <NA>
## 2 2 4 2009-01-02 00~ Ullevålsveien 14 Oslo <NA>
## 3 3 8 2009-01-03 00~ Grétrystraat 63 Brussels <NA>
## 4 4 14 2009-01-06 00~ 8210 111 ST NW Edmonton AB
## 5 5 23 2009-01-11 00~ 69 Salem Street Boston MA
## # ... with 3 more variables: BillingCountry <chr>, BillingPostalCode <chr>,
## # Total <dbl>
Melihat nama pelanggan dengan total harga pembeliannya melebihi $20
full_join(invoices,customers) %>% filter(Total>20) %>% select(FirstName, LastName, Total)## Joining, by = "CustomerId"
## # Source: lazy query [?? x 3]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## FirstName LastName Total
## <chr> <chr> <dbl>
## 1 Ladislav Kovács 21.9
## 2 Hugh O'Reilly 21.9
## 3 Richard Cunningham 23.9
## 4 Helena Holý 25.9
Semi Join
artists<-dplyr::tbl(Chinook,"artists")
artists %>% head(n=5)## # Source: lazy query [?? x 2]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## ArtistId Name
## <int> <chr>
## 1 1 AC/DC
## 2 2 Accept
## 3 3 Aerosmith
## 4 4 Alanis Morissette
## 5 5 Alice In Chains
albums<-dplyr::tbl(Chinook,"albums")
albums %>% head(n=5)## # Source: lazy query [?? x 3]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## AlbumId Title ArtistId
## <int> <chr> <int>
## 1 1 For Those About To Rock We Salute You 1
## 2 2 Balls to the Wall 2
## 3 3 Restless and Wild 2
## 4 4 Let There Be Rock 1
## 5 5 Big Ones 3
Melihat daftar nama artis yang telah mengeluarkan album yang ada pada daftar album
semi_join(artists, albums)## Joining, by = "ArtistId"
## # Source: lazy query [?? x 2]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## ArtistId Name
## <int> <chr>
## 1 1 AC/DC
## 2 2 Accept
## 3 3 Aerosmith
## 4 4 Alanis Morissette
## 5 5 Alice In Chains
## 6 6 Antônio Carlos Jobim
## 7 7 Apocalyptica
## 8 8 Audioslave
## 9 9 BackBeat
## 10 10 Billy Cobham
## # ... with more rows
Anti Join
tracks<-dplyr::tbl(Chinook,"tracks")
tracks %>% head(n=5)## # Source: lazy query [?? x 9]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## TrackId Name AlbumId MediaTypeId GenreId Composer Milliseconds Bytes
## <int> <chr> <int> <int> <int> <chr> <int> <int>
## 1 1 For Tho~ 1 1 1 Angus Young,~ 343719 1.12e7
## 2 2 Balls t~ 2 2 1 <NA> 342562 5.51e6
## 3 3 Fast As~ 3 2 1 F. Baltes, S~ 230619 3.99e6
## 4 4 Restles~ 3 2 1 F. Baltes, R~ 252051 4.33e6
## 5 5 Princes~ 3 2 1 Deaffy & R.A~ 375418 6.29e6
## # ... with 1 more variable: UnitPrice <dbl>
invoice_items<-dplyr::tbl(Chinook,"invoice_items")
invoice_items %>% head(n=5)## # Source: lazy query [?? x 5]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## InvoiceLineId InvoiceId TrackId UnitPrice Quantity
## <int> <int> <int> <dbl> <int>
## 1 1 1 2 0.99 1
## 2 2 1 4 0.99 1
## 3 3 2 6 0.99 1
## 4 4 2 8 0.99 1
## 5 5 2 10 0.99 1
Melihat daftar lagu dan rinciannya yang belum pernah terjual
anti_join(tracks, invoice_items)## Joining, by = c("TrackId", "UnitPrice")
## # Source: lazy query [?? x 9]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## TrackId Name AlbumId MediaTypeId GenreId Composer Milliseconds Bytes
## <int> <chr> <int> <int> <int> <chr> <int> <int>
## 1 7 Let's G~ 1 1 1 Angus Young~ 233926 7.64e6
## 2 11 C.O.D. 1 1 1 Angus Young~ 199836 6.57e6
## 3 17 Let The~ 4 1 1 AC/DC 366654 1.20e7
## 4 18 Bad Boy~ 4 1 1 AC/DC 267728 8.78e6
## 5 22 Whole L~ 4 1 1 AC/DC 323761 1.05e7
## 6 23 Walk On~ 5 1 1 Steven Tyle~ 295680 9.72e6
## 7 27 Dude (L~ 5 1 1 Steven Tyle~ 264855 8.68e6
## 8 29 Cryin' 5 1 1 Steven Tyle~ 309263 1.01e7
## 9 33 The Oth~ 5 1 1 Steven Tyle~ 244375 7.98e6
## 10 34 Crazy 5 1 1 Steven Tyle~ 316656 1.04e7
## # ... with more rows, and 1 more variable: UnitPrice <dbl>
Soal 2
Membuat satu Peta dari daerah Samarinda, Kalimantan Timur, menggunakan data jumlah SD, SMP, dan SMA/SMK perkota/kabupaten di wilayah Kalimantan Timur.
library(sf)## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1
library(ggplot2)
library(tigris)## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
library(dplyr)Admin2Kabupaten<- "D:/Wa Ode Rona Freya/IPB Pascasarjana/STA581 SAINS DATA/Pertemuan 3/idn_admbnda_adm2_bps_20200401.shp"glimpse(Admin2Kabupaten)## chr "D:/Wa Ode Rona Freya/IPB Pascasarjana/STA581 SAINS DATA/Pertemuan 3/idn_admbnda_adm2_bps_20200401.shp"
Admin2<-st_read(Admin2Kabupaten)## Reading layer `idn_admbnda_adm2_bps_20200401' from data source
## `D:\Wa Ode Rona Freya\IPB Pascasarjana\STA581 SAINS DATA\Pertemuan 3\idn_admbnda_adm2_bps_20200401.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 522 features and 14 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 95.01079 ymin: -11.00762 xmax: 141.0194 ymax: 6.07693
## Geodetic CRS: WGS 84
glimpse(Admin2)## Rows: 522
## Columns: 15
## $ Shape_Leng <dbl> 2.360029, 1.963994, 4.590182, 3.287754, 4.448584, 4.907219,~
## $ Shape_Area <dbl> 0.22896809, 0.15413587, 0.23639581, 0.31616114, 0.34303826,~
## $ ADM2_EN <chr> "Aceh Barat", "Aceh Barat Daya", "Aceh Besar", "Aceh Jaya",~
## $ ADM2_PCODE <chr> "ID1107", "ID1112", "ID1108", "ID1116", "ID1103", "ID1102",~
## $ ADM2_REF <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
## $ ADM2ALT1EN <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
## $ ADM2ALT2EN <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
## $ ADM1_EN <chr> "Aceh", "Aceh", "Aceh", "Aceh", "Aceh", "Aceh", "Aceh", "Ac~
## $ ADM1_PCODE <chr> "ID11", "ID11", "ID11", "ID11", "ID11", "ID11", "ID11", "ID~
## $ ADM0_EN <chr> "Indonesia", "Indonesia", "Indonesia", "Indonesia", "Indone~
## $ ADM0_PCODE <chr> "ID", "ID", "ID", "ID", "ID", "ID", "ID", "ID", "ID", "ID",~
## $ date <date> 2019-12-20, 2019-12-20, 2019-12-20, 2019-12-20, 2019-12-20~
## $ validOn <date> 2020-04-01, 2020-04-01, 2020-04-01, 2020-04-01, 2020-04-01~
## $ validTo <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((96.26836 4...., MULTIPOLYGON (~
KaltimIndonesia<- readxl::read_xlsx("KaltimIndonesia.xlsx")merged_KaltimIndonesia <- geo_join(spatial_data=Admin2,
data_frame=KaltimIndonesia, by_sp="ADM2_PCODE",
by_df="ADM2_PCODE", how = "inner")## Warning: We recommend using the dplyr::*_join() family of functions instead.
mycol <- c("green", "yellow", "red", "red4")pDATA<-ggplot()+
geom_sf(data=merged_KaltimIndonesia,aes(fill=DATA))+
scale_fill_gradientn(colours=mycol)+
labs(title="Kaltim dan Indonesia")
pDATAKaltimKalimantan<- readxl::read_xlsx("KaltimKalimantan.xlsx")merged_KaltimKalimantan <- geo_join(spatial_data=Admin2,
data_frame=KaltimKalimantan, by_sp="ADM2_PCODE",
by_df="ADM2_PCODE", how = "inner")## Warning: We recommend using the dplyr::*_join() family of functions instead.
pDATA<-ggplot()+
geom_sf(data=merged_KaltimKalimantan,aes(fill=DATA))+
scale_fill_gradientn(colours=mycol)+
labs(title="Kaltim dan Pulau Kalimantan")
pDATASamarindaKaltim<- readxl::read_xlsx("SamarindaKaltim.xlsx")merged_SamarindaKaltim <- geo_join(spatial_data=Admin2,
data_frame=SamarindaKaltim, by_sp="ADM2_PCODE",
by_df="ADM2_PCODE", how = "inner")## Warning: We recommend using the dplyr::*_join() family of functions instead.
pDATA<-ggplot()+
geom_sf(data=merged_SamarindaKaltim,aes(fill=DATA))+
scale_fill_gradientn(colours=mycol)+
labs(title="Samarinda dan Kalimantan Timur")
pDATA