Tugas 3 Praktikum STA581

Soal 1

Penggunaan inner_join(), left_join(), right_join(), full_join(), semi_join(), anti_join() untuk menggabungkan 2 table atau lebih dari database chinook.db atau Northwind_large.sqlite.

library(RSQLite)
library(DBI)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
Chinook <- DBI::dbConnect(RSQLite::SQLite(), "C:/sqlite/db/chinook.db")
class(Chinook)
## [1] "SQLiteConnection"
## attr(,"package")
## [1] "RSQLite"
RSQLite::dbListTables(Chinook)
##  [1] "albums"          "artists"         "customers"       "employees"      
##  [5] "genres"          "invoice_items"   "invoices"        "media_types"    
##  [9] "playlist_track"  "playlists"       "sqlite_sequence" "sqlite_stat1"   
## [13] "tracks"

Inner Join

customers<-dplyr::tbl(Chinook,"customers")
customers %>% head(n=5)
## # Source:   lazy query [?? x 13]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
##   CustomerId FirstName LastName    Company Address City  State Country PostalCode
##        <int> <chr>     <chr>       <chr>   <chr>   <chr> <chr> <chr>   <chr>     
## 1          1 Luís      Gonçalves   Embrae~ Av. Br~ São ~ SP    Brazil  12227-000 
## 2          2 Leonie    Köhler      <NA>    Theodo~ Stut~ <NA>  Germany 70174     
## 3          3 François  Tremblay    <NA>    1498 r~ Mont~ QC    Canada  H2G 1A7   
## 4          4 Bjørn     Hansen      <NA>    Ullevå~ Oslo  <NA>  Norway  0171      
## 5          5 František Wichterlová JetBra~ Klanov~ Prag~ <NA>  Czech ~ 14700     
## # ... with 4 more variables: Phone <chr>, Fax <chr>, Email <chr>,
## #   SupportRepId <int>
invoices<-dplyr::tbl(Chinook,"invoices")
invoices %>% head(n=5)
## # Source:   lazy query [?? x 9]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
##   InvoiceId CustomerId InvoiceDate    BillingAddress    BillingCity BillingState
##       <int>      <int> <chr>          <chr>             <chr>       <chr>       
## 1         1          2 2009-01-01 00~ Theodor-Heuss-St~ Stuttgart   <NA>        
## 2         2          4 2009-01-02 00~ Ullevålsveien 14  Oslo        <NA>        
## 3         3          8 2009-01-03 00~ Grétrystraat 63   Brussels    <NA>        
## 4         4         14 2009-01-06 00~ 8210 111 ST NW    Edmonton    AB          
## 5         5         23 2009-01-11 00~ 69 Salem Street   Boston      MA          
## # ... with 3 more variables: BillingCountry <chr>, BillingPostalCode <chr>,
## #   Total <dbl>

Melihat nama pelanggan dan tanggal pernah melakukan transaksi.

inner_join(customers, invoices) %>% select(FirstName, LastName, InvoiceDate)
## Joining, by = "CustomerId"
## # Source:   lazy query [?? x 3]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
##    FirstName LastName  InvoiceDate        
##    <chr>     <chr>     <chr>              
##  1 Luís      Gonçalves 2010-03-11 00:00:00
##  2 Luís      Gonçalves 2010-06-13 00:00:00
##  3 Luís      Gonçalves 2010-09-15 00:00:00
##  4 Luís      Gonçalves 2011-05-06 00:00:00
##  5 Luís      Gonçalves 2012-10-27 00:00:00
##  6 Luís      Gonçalves 2012-12-07 00:00:00
##  7 Luís      Gonçalves 2013-08-07 00:00:00
##  8 Leonie    Köhler    2009-01-01 00:00:00
##  9 Leonie    Köhler    2009-02-11 00:00:00
## 10 Leonie    Köhler    2009-10-12 00:00:00
## # ... with more rows

Melihat nama pelanggan dan banyak pembelian yang pernah dilakukan, diurutkan dari yang paling sedikit

inner_join(customers, invoices) %>% select(FirstName, LastName, InvoiceId) %>% count(FirstName, LastName) %>% arrange(n)
## Joining, by = "CustomerId"
## # Source:     lazy query [?? x 3]
## # Database:   sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## # Groups:     FirstName
## # Ordered by: n
##    FirstName LastName       n
##    <chr>     <chr>      <int>
##  1 Puja      Srivastava     6
##  2 Aaron     Mitchell       7
##  3 Alexandre Rocha          7
##  4 Astrid    Gruber         7
##  5 Bjørn     Hansen         7
##  6 Camille   Bernard        7
##  7 Daan      Peeters        7
##  8 Dan       Miller         7
##  9 Diego     Gutiérrez      7
## 10 Dominique Lefebvre       7
## # ... with more rows

Left Join

albums<-dplyr::tbl(Chinook,"albums")
albums %>% head(n=5)
## # Source:   lazy query [?? x 3]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
##   AlbumId Title                                 ArtistId
##     <int> <chr>                                    <int>
## 1       1 For Those About To Rock We Salute You        1
## 2       2 Balls to the Wall                            2
## 3       3 Restless and Wild                            2
## 4       4 Let There Be Rock                            1
## 5       5 Big Ones                                     3
invoice_items<-dplyr::tbl(Chinook,"invoice_items")
invoice_items %>% head(n=5)
## # Source:   lazy query [?? x 5]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
##   InvoiceLineId InvoiceId TrackId UnitPrice Quantity
##           <int>     <int>   <int>     <dbl>    <int>
## 1             1         1       2      0.99        1
## 2             2         1       4      0.99        1
## 3             3         2       6      0.99        1
## 4             4         2       8      0.99        1
## 5             5         2      10      0.99        1
tracks<-dplyr::tbl(Chinook,"tracks")
tracks %>% head(n=5)
## # Source:   lazy query [?? x 9]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
##   TrackId Name     AlbumId MediaTypeId GenreId Composer      Milliseconds  Bytes
##     <int> <chr>      <int>       <int>   <int> <chr>                <int>  <int>
## 1       1 For Tho~       1           1       1 Angus Young,~       343719 1.12e7
## 2       2 Balls t~       2           2       1 <NA>                342562 5.51e6
## 3       3 Fast As~       3           2       1 F. Baltes, S~       230619 3.99e6
## 4       4 Restles~       3           2       1 F. Baltes, R~       252051 4.33e6
## 5       5 Princes~       3           2       1 Deaffy & R.A~       375418 6.29e6
## # ... with 1 more variable: UnitPrice <dbl>

Melihat 5 album teratas dengan penjualan terbanyak

left_join(left_join(invoice_items,tracks),albums) %>% select(Title) %>% count(Title) %>% arrange(desc(n)) %>% head(5)
## Joining, by = c("TrackId", "UnitPrice")
## Joining, by = "AlbumId"
## # Source:     lazy query [?? x 2]
## # Database:   sqlite 3.36.0 [C:\sqlite\db\chinook.db]
## # Ordered by: desc(n)
##   Title              n
##   <chr>          <int>
## 1 Minha Historia    27
## 2 Greatest Hits     26
## 3 Unplugged         25
## 4 Acústico          22
## 5 Greatest Kiss     20

Right Join

playlist_track<-dplyr::tbl(Chinook,"playlist_track")
playlist_track %>% head(n=5)
## # Source:   lazy query [?? x 2]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
##   PlaylistId TrackId
##        <int>   <int>
## 1          1    3402
## 2          1    3389
## 3          1    3390
## 4          1    3391
## 5          1    3392
tracks<-dplyr::tbl(Chinook,"tracks")
tracks %>% head(n=5)
## # Source:   lazy query [?? x 9]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
##   TrackId Name     AlbumId MediaTypeId GenreId Composer      Milliseconds  Bytes
##     <int> <chr>      <int>       <int>   <int> <chr>                <int>  <int>
## 1       1 For Tho~       1           1       1 Angus Young,~       343719 1.12e7
## 2       2 Balls t~       2           2       1 <NA>                342562 5.51e6
## 3       3 Fast As~       3           2       1 F. Baltes, S~       230619 3.99e6
## 4       4 Restles~       3           2       1 F. Baltes, R~       252051 4.33e6
## 5       5 Princes~       3           2       1 Deaffy & R.A~       375418 6.29e6
## # ... with 1 more variable: UnitPrice <dbl>

Melihat daftar lagu yang terdapat pada playlist Classical

playlist_track %>% 
  right_join(tracks) %>% 
  filter(PlaylistID == 12) %>% 
  select(Name)
## Joining, by = "TrackId"
## # Source:   lazy query [?? x 1]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
##    Name                                                                  
##    <chr>                                                                 
##  1 "Intoitus: Adorate Deum"                                              
##  2 "Miserere mei, Deus"                                                  
##  3 "Canon and Gigue in D Major: I. Canon"                                
##  4 "Concerto No. 1 in E Major, RV 269 \"Spring\": I. Allegro"            
##  5 "Concerto for 2 Violins in D Minor, BWV 1043: I. Vivace"              
##  6 "Aria Mit 30 Veränderungen, BWV 988 \"Goldberg Variations\": Aria"    
##  7 "Suite for Solo Cello No. 1 in G Major, BWV 1007: I. Prélude"         
##  8 "The Messiah: Behold, I Tell You a Mystery... The Trumpet Shall Sound"
##  9 "Solomon HWV 67: The Arrival of the Queen of Sheba"                   
## 10 "\"Eine Kleine Nachtmusik\" Serenade In G, K. 525: I. Allegro"        
## # ... with more rows

Full Join

customers<-dplyr::tbl(Chinook,"customers")
customers %>% head(n=5)
## # Source:   lazy query [?? x 13]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
##   CustomerId FirstName LastName    Company Address City  State Country PostalCode
##        <int> <chr>     <chr>       <chr>   <chr>   <chr> <chr> <chr>   <chr>     
## 1          1 Luís      Gonçalves   Embrae~ Av. Br~ São ~ SP    Brazil  12227-000 
## 2          2 Leonie    Köhler      <NA>    Theodo~ Stut~ <NA>  Germany 70174     
## 3          3 François  Tremblay    <NA>    1498 r~ Mont~ QC    Canada  H2G 1A7   
## 4          4 Bjørn     Hansen      <NA>    Ullevå~ Oslo  <NA>  Norway  0171      
## 5          5 František Wichterlová JetBra~ Klanov~ Prag~ <NA>  Czech ~ 14700     
## # ... with 4 more variables: Phone <chr>, Fax <chr>, Email <chr>,
## #   SupportRepId <int>
invoices<-dplyr::tbl(Chinook,"invoices")
invoices %>% head(n=5)
## # Source:   lazy query [?? x 9]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
##   InvoiceId CustomerId InvoiceDate    BillingAddress    BillingCity BillingState
##       <int>      <int> <chr>          <chr>             <chr>       <chr>       
## 1         1          2 2009-01-01 00~ Theodor-Heuss-St~ Stuttgart   <NA>        
## 2         2          4 2009-01-02 00~ Ullevålsveien 14  Oslo        <NA>        
## 3         3          8 2009-01-03 00~ Grétrystraat 63   Brussels    <NA>        
## 4         4         14 2009-01-06 00~ 8210 111 ST NW    Edmonton    AB          
## 5         5         23 2009-01-11 00~ 69 Salem Street   Boston      MA          
## # ... with 3 more variables: BillingCountry <chr>, BillingPostalCode <chr>,
## #   Total <dbl>

Melihat nama pelanggan dengan total harga pembeliannya melebihi $20

full_join(invoices,customers) %>% filter(Total>20) %>% select(FirstName, LastName, Total)
## Joining, by = "CustomerId"
## # Source:   lazy query [?? x 3]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
##   FirstName LastName   Total
##   <chr>     <chr>      <dbl>
## 1 Ladislav  Kovács      21.9
## 2 Hugh      O'Reilly    21.9
## 3 Richard   Cunningham  23.9
## 4 Helena    Holý        25.9

Semi Join

artists<-dplyr::tbl(Chinook,"artists")
artists %>% head(n=5)
## # Source:   lazy query [?? x 2]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
##   ArtistId Name             
##      <int> <chr>            
## 1        1 AC/DC            
## 2        2 Accept           
## 3        3 Aerosmith        
## 4        4 Alanis Morissette
## 5        5 Alice In Chains
albums<-dplyr::tbl(Chinook,"albums")
albums %>% head(n=5)
## # Source:   lazy query [?? x 3]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
##   AlbumId Title                                 ArtistId
##     <int> <chr>                                    <int>
## 1       1 For Those About To Rock We Salute You        1
## 2       2 Balls to the Wall                            2
## 3       3 Restless and Wild                            2
## 4       4 Let There Be Rock                            1
## 5       5 Big Ones                                     3

Melihat daftar nama artis yang telah mengeluarkan album yang ada pada daftar album

semi_join(artists, albums)
## Joining, by = "ArtistId"
## # Source:   lazy query [?? x 2]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
##    ArtistId Name                
##       <int> <chr>               
##  1        1 AC/DC               
##  2        2 Accept              
##  3        3 Aerosmith           
##  4        4 Alanis Morissette   
##  5        5 Alice In Chains     
##  6        6 Antônio Carlos Jobim
##  7        7 Apocalyptica        
##  8        8 Audioslave          
##  9        9 BackBeat            
## 10       10 Billy Cobham        
## # ... with more rows

Anti Join

tracks<-dplyr::tbl(Chinook,"tracks")
tracks %>% head(n=5)
## # Source:   lazy query [?? x 9]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
##   TrackId Name     AlbumId MediaTypeId GenreId Composer      Milliseconds  Bytes
##     <int> <chr>      <int>       <int>   <int> <chr>                <int>  <int>
## 1       1 For Tho~       1           1       1 Angus Young,~       343719 1.12e7
## 2       2 Balls t~       2           2       1 <NA>                342562 5.51e6
## 3       3 Fast As~       3           2       1 F. Baltes, S~       230619 3.99e6
## 4       4 Restles~       3           2       1 F. Baltes, R~       252051 4.33e6
## 5       5 Princes~       3           2       1 Deaffy & R.A~       375418 6.29e6
## # ... with 1 more variable: UnitPrice <dbl>
invoice_items<-dplyr::tbl(Chinook,"invoice_items")
invoice_items %>% head(n=5)
## # Source:   lazy query [?? x 5]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
##   InvoiceLineId InvoiceId TrackId UnitPrice Quantity
##           <int>     <int>   <int>     <dbl>    <int>
## 1             1         1       2      0.99        1
## 2             2         1       4      0.99        1
## 3             3         2       6      0.99        1
## 4             4         2       8      0.99        1
## 5             5         2      10      0.99        1

Melihat daftar lagu dan rinciannya yang belum pernah terjual

anti_join(tracks, invoice_items)
## Joining, by = c("TrackId", "UnitPrice")
## # Source:   lazy query [?? x 9]
## # Database: sqlite 3.36.0 [C:\sqlite\db\chinook.db]
##    TrackId Name     AlbumId MediaTypeId GenreId Composer     Milliseconds  Bytes
##      <int> <chr>      <int>       <int>   <int> <chr>               <int>  <int>
##  1       7 Let's G~       1           1       1 Angus Young~       233926 7.64e6
##  2      11 C.O.D.         1           1       1 Angus Young~       199836 6.57e6
##  3      17 Let The~       4           1       1 AC/DC              366654 1.20e7
##  4      18 Bad Boy~       4           1       1 AC/DC              267728 8.78e6
##  5      22 Whole L~       4           1       1 AC/DC              323761 1.05e7
##  6      23 Walk On~       5           1       1 Steven Tyle~       295680 9.72e6
##  7      27 Dude (L~       5           1       1 Steven Tyle~       264855 8.68e6
##  8      29 Cryin'         5           1       1 Steven Tyle~       309263 1.01e7
##  9      33 The Oth~       5           1       1 Steven Tyle~       244375 7.98e6
## 10      34 Crazy          5           1       1 Steven Tyle~       316656 1.04e7
## # ... with more rows, and 1 more variable: UnitPrice <dbl>

Soal 2

Membuat satu Peta dari daerah Samarinda, Kalimantan Timur, menggunakan data jumlah SD, SMP, dan SMA/SMK perkota/kabupaten di wilayah Kalimantan Timur.

library(sf)
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1
library(ggplot2)
library(tigris)
## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
library(dplyr)
Admin2Kabupaten<- "D:/Wa Ode Rona Freya/IPB Pascasarjana/STA581 SAINS DATA/Pertemuan 3/idn_admbnda_adm2_bps_20200401.shp"
glimpse(Admin2Kabupaten)
##  chr "D:/Wa Ode Rona Freya/IPB Pascasarjana/STA581 SAINS DATA/Pertemuan 3/idn_admbnda_adm2_bps_20200401.shp"
Admin2<-st_read(Admin2Kabupaten)
## Reading layer `idn_admbnda_adm2_bps_20200401' from data source 
##   `D:\Wa Ode Rona Freya\IPB Pascasarjana\STA581 SAINS DATA\Pertemuan 3\idn_admbnda_adm2_bps_20200401.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 522 features and 14 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 95.01079 ymin: -11.00762 xmax: 141.0194 ymax: 6.07693
## Geodetic CRS:  WGS 84
glimpse(Admin2)
## Rows: 522
## Columns: 15
## $ Shape_Leng <dbl> 2.360029, 1.963994, 4.590182, 3.287754, 4.448584, 4.907219,~
## $ Shape_Area <dbl> 0.22896809, 0.15413587, 0.23639581, 0.31616114, 0.34303826,~
## $ ADM2_EN    <chr> "Aceh Barat", "Aceh Barat Daya", "Aceh Besar", "Aceh Jaya",~
## $ ADM2_PCODE <chr> "ID1107", "ID1112", "ID1108", "ID1116", "ID1103", "ID1102",~
## $ ADM2_REF   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
## $ ADM2ALT1EN <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
## $ ADM2ALT2EN <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
## $ ADM1_EN    <chr> "Aceh", "Aceh", "Aceh", "Aceh", "Aceh", "Aceh", "Aceh", "Ac~
## $ ADM1_PCODE <chr> "ID11", "ID11", "ID11", "ID11", "ID11", "ID11", "ID11", "ID~
## $ ADM0_EN    <chr> "Indonesia", "Indonesia", "Indonesia", "Indonesia", "Indone~
## $ ADM0_PCODE <chr> "ID", "ID", "ID", "ID", "ID", "ID", "ID", "ID", "ID", "ID",~
## $ date       <date> 2019-12-20, 2019-12-20, 2019-12-20, 2019-12-20, 2019-12-20~
## $ validOn    <date> 2020-04-01, 2020-04-01, 2020-04-01, 2020-04-01, 2020-04-01~
## $ validTo    <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ geometry   <MULTIPOLYGON [°]> MULTIPOLYGON (((96.26836 4...., MULTIPOLYGON (~
KaltimIndonesia<- readxl::read_xlsx("KaltimIndonesia.xlsx")
merged_KaltimIndonesia <- geo_join(spatial_data=Admin2, 
                                  data_frame=KaltimIndonesia, by_sp="ADM2_PCODE", 
                                  by_df="ADM2_PCODE", how = "inner")
## Warning: We recommend using the dplyr::*_join() family of functions instead.
mycol <- c("green", "yellow", "red", "red4")
pDATA<-ggplot()+
  geom_sf(data=merged_KaltimIndonesia,aes(fill=DATA))+
  scale_fill_gradientn(colours=mycol)+
  labs(title="Kaltim dan Indonesia")
pDATA

KaltimKalimantan<- readxl::read_xlsx("KaltimKalimantan.xlsx")
merged_KaltimKalimantan <- geo_join(spatial_data=Admin2, 
                             data_frame=KaltimKalimantan, by_sp="ADM2_PCODE", 
                             by_df="ADM2_PCODE", how = "inner")
## Warning: We recommend using the dplyr::*_join() family of functions instead.
pDATA<-ggplot()+
  geom_sf(data=merged_KaltimKalimantan,aes(fill=DATA))+
  scale_fill_gradientn(colours=mycol)+
  labs(title="Kaltim dan Pulau Kalimantan")
pDATA

SamarindaKaltim<- readxl::read_xlsx("SamarindaKaltim.xlsx")
merged_SamarindaKaltim <- geo_join(spatial_data=Admin2, 
                             data_frame=SamarindaKaltim, by_sp="ADM2_PCODE", 
                             by_df="ADM2_PCODE", how = "inner")
## Warning: We recommend using the dplyr::*_join() family of functions instead.
pDATA<-ggplot()+
  geom_sf(data=merged_SamarindaKaltim,aes(fill=DATA))+
  scale_fill_gradientn(colours=mycol)+
  labs(title="Samarinda dan Kalimantan Timur")
pDATA