db<-DBI::dbConnect(RSQLite::SQLite(), "C:/MANAJEMEN DATA RELASIONAL/company.db")
Memilih Semua Kolom dengan Filter Tertentu
SELECT
*
FROM
employee
WHERE
SALARY > 30000;
| Fname | Minit | Lname | Ssn | Bdate | Address | Sex | Salary | Super_ssn | Dno |
|---|---|---|---|---|---|---|---|---|---|
| Franklin | T | Wong | 333445555 | 1965-12-08 | 638 Voss, Houston TX | M | 40000 | 888665555 | 5 |
| Jennifer | S | Wallace | 987654321 | 1941-06-20 | 291 Berry, Bellaire TX | F | 43000 | 888665555 | 4 |
| Ramesh | K | Narayan | 666884444 | 1962-09-15 | 975 Fire Oak, Humble TX | M | 38000 | 333445555 | 5 |
| James | E | Borg | 888665555 | 1937-11-10 | 450 Stone, Houston TX | M | 55000 | NA | 1 |
Fungsi dapat dijalankankan tanpa memanggil package dengan perintah library() atau require(), jika syntax yang ditulis menyertakan nama package tempat fungsi tersebut berasal.
Untuk menjalankan syntax berikut ini, kita perlu mengunduh Database company.db dan menyimpannya pada C:/sqlite/
company<-DBI::dbConnect(RSQLite::SQLite(), "C:/sqlite/company.db")
class(company)
## [1] "SQLiteConnection"
## attr(,"package")
## [1] "RSQLite"
Selanjutnya, fungsi dbListTables dapat digunakan untuk melihat tables yang terdapat di dalam database.
RSQLite::dbListTables(company)
## [1] "DEPARTMENT" "DEPENDENT" "DEPT_LOCATIONS" "EMPLOYEE"
## [5] "PROJECT" "WORKS_ON"
dplyr::tbl(company, "works_on")
## # Source: table<works_on> [?? x 3]
## # Database: sqlite 3.37.0 [C:\sqlite\company.db]
## Essn Pno Hours
## <chr> <int> <dbl>
## 1 123456789 1 32.5
## 2 123456789 2 7.5
## 3 666884444 3 40
## 4 453453453 1 20
## 5 453453453 2 20
## 6 333445555 2 10
## 7 333445555 3 10
## 8 333445555 10 10
## 9 333445555 20 10
## 10 999887777 30 30
## # ... with more rows
works_on <- dplyr::tbl(company, "works_on")
class("works_on")
## [1] "character"
works_on
## # Source: table<works_on> [?? x 3]
## # Database: sqlite 3.37.0 [C:\sqlite\company.db]
## Essn Pno Hours
## <chr> <int> <dbl>
## 1 123456789 1 32.5
## 2 123456789 2 7.5
## 3 666884444 3 40
## 4 453453453 1 20
## 5 453453453 2 20
## 6 333445555 2 10
## 7 333445555 3 10
## 8 333445555 10 10
## 9 333445555 20 10
## 10 999887777 30 30
## # ... with more rows
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
q<-works_on %>%
select(Essn, Pno, Hours) %>%
filter(Hours>10)
q
## # Source: lazy query [?? x 3]
## # Database: sqlite 3.37.0 [C:\sqlite\company.db]
## Essn Pno Hours
## <chr> <int> <dbl>
## 1 123456789 1 32.5
## 2 666884444 3 40
## 3 453453453 1 20
## 4 453453453 2 20
## 5 999887777 30 30
## 6 987987987 10 35
## 7 987654321 30 20
## 8 987654321 20 15
## 9 888665555 20 16
dplyr::show_query(q)
## <SQL>
## SELECT *
## FROM (SELECT `Essn`, `Pno`, `Hours`
## FROM `works_on`)
## WHERE (`Hours` > 10.0)
Load package jika sudah diinstall:
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.5 v stringr 1.4.0
## v tidyr 1.1.4 v forcats 0.5.1
## v readr 2.1.0
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'purrr' was built under R version 4.1.2
## Warning: package 'stringr' was built under R version 4.1.2
## Warning: package 'forcats' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Untuk melihat isi dari package, kita dapat menggunakan perintah help() atau dengan menuliskan ?? yang diikuti dengan keyword yang akan dicari.
?tidyverse
## starting httpd help server ... done
Digunakan data ChickWeight dari package datasets
library(datasets)
data("ChickWeight")
ChickWeight<-tibble::as_tibble(ChickWeight)
class(ChickWeight)
## [1] "tbl_df" "tbl" "data.frame"
View(ChickWeight)
glimpse(ChickWeight)
## Rows: 578
## Columns: 4
## $ weight <dbl> 42, 51, 59, 64, 76, 93, 106, 125, 149, 171, 199, 205, 40, 49, 5~
## $ Time <dbl> 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 21, 0, 2, 4, 6, 8, 10, 1~
## $ Chick <ord> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, ~
## $ Diet <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
Menggunaan fungsi summarise() untuk meringkas data.
ChickWeight %>%
group_by(Diet) %>%
summarise(mean_weight=mean(weight), .groups='drop')
## # A tibble: 4 x 2
## Diet mean_weight
## <fct> <dbl>
## 1 1 103.
## 2 2 123.
## 3 3 143.
## 4 4 135.
Menggunakan fungsi arrange() untuk mengurutkan data.
ChickWeight %>%
arrange(weight)
## # A tibble: 578 x 4
## weight Time Chick Diet
## <dbl> <dbl> <ord> <fct>
## 1 35 2 18 1
## 2 39 2 3 1
## 3 39 0 18 1
## 4 39 0 27 2
## 5 39 0 28 2
## 6 39 0 29 2
## 7 39 0 33 3
## 8 39 0 36 3
## 9 39 0 48 4
## 10 40 0 2 1
## # ... with 568 more rows
Menggunakan fungsi filter() untuk memilih sebagian data berdasarkan nilai tertentu.
ChickWeight %>%
filter(weight>100)
## # A tibble: 300 x 4
## weight Time Chick Diet
## <dbl> <dbl> <ord> <fct>
## 1 106 12 1 1
## 2 125 14 1 1
## 3 149 16 1 1
## 4 171 18 1 1
## 5 199 20 1 1
## 6 205 21 1 1
## 7 103 10 2 1
## 8 122 12 2 1
## 9 138 14 2 1
## 10 162 16 2 1
## # ... with 290 more rows
Menggunakan fungsi mutate() untuk menambahkan peubah baru pada data.
ChickWeight %>%
mutate(weight2=weight*2)
## # A tibble: 578 x 5
## weight Time Chick Diet weight2
## <dbl> <dbl> <ord> <fct> <dbl>
## 1 42 0 1 1 84
## 2 51 2 1 1 102
## 3 59 4 1 1 118
## 4 64 6 1 1 128
## 5 76 8 1 1 152
## 6 93 10 1 1 186
## 7 106 12 1 1 212
## 8 125 14 1 1 250
## 9 149 16 1 1 298
## 10 171 18 1 1 342
## # ... with 568 more rows
Fungsi select() dapat digunakan untuk memilih subset data berdasarkan peubah tertentu.
ChickWeight %>%
select(weight, Time, Chick)
## # A tibble: 578 x 3
## weight Time Chick
## <dbl> <dbl> <ord>
## 1 42 0 1
## 2 51 2 1
## 3 59 4 1
## 4 64 6 1
## 5 76 8 1
## 6 93 10 1
## 7 106 12 1
## 8 125 14 1
## 9 149 16 1
## 10 171 18 1
## # ... with 568 more rows
ChickWeight %>%
select(weight, Time, Chick) %>%
mutate(weight2=weight*2)
## # A tibble: 578 x 4
## weight Time Chick weight2
## <dbl> <dbl> <ord> <dbl>
## 1 42 0 1 84
## 2 51 2 1 102
## 3 59 4 1 118
## 4 64 6 1 128
## 5 76 8 1 152
## 6 93 10 1 186
## 7 106 12 1 212
## 8 125 14 1 250
## 9 149 16 1 298
## 10 171 18 1 342
## # ... with 568 more rows
ChickWeight_baru1 <- ChickWeight %>%
select(weight, Time, Chick) %>%
mutate(weight2=weight*2)
ChickWeight_baru1
## # A tibble: 578 x 4
## weight Time Chick weight2
## <dbl> <dbl> <ord> <dbl>
## 1 42 0 1 84
## 2 51 2 1 102
## 3 59 4 1 118
## 4 64 6 1 128
## 5 76 8 1 152
## 6 93 10 1 186
## 7 106 12 1 212
## 8 125 14 1 250
## 9 149 16 1 298
## 10 171 18 1 342
## # ... with 568 more rows
ChickWeight %>%
select(Diet, weight) %>%
mutate(weight2=weight*2) %>%
arrange(weight2) %>%
filter(weight2>100) %>%
group_by(Diet) %>% summarise(mean_weight=mean(weight2), .groups='drop')
## # A tibble: 4 x 2
## Diet mean_weight
## <fct> <dbl>
## 1 1 230.
## 2 2 271.
## 3 3 319.
## 4 4 293.
ChickWeight_baru2 <- ChickWeight %>%
select(Diet, weight) %>%
mutate(weight2=weight*2) %>%
arrange(weight2) %>%
filter(weight2>100) %>%
group_by(Diet) %>% summarise(mean_weight=mean(weight2), .groups='drop')
ChickWeight_baru2
## # A tibble: 4 x 2
## Diet mean_weight
## <fct> <dbl>
## 1 1 230.
## 2 2 271.
## 3 3 319.
## 4 4 293.