A.Akses Database Company

1. Engine SQL

db<-DBI::dbConnect(RSQLite::SQLite(), "D:/STATISTIKA IPB/Manajemen Data Relasional/company.db")
SELECT
  *
FROM
  employee;
8 records
Fname Minit Lname Ssn Bdate Address Sex Salary Super_ssn Dno
John B Smith 123456789 1965-01-09 731 Fondren, Houston TX M 30000 333445555 5
Franklin T Wong 333445555 1965-12-08 638 Voss, Houston TX M 40000 888665555 5
Alicia J Zelaya 999887777 1968-01-19 3321 Castle, Spring TX F 25000 987654321 4
Jennifer S Wallace 987654321 1941-06-20 291 Berry, Bellaire TX F 43000 888665555 4
Ramesh K Narayan 666884444 1962-09-15 975 Fire Oak, Humble TX M 38000 333445555 5
Joyce A English 453453453 1972-07-31 5631 Rice, Houston TX F 25000 333445555 5
Ahmad V Jabbar 987987987 1969-03-29 980 Dallas, Houston TX M 25000 987654321 4
James E Borg 888665555 1937-11-10 450 Stone, Houston TX M 55000 NA 1

2. Menggunakan DPLYR

company<-DBI::dbConnect(RSQLite::SQLite(), "D:/STATISTIKA IPB/Manajemen Data Relasional/company.db")
company<-DBI::dbConnect(RSQLite::SQLite(), "D:/STATISTIKA IPB/Manajemen Data Relasional/company.db")
class(company)
## [1] "SQLiteConnection"
## attr(,"package")
## [1] "RSQLite"
RSQLite::dbListTables(company)
## [1] "DEPARTMENT"     "DEPENDENT"      "DEPT_LOCATIONS" "EMPLOYEE"      
## [5] "PROJECT"        "WORKS_ON"
dplyr::tbl(company, "DEPT_LOCATIONS")
## # Source:   table<DEPT_LOCATIONS> [?? x 2]
## # Database: sqlite 3.36.0 [D:\STATISTIKA IPB\Manajemen Data
## #   Relasional\company.db]
##   Dnumber Dlocation
##     <int> <chr>    
## 1       1 Houston  
## 2       4 Stafford 
## 3       5 Bellaire 
## 4       5 Sugarland
## 5       5 Houston
DEPT_LOCATIONS <- dplyr::tbl(company, "DEPT_LOCATIONS")
class(DEPT_LOCATIONS)
## [1] "tbl_SQLiteConnection" "tbl_dbi"              "tbl_sql"             
## [4] "tbl_lazy"             "tbl"
DEPT_LOCATIONS
## # Source:   table<DEPT_LOCATIONS> [?? x 2]
## # Database: sqlite 3.36.0 [D:\STATISTIKA IPB\Manajemen Data
## #   Relasional\company.db]
##   Dnumber Dlocation
##     <int> <chr>    
## 1       1 Houston  
## 2       4 Stafford 
## 3       5 Bellaire 
## 4       5 Sugarland
## 5       5 Houston
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
q<-DEPT_LOCATIONS %>%
  select(Dnumber, Dlocation) %>%
  filter(DNumber>4)
q
## # Source:   lazy query [?? x 2]
## # Database: sqlite 3.36.0 [D:\STATISTIKA IPB\Manajemen Data
## #   Relasional\company.db]
##   Dnumber Dlocation
##     <int> <chr>    
## 1       5 Bellaire 
## 2       5 Houston  
## 3       5 Sugarland
dplyr::show_query(q)
## <SQL>
## SELECT *
## FROM (SELECT `Dnumber`, `Dlocation`
## FROM `DEPT_LOCATIONS`)
## WHERE (`DNumber` > 4.0)

B.Data Wrangling Datasets Quakes

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v stringr 1.4.0
## v tidyr   1.1.4     v forcats 0.5.1
## v readr   2.1.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(datasets)
data(quakes)
quakes<-tibble::as_tibble(quakes)
class(quakes)
## [1] "tbl_df"     "tbl"        "data.frame"
mean(quakes$long) 
## [1] 179.462
mean(quakes$long) == quakes$long %>% mean()
## [1] TRUE
quakes %>% group_by(stations) %>% summarise(mean=mean(mag), .groups='drop')
## # A tibble: 102 x 2
##    stations  mean
##       <int> <dbl>
##  1       10  4.23
##  2       11  4.23
##  3       12  4.20
##  4       13  4.33
##  5       14  4.28
##  6       15  4.28
##  7       16  4.27
##  8       17  4.35
##  9       18  4.44
## 10       19  4.38
## # ... with 92 more rows
quakes %>% arrange(mag)
## # A tibble: 1,000 x 5
##      lat  long depth   mag stations
##    <dbl> <dbl> <int> <dbl>    <int>
##  1 -20.4  182.   649     4       11
##  2 -19.7  184.   195     4       12
##  3 -17.9  181.   537     4       15
##  4 -23.6  181.   349     4       10
##  5 -19.3  184.   223     4       15
##  6 -22.1  181.   584     4       11
##  7 -15.3  186.   152     4       11
##  8 -17.7  182.   450     4       11
##  9 -19.7  182.   375     4       18
## 10 -19.1  182.   477     4       16
## # ... with 990 more rows
quakes %>% arrange(desc(mag))
## # A tibble: 1,000 x 5
##      lat  long depth   mag stations
##    <dbl> <dbl> <int> <dbl>    <int>
##  1 -15.6  168.   127   6.4      122
##  2 -20.7  170.   139   6.1       94
##  3 -13.6  166.    50   6         83
##  4 -12.2  167.   242   6        132
##  5 -21.6  171.   165   6        119
##  6 -22.9  184.    64   5.9      118
##  7 -21.1  181.   627   5.9      119
##  8 -22.6  186.    42   5.7       76
##  9 -23.3  184.    56   5.7      106
## 10 -32.2  180.   216   5.7       90
## # ... with 990 more rows
quakes %>% filter(mag >= "5.0")
## # A tibble: 151 x 5
##      lat  long depth   mag stations
##    <dbl> <dbl> <int> <dbl>    <int>
##  1 -26    184.    42   5.4       43
##  2 -20.7  170.   139   6.1       94
##  3 -13.6  166.    50   6         83
##  4 -19.7  180.   431   5.4       57
##  5 -16.5  181.   498   5.2       79
##  6 -19.0  185.   129   5.1       73
##  7 -15.5  188.    40   5.5       91
##  8 -23.7  180.   506   5.2       75
##  9 -29.0  181.   304   5.3       60
## 10 -34.0  180.    75   5.2       65
## # ... with 141 more rows
quakes %>% select(lat,long,mag,stations)
## # A tibble: 1,000 x 4
##      lat  long   mag stations
##    <dbl> <dbl> <dbl>    <int>
##  1 -20.4  182.   4.8       41
##  2 -20.6  181.   4.2       15
##  3 -26    184.   5.4       43
##  4 -18.0  182.   4.1       19
##  5 -20.4  182.   4         11
##  6 -19.7  184.   4         12
##  7 -11.7  166.   4.8       43
##  8 -28.1  182.   4.4       15
##  9 -28.7  182.   4.7       35
## 10 -17.5  180.   4.3       19
## # ... with 990 more rows
quakes %>% select(-depth)
## # A tibble: 1,000 x 4
##      lat  long   mag stations
##    <dbl> <dbl> <dbl>    <int>
##  1 -20.4  182.   4.8       41
##  2 -20.6  181.   4.2       15
##  3 -26    184.   5.4       43
##  4 -18.0  182.   4.1       19
##  5 -20.4  182.   4         11
##  6 -19.7  184.   4         12
##  7 -11.7  166.   4.8       43
##  8 -28.1  182.   4.4       15
##  9 -28.7  182.   4.7       35
## 10 -17.5  180.   4.3       19
## # ... with 990 more rows
quakes %>% mutate(stations=lat+long)
## # A tibble: 1,000 x 5
##      lat  long depth   mag stations
##    <dbl> <dbl> <int> <dbl>    <dbl>
##  1 -20.4  182.   562   4.8     161.
##  2 -20.6  181.   650   4.2     160.
##  3 -26    184.    42   5.4     158.
##  4 -18.0  182.   626   4.1     164.
##  5 -20.4  182.   649   4       162.
##  6 -19.7  184.   195   4       165.
##  7 -11.7  166.    82   4.8     154.
##  8 -28.1  182.   194   4.4     154.
##  9 -28.7  182.   211   4.7     153 
## 10 -17.5  180.   622   4.3     162.
## # ... with 990 more rows
quakes %>% select(-depth) %>% mutate(stations=lat+long)
## # A tibble: 1,000 x 4
##      lat  long   mag stations
##    <dbl> <dbl> <dbl>    <dbl>
##  1 -20.4  182.   4.8     161.
##  2 -20.6  181.   4.2     160.
##  3 -26    184.   5.4     158.
##  4 -18.0  182.   4.1     164.
##  5 -20.4  182.   4       162.
##  6 -19.7  184.   4       165.
##  7 -11.7  166.   4.8     154.
##  8 -28.1  182.   4.4     154.
##  9 -28.7  182.   4.7     153 
## 10 -17.5  180.   4.3     162.
## # ... with 990 more rows
quakesbaru <- quakes %>% select(-depth) %>% mutate(stations=lat+long)
quakesbaru
## # A tibble: 1,000 x 4
##      lat  long   mag stations
##    <dbl> <dbl> <dbl>    <dbl>
##  1 -20.4  182.   4.8     161.
##  2 -20.6  181.   4.2     160.
##  3 -26    184.   5.4     158.
##  4 -18.0  182.   4.1     164.
##  5 -20.4  182.   4       162.
##  6 -19.7  184.   4       165.
##  7 -11.7  166.   4.8     154.
##  8 -28.1  182.   4.4     154.
##  9 -28.7  182.   4.7     153 
## 10 -17.5  180.   4.3     162.
## # ... with 990 more rows