library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.3
## Warning: package 'ggplot2' was built under R version 4.5.3
## Warning: package 'tibble' was built under R version 4.5.3
## Warning: package 'tidyr' was built under R version 4.5.3
## Warning: package 'purrr' was built under R version 4.5.3
## Warning: package 'dplyr' was built under R version 4.5.3
## Warning: package 'stringr' was built under R version 4.5.3
## Warning: package 'forcats' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tibble)
# Build a three-row tibble with a back-ticked (non-syntactic) column name,
# a numeric `gaji` column, and a `tunjangan` list column whose elements are
# numeric vectors of different lengths.
tibble_karyawan <- tibble::tibble(
  `nama karyawan` = c("Andi", "Budi", "Cici"),
  gaji            = c(5e6, 6e6, 5.5e6),
  tunjangan       = list(
    c(100, 200),
    150,
    c(150, 250, 50)
  )
)

# Explicit print; produces the same output as auto-printing the bare name.
print(tibble_karyawan)
## # A tibble: 3 × 3
##   `nama karyawan`    gaji tunjangan
##   <chr>             <dbl> <list>   
## 1 Andi            5000000 <dbl [2]>
## 2 Budi            6000000 <dbl [1]>
## 3 Cici            5500000 <dbl [3]>

Memanggil Isi Tibble

# Take the `tunjangan` list column, then use `[[` to extract its third element
# as a plain numeric vector (not a one-element list).
tibble_karyawan[["tunjangan"]][[3]]
## [1] 150 250  50

Data Flights dengan Tibble

# install.packages("nycflights13")
library(nycflights13)

# Print the flights tibble; only the first 10 rows and the columns that fit
# the console are shown, with the remainder summarised in the footer.
print(flights)
## # A tibble: 336,776 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # ℹ 336,766 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>

Mengubah Tibble Menjadi Data Frame

# Convert the flights tibble into a base data frame.
df_flights <- flights %>% as.data.frame()

# Show the first six rows; unlike the tibble print, every column is listed.
print(head(df_flights, n = 6L))
##   year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## 1 2013     1   1      517            515         2      830            819
## 2 2013     1   1      533            529         4      850            830
## 3 2013     1   1      542            540         2      923            850
## 4 2013     1   1      544            545        -1     1004           1022
## 5 2013     1   1      554            600        -6      812            837
## 6 2013     1   1      554            558        -4      740            728
##   arr_delay carrier flight tailnum origin dest air_time distance hour minute
## 1        11      UA   1545  N14228    EWR  IAH      227     1400    5     15
## 2        20      UA   1714  N24211    LGA  IAH      227     1416    5     29
## 3        33      AA   1141  N619AA    JFK  MIA      160     1089    5     40
## 4       -18      B6    725  N804JB    JFK  BQN      183     1576    5     45
## 5       -25      DL    461  N668DN    LGA  ATL      116      762    6      0
## 6        12      UA   1696  N39463    EWR  ORD      150      719    5     58
##             time_hour
## 1 2013-01-01 05:00:00
## 2 2013-01-01 05:00:00
## 3 2013-01-01 05:00:00
## 4 2013-01-01 05:00:00
## 5 2013-01-01 06:00:00
## 6 2013-01-01 05:00:00

Memanggil Nama Kolom Secara Parsial

# Deliberate demonstration: `$` on a base data frame partially matches column
# names, so `y` resolves to `year` (the only column starting with "y").
# Tibbles disable this partial matching.
print(head(df_flights$y, n = 6L))
## [1] 2013 2013 2013 2013 2013 2013

Mengubah Data Frame Menjadi Tibble

# Small example data frame: five people, one row each. `id` and `age` are
# doubles, `name` and `gender` are character vectors, `drives` is logical.
df_people <- data.frame(
  id     = c(1, 2, 3, 4, 5),
  name   = c("Adam", "Eva", "Miki", "Yola", "Jack"),
  age    = c(46, 48, 21, 19, 17),
  gender = c("male", "female", "female", "female", "male"),
  drives = c(TRUE, TRUE, FALSE, TRUE, FALSE)
)

# Convert the base data frame into a tibble and display it.
tibble_people <- tibble::as_tibble(df_people)
print(tibble_people)
## # A tibble: 5 × 5
##      id name    age gender drives
##   <dbl> <chr> <dbl> <chr>  <lgl> 
## 1     1 Adam     46 male   TRUE  
## 2     2 Eva      48 female TRUE  
## 3     3 Miki     21 female FALSE 
## 4     4 Yola     19 female TRUE  
## 5     5 Jack     17 male   FALSE

Melihat Summary

# Per-column summary: quantiles for numeric columns, length/class for
# character columns, and TRUE/FALSE counts for the logical column.
print(summary(tibble_people))
##        id        name                age          gender         
##  Min.   :1   Length:5           Min.   :17.0   Length:5          
##  1st Qu.:2   Class :character   1st Qu.:19.0   Class :character  
##  Median :3   Mode  :character   Median :21.0   Mode  :character  
##  Mean   :3                      Mean   :30.2                     
##  3rd Qu.:4                      3rd Qu.:46.0                     
##  Max.   :5                      Max.   :48.0                     
##    drives       
##  Mode :logical  
##  FALSE:2        
##  TRUE :3        
##                 
##                 
## 

Melihat Tipe Variabel

# Compact transposed overview: one line per column with its type and the
# first few values.
dplyr::glimpse(tibble_people)
## Rows: 5
## Columns: 5
## $ id     <dbl> 1, 2, 3, 4, 5
## $ name   <chr> "Adam", "Eva", "Miki", "Yola", "Jack"
## $ age    <dbl> 46, 48, 21, 19, 17
## $ gender <chr> "male", "female", "female", "female", "male"
## $ drives <lgl> TRUE, TRUE, FALSE, TRUE, FALSE

Mengubah Gender Menjadi Factor

# Recode `gender` from character to factor so that summary() reports a count
# per level instead of only the column length and class.
tibble_people <- tibble_people %>%
  mutate(gender = factor(gender))

print(summary(tibble_people))
##        id        name                age          gender    drives       
##  Min.   :1   Length:5           Min.   :17.0   female:3   Mode :logical  
##  1st Qu.:2   Class :character   1st Qu.:19.0   male  :2   FALSE:2        
##  Median :3   Mode  :character   Median :21.0              TRUE :3        
##  Mean   :3                      Mean   :30.2                             
##  3rd Qu.:4                      3rd Qu.:46.0                             
##  Max.   :5                      Max.   :48.0

Memanggil Kolom pada Tibble

# Select the second column by position. Subsetting a tibble with `[` always
# returns a tibble, so the result is a one-column tibble rather than a vector.
tibble_people[2]
## # A tibble: 5 × 1
##   name 
##   <chr>
## 1 Adam 
## 2 Eva  
## 3 Miki 
## 4 Yola 
## 5 Jack

Menyaring Baris pada Tibble

# Keep only the rows whose `age` is below 30, using a logical row index.
tibble_people[tibble_people[["age"]] < 30, ]
## # A tibble: 3 × 5
##      id name    age gender drives
##   <dbl> <chr> <dbl> <fct>  <lgl> 
## 1     3 Miki     21 female FALSE 
## 2     4 Yola     19 female TRUE  
## 3     5 Jack     17 male   FALSE

Readr

Package readr digunakan untuk membaca file data.

library(readr)

Membaca File CSV

# Location of the iris CSV file. The original hard-coded path stays the
# default so existing runs are unchanged; set the IRIS_CSV_PATH environment
# variable to run this script on a machine where the file lives elsewhere.
csv_path <- Sys.getenv("IRIS_CSV_PATH", unset = "D:/r/iris_csv.csv")

# Fail early with an explicit message instead of a lower-level reader error.
if (!file.exists(csv_path)) {
  stop("CSV file not found: ", csv_path, call. = FALSE)
}

# Read the CSV; readr guesses the column types and reports them as a message.
data_csv <- read_csv(csv_path)
## Rows: 150 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Species
## dbl (4): Sepal.Length, Sepal.Width, Petal.Length, Petal.Width
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Display the imported data (first 10 rows, with column types in the header).
print(data_csv)
## # A tibble: 150 × 5
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
##           <dbl>       <dbl>        <dbl>       <dbl> <chr>  
##  1          5.1         3.5          1.4         0.2 setosa 
##  2          4.9         3            1.4         0.2 setosa 
##  3          4.7         3.2          1.3         0.2 setosa 
##  4          4.6         3.1          1.5         0.2 setosa 
##  5          5           3.6          1.4         0.2 setosa 
##  6          5.4         3.9          1.7         0.4 setosa 
##  7          4.6         3.4          1.4         0.3 setosa 
##  8          5           3.4          1.5         0.2 setosa 
##  9          4.4         2.9          1.4         0.2 setosa 
## 10          4.9         3.1          1.5         0.1 setosa 
## # ℹ 140 more rows

Mengubah CSV Menjadi Tibble

# Convert the imported data to a tibble and display it.
# NOTE(review): `data_csv` already prints as a tibble, so this conversion
# looks redundant — confirm whether it is kept only for demonstration.
data_tibble <- tibble::as_tibble(data_csv)
print(data_tibble)
## # A tibble: 150 × 5
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
##           <dbl>       <dbl>        <dbl>       <dbl> <chr>  
##  1          5.1         3.5          1.4         0.2 setosa 
##  2          4.9         3            1.4         0.2 setosa 
##  3          4.7         3.2          1.3         0.2 setosa 
##  4          4.6         3.1          1.5         0.2 setosa 
##  5          5           3.6          1.4         0.2 setosa 
##  6          5.4         3.9          1.7         0.4 setosa 
##  7          4.6         3.4          1.4         0.3 setosa 
##  8          5           3.4          1.5         0.2 setosa 
##  9          4.4         2.9          1.4         0.2 setosa 
## 10          4.9         3.1          1.5         0.1 setosa 
## # ℹ 140 more rows

Melihat Summary Data CSV

# Per-column summary of the imported data; `Species` is still character here,
# so only its length and class are reported.
print(summary(data_tibble))
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##    Species         
##  Length:150        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 

Melihat Struktur Data CSV

# Compact overview of the imported data: row/column counts plus one line per
# column with its type and leading values.
dplyr::glimpse(data_tibble)
## Rows: 150
## Columns: 5
## $ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.…
## $ Sepal.Width  <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.…
## $ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.…
## $ Petal.Width  <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.…
## $ Species      <chr> "setosa", "setosa", "setosa", "setosa", "setosa", "setosa…

Mengubah Species Menjadi Factor

# Recode `Species` from character to factor so that summary() reports the
# number of rows per species.
data_tibble <- data_tibble %>%
  mutate(Species = factor(Species))

print(summary(data_tibble))
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 

Memanggil Kolom Species

# Extract the `Species` column as a plain factor vector (all values printed,
# followed by the factor levels).
data_tibble[["Species"]]
##   [1] setosa     setosa     setosa     setosa     setosa     setosa    
##   [7] setosa     setosa     setosa     setosa     setosa     setosa    
##  [13] setosa     setosa     setosa     setosa     setosa     setosa    
##  [19] setosa     setosa     setosa     setosa     setosa     setosa    
##  [25] setosa     setosa     setosa     setosa     setosa     setosa    
##  [31] setosa     setosa     setosa     setosa     setosa     setosa    
##  [37] setosa     setosa     setosa     setosa     setosa     setosa    
##  [43] setosa     setosa     setosa     setosa     setosa     setosa    
##  [49] setosa     setosa     versicolor versicolor versicolor versicolor
##  [55] versicolor versicolor versicolor versicolor versicolor versicolor
##  [61] versicolor versicolor versicolor versicolor versicolor versicolor
##  [67] versicolor versicolor versicolor versicolor versicolor versicolor
##  [73] versicolor versicolor versicolor versicolor versicolor versicolor
##  [79] versicolor versicolor versicolor versicolor versicolor versicolor
##  [85] versicolor versicolor versicolor versicolor versicolor versicolor
##  [91] versicolor versicolor versicolor versicolor versicolor versicolor
##  [97] versicolor versicolor versicolor versicolor virginica  virginica 
## [103] virginica  virginica  virginica  virginica  virginica  virginica 
## [109] virginica  virginica  virginica  virginica  virginica  virginica 
## [115] virginica  virginica  virginica  virginica  virginica  virginica 
## [121] virginica  virginica  virginica  virginica  virginica  virginica 
## [127] virginica  virginica  virginica  virginica  virginica  virginica 
## [133] virginica  virginica  virginica  virginica  virginica  virginica 
## [139] virginica  virginica  virginica  virginica  virginica  virginica 
## [145] virginica  virginica  virginica  virginica  virginica  virginica 
## Levels: setosa versicolor virginica

Filter Data

# Keep only the rows with a sepal length strictly greater than 6. The call is
# namespace-qualified because dplyr::filter() masks stats::filter().
filter_data <- dplyr::filter(data_tibble, Sepal.Length > 6)

print(filter_data)
## # A tibble: 61 × 5
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species   
##           <dbl>       <dbl>        <dbl>       <dbl> <fct>     
##  1          7           3.2          4.7         1.4 versicolor
##  2          6.4         3.2          4.5         1.5 versicolor
##  3          6.9         3.1          4.9         1.5 versicolor
##  4          6.5         2.8          4.6         1.5 versicolor
##  5          6.3         3.3          4.7         1.6 versicolor
##  6          6.6         2.9          4.6         1.3 versicolor
##  7          6.1         2.9          4.7         1.4 versicolor
##  8          6.7         3.1          4.4         1.4 versicolor
##  9          6.2         2.2          4.5         1.5 versicolor
## 10          6.1         2.8          4           1.3 versicolor
## # ℹ 51 more rows

Menampilkan Nama Kolom

# List the column names of the imported data as a character vector.
colnames(data_tibble)
## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"  "Species"

Menampilkan Data di Viewer

# Open the data in the spreadsheet-style viewer. View() needs an interactive
# session with a display, so it is skipped when the script is run
# non-interactively (e.g. via Rscript or while knitting), where it would
# otherwise fail or pop up a window as a side effect of rendering.
if (interactive()) {
  View(data_tibble)
}