#BAB Package Dplyr –> manipulasi data frame
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.0 v stringr 1.4.0
## v tidyr 1.1.3 v forcats 0.5.1
## v readr 1.4.0
## Warning: package 'ggplot2' was built under R version 3.6.3
## Warning: package 'tibble' was built under R version 3.6.3
## Warning: package 'tidyr' was built under R version 3.6.3
## Warning: package 'readr' was built under R version 3.6.3
## Warning: package 'purrr' was built under R version 3.6.3
## Warning: package 'stringr' was built under R version 3.6.3
## Warning: package 'forcats' was built under R version 3.6.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Five basic dplyr verbs:
# 1. filter(): keep the rows of a data frame that satisfy the given criteria.
# 2. select(): pick the columns you need (one or more).
# 3. arrange(): sort rows by the values of one or more columns, ascending or descending.
# 4. mutate(): add new columns computed from existing columns.
# 5. summarise(): compute summary statistics of a data frame, e.g. mean or standard deviation (SD); often combined with group_by().
#filter
#filter(.data, ..., .preserve = FALSE)
#Data masking
# Example data: the `diamonds` dataset. Show its first 10 rows.
head(diamonds, n = 10)
## # A tibble: 10 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
## 7 0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47
## 8 0.26 Very Good H SI1 61.9 55 337 4.07 4.11 2.53
## 9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49
## 10 0.23 Very Good H VS1 59.4 61 338 4 4.05 2.39
# filter(): keep the rows where x and y are both 0 and cut is "Very Good".
# Comma-separated conditions are combined with a logical AND.
diamonds %>%
  filter(x == 0, y == 0, cut == "Very Good")
## # A tibble: 1 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 1 Very Good H VS2 63.3 53 5139 0 0 0
#Syntax lain dari: diamonds[diamonds$x == 0 & diamonds$y == 0 & diamonds$cut == "Very Good", ]
#dist_summary <- function(df, var) {
#df %>%
#summarise(n = n(), min = min({{ var }}), max = max({{ var }}))
#}
# Example data: the `mtcars` dataset. Show its first 10 rows.
head(mtcars, n = 10)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
#Ingin dicari n, max, dan min dari variabel mpg
#mtcars %>% dist_summary(mpg)
# Goal: n, min, and max of mpg, grouped by cyl
#mtcars %>% group_by(cyl) %>% dist_summary(mpg)
# For each cyl group: the number of rows, the mean of disp,
# and the standard deviation of drat.
summarise(
  group_by(mtcars, cyl),
  n = n(),
  average_disp = mean(disp),
  SD_drat = sd(drat)
)
## # A tibble: 3 x 4
## cyl n average_disp SD_drat
## <dbl> <int> <dbl> <dbl>
## 1 4 11 105. 0.365
## 2 6 7 183. 0.476
## 3 8 14 353. 0.372
# Example data: the `starwars` dataset. Show its first 10 rows.
head(starwars, n = 10)
## # A tibble: 10 x 14
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 Luke S~ 172 77 blond fair blue 19 male mascu~
## 2 C-3PO 167 75 <NA> gold yellow 112 none mascu~
## 3 R2-D2 96 32 <NA> white, bl~ red 33 none mascu~
## 4 Darth ~ 202 136 none white yellow 41.9 male mascu~
## 5 Leia O~ 150 49 brown light brown 19 fema~ femin~
## 6 Owen L~ 178 120 brown, grey light blue 52 male mascu~
## 7 Beru W~ 165 75 brown light blue 47 fema~ femin~
## 8 R5-D4 97 32 <NA> white, red red NA none mascu~
## 9 Biggs ~ 183 84 black light brown 24 male mascu~
## 10 Obi-Wa~ 182 77 auburn, wh~ fair blue-gray 57 male mascu~
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Keep the characters whose mass is above the average mass.
# na.rm = TRUE makes mean() ignore missing values.
filter(starwars, mass > mean(mass, na.rm = TRUE))
## # A tibble: 10 x 14
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 Darth ~ 202 136 none white yellow 41.9 male mascu~
## 2 Owen L~ 178 120 brown, grey light blue 52 male mascu~
## 3 Chewba~ 228 112 brown unknown blue 200 male mascu~
## 4 Jabba ~ 175 1358 <NA> green-tan~ orange 600 herm~ mascu~
## 5 Jek To~ 180 110 brown fair blue NA male mascu~
## 6 IG-88 200 140 none metal red 15 none mascu~
## 7 Bossk 190 113 none green red 53 male mascu~
## 8 Dexter~ 198 102 none brown yellow NA male mascu~
## 9 Grievo~ 216 159 none brown, wh~ green, y~ NA male mascu~
## 10 Tarfful 234 136 brown brown blue NA male mascu~
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Keep the rows with mass > 80 and height > 150. The column names and the
# thresholds are stored in vectors; .data[[name]] looks a column up by its
# name given as a string.
vars <- c("mass", "height")
cond <- c(80, 150)
filter(
  starwars,
  .data[[vars[[1]]]] > cond[[1]] & .data[[vars[[2]]]] > cond[[2]]
)
## # A tibble: 21 x 14
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 Darth ~ 202 136 none white yellow 41.9 male mascu~
## 2 Owen L~ 178 120 brown, grey light blue 52 male mascu~
## 3 Biggs ~ 183 84 black light brown 24 male mascu~
## 4 Anakin~ 188 84 blond fair blue 41.9 male mascu~
## 5 Chewba~ 228 112 brown unknown blue 200 male mascu~
## 6 Jabba ~ 175 1358 <NA> green-tan~ orange 600 herm~ mascu~
## 7 Jek To~ 180 110 brown fair blue NA male mascu~
## 8 IG-88 200 140 none metal red 15 none mascu~
## 9 Bossk 190 113 none green red 53 male mascu~
## 10 Ackbar 180 83 none brown mot~ orange 41 male mascu~
## # ... with 11 more rows, and 5 more variables: homeworld <chr>, species <chr>,
## # films <list>, vehicles <list>, starships <list>
#Select
#Fitur-fitur Seleksi
#a. : --> untuk memilih berbagai variabel yang berurutan.
#b. ! --> untuk mengambil komplemen dari satu set variabel.
#c. & dan | untuk memilih perpotongan atau gabungan dua set variabel.
#d. c() --> untuk menggabungkan pilihan.
#e. everything(): memilih semua variabel. Ini juga berguna dalam kombinasi dengan operator tidyselect lainnya.
#f. last_col () memilih variabel terakhir.
#g. starts_with (): Dimulai dengan prefiks/kata dasar.
# Example data for the selection helpers: first 10 rows of `iris`.
head(iris, n = 10)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
# Select the columns whose names start with "Sepal".
select(iris, starts_with("Sepal"))
## Sepal.Length Sepal.Width
## 1 5.1 3.5
## 2 4.9 3.0
## 3 4.7 3.2
## 4 4.6 3.1
## 5 5.0 3.6
## 6 5.4 3.9
## 7 4.6 3.4
## 8 5.0 3.4
## 9 4.4 2.9
## 10 4.9 3.1
## 11 5.4 3.7
## 12 4.8 3.4
## 13 4.8 3.0
## 14 4.3 3.0
## 15 5.8 4.0
## 16 5.7 4.4
## 17 5.4 3.9
## 18 5.1 3.5
## 19 5.7 3.8
## 20 5.1 3.8
## 21 5.4 3.4
## 22 5.1 3.7
## 23 4.6 3.6
## 24 5.1 3.3
## 25 4.8 3.4
## 26 5.0 3.0
## 27 5.0 3.4
## 28 5.2 3.5
## 29 5.2 3.4
## 30 4.7 3.2
## 31 4.8 3.1
## 32 5.4 3.4
## 33 5.2 4.1
## 34 5.5 4.2
## 35 4.9 3.1
## 36 5.0 3.2
## 37 5.5 3.5
## 38 4.9 3.6
## 39 4.4 3.0
## 40 5.1 3.4
## 41 5.0 3.5
## 42 4.5 2.3
## 43 4.4 3.2
## 44 5.0 3.5
## 45 5.1 3.8
## 46 4.8 3.0
## 47 5.1 3.8
## 48 4.6 3.2
## 49 5.3 3.7
## 50 5.0 3.3
## 51 7.0 3.2
## 52 6.4 3.2
## 53 6.9 3.1
## 54 5.5 2.3
## 55 6.5 2.8
## 56 5.7 2.8
## 57 6.3 3.3
## 58 4.9 2.4
## 59 6.6 2.9
## 60 5.2 2.7
## 61 5.0 2.0
## 62 5.9 3.0
## 63 6.0 2.2
## 64 6.1 2.9
## 65 5.6 2.9
## 66 6.7 3.1
## 67 5.6 3.0
## 68 5.8 2.7
## 69 6.2 2.2
## 70 5.6 2.5
## 71 5.9 3.2
## 72 6.1 2.8
## 73 6.3 2.5
## 74 6.1 2.8
## 75 6.4 2.9
## 76 6.6 3.0
## 77 6.8 2.8
## 78 6.7 3.0
## 79 6.0 2.9
## 80 5.7 2.6
## 81 5.5 2.4
## 82 5.5 2.4
## 83 5.8 2.7
## 84 6.0 2.7
## 85 5.4 3.0
## 86 6.0 3.4
## 87 6.7 3.1
## 88 6.3 2.3
## 89 5.6 3.0
## 90 5.5 2.5
## 91 5.5 2.6
## 92 6.1 3.0
## 93 5.8 2.6
## 94 5.0 2.3
## 95 5.6 2.7
## 96 5.7 3.0
## 97 5.7 2.9
## 98 6.2 2.9
## 99 5.1 2.5
## 100 5.7 2.8
## 101 6.3 3.3
## 102 5.8 2.7
## 103 7.1 3.0
## 104 6.3 2.9
## 105 6.5 3.0
## 106 7.6 3.0
## 107 4.9 2.5
## 108 7.3 2.9
## 109 6.7 2.5
## 110 7.2 3.6
## 111 6.5 3.2
## 112 6.4 2.7
## 113 6.8 3.0
## 114 5.7 2.5
## 115 5.8 2.8
## 116 6.4 3.2
## 117 6.5 3.0
## 118 7.7 3.8
## 119 7.7 2.6
## 120 6.0 2.2
## 121 6.9 3.2
## 122 5.6 2.8
## 123 7.7 2.8
## 124 6.3 2.7
## 125 6.7 3.3
## 126 7.2 3.2
## 127 6.2 2.8
## 128 6.1 3.0
## 129 6.4 2.8
## 130 7.2 3.0
## 131 7.4 2.8
## 132 7.9 3.8
## 133 6.4 2.8
## 134 6.3 2.8
## 135 6.1 2.6
## 136 7.7 3.0
## 137 6.3 3.4
## 138 6.4 3.1
## 139 6.0 3.0
## 140 6.9 3.1
## 141 6.7 3.1
## 142 6.9 3.1
## 143 5.8 2.7
## 144 6.8 3.2
## 145 6.7 3.3
## 146 6.7 3.0
## 147 6.3 2.5
## 148 6.5 3.0
## 149 6.2 3.4
## 150 5.9 3.0
# Select the columns whose names end with "Width".
select(iris, ends_with("Width"))
## Sepal.Width Petal.Width
## 1 3.5 0.2
## 2 3.0 0.2
## 3 3.2 0.2
## 4 3.1 0.2
## 5 3.6 0.2
## 6 3.9 0.4
## 7 3.4 0.3
## 8 3.4 0.2
## 9 2.9 0.2
## 10 3.1 0.1
## 11 3.7 0.2
## 12 3.4 0.2
## 13 3.0 0.1
## 14 3.0 0.1
## 15 4.0 0.2
## 16 4.4 0.4
## 17 3.9 0.4
## 18 3.5 0.3
## 19 3.8 0.3
## 20 3.8 0.3
## 21 3.4 0.2
## 22 3.7 0.4
## 23 3.6 0.2
## 24 3.3 0.5
## 25 3.4 0.2
## 26 3.0 0.2
## 27 3.4 0.4
## 28 3.5 0.2
## 29 3.4 0.2
## 30 3.2 0.2
## 31 3.1 0.2
## 32 3.4 0.4
## 33 4.1 0.1
## 34 4.2 0.2
## 35 3.1 0.2
## 36 3.2 0.2
## 37 3.5 0.2
## 38 3.6 0.1
## 39 3.0 0.2
## 40 3.4 0.2
## 41 3.5 0.3
## 42 2.3 0.3
## 43 3.2 0.2
## 44 3.5 0.6
## 45 3.8 0.4
## 46 3.0 0.3
## 47 3.8 0.2
## 48 3.2 0.2
## 49 3.7 0.2
## 50 3.3 0.2
## 51 3.2 1.4
## 52 3.2 1.5
## 53 3.1 1.5
## 54 2.3 1.3
## 55 2.8 1.5
## 56 2.8 1.3
## 57 3.3 1.6
## 58 2.4 1.0
## 59 2.9 1.3
## 60 2.7 1.4
## 61 2.0 1.0
## 62 3.0 1.5
## 63 2.2 1.0
## 64 2.9 1.4
## 65 2.9 1.3
## 66 3.1 1.4
## 67 3.0 1.5
## 68 2.7 1.0
## 69 2.2 1.5
## 70 2.5 1.1
## 71 3.2 1.8
## 72 2.8 1.3
## 73 2.5 1.5
## 74 2.8 1.2
## 75 2.9 1.3
## 76 3.0 1.4
## 77 2.8 1.4
## 78 3.0 1.7
## 79 2.9 1.5
## 80 2.6 1.0
## 81 2.4 1.1
## 82 2.4 1.0
## 83 2.7 1.2
## 84 2.7 1.6
## 85 3.0 1.5
## 86 3.4 1.6
## 87 3.1 1.5
## 88 2.3 1.3
## 89 3.0 1.3
## 90 2.5 1.3
## 91 2.6 1.2
## 92 3.0 1.4
## 93 2.6 1.2
## 94 2.3 1.0
## 95 2.7 1.3
## 96 3.0 1.2
## 97 2.9 1.3
## 98 2.9 1.3
## 99 2.5 1.1
## 100 2.8 1.3
## 101 3.3 2.5
## 102 2.7 1.9
## 103 3.0 2.1
## 104 2.9 1.8
## 105 3.0 2.2
## 106 3.0 2.1
## 107 2.5 1.7
## 108 2.9 1.8
## 109 2.5 1.8
## 110 3.6 2.5
## 111 3.2 2.0
## 112 2.7 1.9
## 113 3.0 2.1
## 114 2.5 2.0
## 115 2.8 2.4
## 116 3.2 2.3
## 117 3.0 1.8
## 118 3.8 2.2
## 119 2.6 2.3
## 120 2.2 1.5
## 121 3.2 2.3
## 122 2.8 2.0
## 123 2.8 2.0
## 124 2.7 1.8
## 125 3.3 2.1
## 126 3.2 1.8
## 127 2.8 1.8
## 128 3.0 1.8
## 129 2.8 2.1
## 130 3.0 1.6
## 131 2.8 1.9
## 132 3.8 2.0
## 133 2.8 2.2
## 134 2.8 1.5
## 135 2.6 1.4
## 136 3.0 2.3
## 137 3.4 2.4
## 138 3.1 1.8
## 139 3.0 1.8
## 140 3.1 2.1
## 141 3.1 2.4
## 142 3.1 2.3
## 143 2.7 1.9
## 144 3.2 2.3
## 145 3.3 2.5
## 146 3.0 2.3
## 147 2.5 1.9
## 148 3.0 2.0
## 149 3.4 2.3
## 150 3.0 1.8
# Select the columns whose names start with "Petal" or with "Sepal".
# The Petal.* columns come first in the result, then the Sepal.* columns.
select(iris, starts_with(c("Petal", "Sepal")))
## Petal.Length Petal.Width Sepal.Length Sepal.Width
## 1 1.4 0.2 5.1 3.5
## 2 1.4 0.2 4.9 3.0
## 3 1.3 0.2 4.7 3.2
## 4 1.5 0.2 4.6 3.1
## 5 1.4 0.2 5.0 3.6
## 6 1.7 0.4 5.4 3.9
## 7 1.4 0.3 4.6 3.4
## 8 1.5 0.2 5.0 3.4
## 9 1.4 0.2 4.4 2.9
## 10 1.5 0.1 4.9 3.1
## 11 1.5 0.2 5.4 3.7
## 12 1.6 0.2 4.8 3.4
## 13 1.4 0.1 4.8 3.0
## 14 1.1 0.1 4.3 3.0
## 15 1.2 0.2 5.8 4.0
## 16 1.5 0.4 5.7 4.4
## 17 1.3 0.4 5.4 3.9
## 18 1.4 0.3 5.1 3.5
## 19 1.7 0.3 5.7 3.8
## 20 1.5 0.3 5.1 3.8
## 21 1.7 0.2 5.4 3.4
## 22 1.5 0.4 5.1 3.7
## 23 1.0 0.2 4.6 3.6
## 24 1.7 0.5 5.1 3.3
## 25 1.9 0.2 4.8 3.4
## 26 1.6 0.2 5.0 3.0
## 27 1.6 0.4 5.0 3.4
## 28 1.5 0.2 5.2 3.5
## 29 1.4 0.2 5.2 3.4
## 30 1.6 0.2 4.7 3.2
## 31 1.6 0.2 4.8 3.1
## 32 1.5 0.4 5.4 3.4
## 33 1.5 0.1 5.2 4.1
## 34 1.4 0.2 5.5 4.2
## 35 1.5 0.2 4.9 3.1
## 36 1.2 0.2 5.0 3.2
## 37 1.3 0.2 5.5 3.5
## 38 1.4 0.1 4.9 3.6
## 39 1.3 0.2 4.4 3.0
## 40 1.5 0.2 5.1 3.4
## 41 1.3 0.3 5.0 3.5
## 42 1.3 0.3 4.5 2.3
## 43 1.3 0.2 4.4 3.2
## 44 1.6 0.6 5.0 3.5
## 45 1.9 0.4 5.1 3.8
## 46 1.4 0.3 4.8 3.0
## 47 1.6 0.2 5.1 3.8
## 48 1.4 0.2 4.6 3.2
## 49 1.5 0.2 5.3 3.7
## 50 1.4 0.2 5.0 3.3
## 51 4.7 1.4 7.0 3.2
## 52 4.5 1.5 6.4 3.2
## 53 4.9 1.5 6.9 3.1
## 54 4.0 1.3 5.5 2.3
## 55 4.6 1.5 6.5 2.8
## 56 4.5 1.3 5.7 2.8
## 57 4.7 1.6 6.3 3.3
## 58 3.3 1.0 4.9 2.4
## 59 4.6 1.3 6.6 2.9
## 60 3.9 1.4 5.2 2.7
## 61 3.5 1.0 5.0 2.0
## 62 4.2 1.5 5.9 3.0
## 63 4.0 1.0 6.0 2.2
## 64 4.7 1.4 6.1 2.9
## 65 3.6 1.3 5.6 2.9
## 66 4.4 1.4 6.7 3.1
## 67 4.5 1.5 5.6 3.0
## 68 4.1 1.0 5.8 2.7
## 69 4.5 1.5 6.2 2.2
## 70 3.9 1.1 5.6 2.5
## 71 4.8 1.8 5.9 3.2
## 72 4.0 1.3 6.1 2.8
## 73 4.9 1.5 6.3 2.5
## 74 4.7 1.2 6.1 2.8
## 75 4.3 1.3 6.4 2.9
## 76 4.4 1.4 6.6 3.0
## 77 4.8 1.4 6.8 2.8
## 78 5.0 1.7 6.7 3.0
## 79 4.5 1.5 6.0 2.9
## 80 3.5 1.0 5.7 2.6
## 81 3.8 1.1 5.5 2.4
## 82 3.7 1.0 5.5 2.4
## 83 3.9 1.2 5.8 2.7
## 84 5.1 1.6 6.0 2.7
## 85 4.5 1.5 5.4 3.0
## 86 4.5 1.6 6.0 3.4
## 87 4.7 1.5 6.7 3.1
## 88 4.4 1.3 6.3 2.3
## 89 4.1 1.3 5.6 3.0
## 90 4.0 1.3 5.5 2.5
## 91 4.4 1.2 5.5 2.6
## 92 4.6 1.4 6.1 3.0
## 93 4.0 1.2 5.8 2.6
## 94 3.3 1.0 5.0 2.3
## 95 4.2 1.3 5.6 2.7
## 96 4.2 1.2 5.7 3.0
## 97 4.2 1.3 5.7 2.9
## 98 4.3 1.3 6.2 2.9
## 99 3.0 1.1 5.1 2.5
## 100 4.1 1.3 5.7 2.8
## 101 6.0 2.5 6.3 3.3
## 102 5.1 1.9 5.8 2.7
## 103 5.9 2.1 7.1 3.0
## 104 5.6 1.8 6.3 2.9
## 105 5.8 2.2 6.5 3.0
## 106 6.6 2.1 7.6 3.0
## 107 4.5 1.7 4.9 2.5
## 108 6.3 1.8 7.3 2.9
## 109 5.8 1.8 6.7 2.5
## 110 6.1 2.5 7.2 3.6
## 111 5.1 2.0 6.5 3.2
## 112 5.3 1.9 6.4 2.7
## 113 5.5 2.1 6.8 3.0
## 114 5.0 2.0 5.7 2.5
## 115 5.1 2.4 5.8 2.8
## 116 5.3 2.3 6.4 3.2
## 117 5.5 1.8 6.5 3.0
## 118 6.7 2.2 7.7 3.8
## 119 6.9 2.3 7.7 2.6
## 120 5.0 1.5 6.0 2.2
## 121 5.7 2.3 6.9 3.2
## 122 4.9 2.0 5.6 2.8
## 123 6.7 2.0 7.7 2.8
## 124 4.9 1.8 6.3 2.7
## 125 5.7 2.1 6.7 3.3
## 126 6.0 1.8 7.2 3.2
## 127 4.8 1.8 6.2 2.8
## 128 4.9 1.8 6.1 3.0
## 129 5.6 2.1 6.4 2.8
## 130 5.8 1.6 7.2 3.0
## 131 6.1 1.9 7.4 2.8
## 132 6.4 2.0 7.9 3.8
## 133 5.6 2.2 6.4 2.8
## 134 5.1 1.5 6.3 2.8
## 135 5.6 1.4 6.1 2.6
## 136 6.1 2.3 7.7 3.0
## 137 5.6 2.4 6.3 3.4
## 138 5.5 1.8 6.4 3.1
## 139 4.8 1.8 6.0 3.0
## 140 5.4 2.1 6.9 3.1
## 141 5.6 2.4 6.7 3.1
## 142 5.1 2.3 6.9 3.1
## 143 5.1 1.9 5.8 2.7
## 144 5.9 2.3 6.8 3.2
## 145 5.7 2.5 6.7 3.3
## 146 5.2 2.3 6.7 3.0
## 147 5.0 1.9 6.3 2.5
## 148 5.2 2.0 6.5 3.0
## 149 5.4 2.3 6.2 3.4
## 150 5.1 1.8 5.9 3.0
# h. contains(): select the columns whose names contain a literal string.
# Matching ignores case by default, so "leng" selects the *.Length columns.
select(iris, contains("leng"))
## Sepal.Length Petal.Length
## 1 5.1 1.4
## 2 4.9 1.4
## 3 4.7 1.3
## 4 4.6 1.5
## 5 5.0 1.4
## 6 5.4 1.7
## 7 4.6 1.4
## 8 5.0 1.5
## 9 4.4 1.4
## 10 4.9 1.5
## 11 5.4 1.5
## 12 4.8 1.6
## 13 4.8 1.4
## 14 4.3 1.1
## 15 5.8 1.2
## 16 5.7 1.5
## 17 5.4 1.3
## 18 5.1 1.4
## 19 5.7 1.7
## 20 5.1 1.5
## 21 5.4 1.7
## 22 5.1 1.5
## 23 4.6 1.0
## 24 5.1 1.7
## 25 4.8 1.9
## 26 5.0 1.6
## 27 5.0 1.6
## 28 5.2 1.5
## 29 5.2 1.4
## 30 4.7 1.6
## 31 4.8 1.6
## 32 5.4 1.5
## 33 5.2 1.5
## 34 5.5 1.4
## 35 4.9 1.5
## 36 5.0 1.2
## 37 5.5 1.3
## 38 4.9 1.4
## 39 4.4 1.3
## 40 5.1 1.5
## 41 5.0 1.3
## 42 4.5 1.3
## 43 4.4 1.3
## 44 5.0 1.6
## 45 5.1 1.9
## 46 4.8 1.4
## 47 5.1 1.6
## 48 4.6 1.4
## 49 5.3 1.5
## 50 5.0 1.4
## 51 7.0 4.7
## 52 6.4 4.5
## 53 6.9 4.9
## 54 5.5 4.0
## 55 6.5 4.6
## 56 5.7 4.5
## 57 6.3 4.7
## 58 4.9 3.3
## 59 6.6 4.6
## 60 5.2 3.9
## 61 5.0 3.5
## 62 5.9 4.2
## 63 6.0 4.0
## 64 6.1 4.7
## 65 5.6 3.6
## 66 6.7 4.4
## 67 5.6 4.5
## 68 5.8 4.1
## 69 6.2 4.5
## 70 5.6 3.9
## 71 5.9 4.8
## 72 6.1 4.0
## 73 6.3 4.9
## 74 6.1 4.7
## 75 6.4 4.3
## 76 6.6 4.4
## 77 6.8 4.8
## 78 6.7 5.0
## 79 6.0 4.5
## 80 5.7 3.5
## 81 5.5 3.8
## 82 5.5 3.7
## 83 5.8 3.9
## 84 6.0 5.1
## 85 5.4 4.5
## 86 6.0 4.5
## 87 6.7 4.7
## 88 6.3 4.4
## 89 5.6 4.1
## 90 5.5 4.0
## 91 5.5 4.4
## 92 6.1 4.6
## 93 5.8 4.0
## 94 5.0 3.3
## 95 5.6 4.2
## 96 5.7 4.2
## 97 5.7 4.2
## 98 6.2 4.3
## 99 5.1 3.0
## 100 5.7 4.1
## 101 6.3 6.0
## 102 5.8 5.1
## 103 7.1 5.9
## 104 6.3 5.6
## 105 6.5 5.8
## 106 7.6 6.6
## 107 4.9 4.5
## 108 7.3 6.3
## 109 6.7 5.8
## 110 7.2 6.1
## 111 6.5 5.1
## 112 6.4 5.3
## 113 6.8 5.5
## 114 5.7 5.0
## 115 5.8 5.1
## 116 6.4 5.3
## 117 6.5 5.5
## 118 7.7 6.7
## 119 7.7 6.9
## 120 6.0 5.0
## 121 6.9 5.7
## 122 5.6 4.9
## 123 7.7 6.7
## 124 6.3 4.9
## 125 6.7 5.7
## 126 7.2 6.0
## 127 6.2 4.8
## 128 6.1 4.9
## 129 6.4 5.6
## 130 7.2 5.8
## 131 7.4 6.1
## 132 7.9 6.4
## 133 6.4 5.6
## 134 6.3 5.1
## 135 6.1 5.6
## 136 7.7 6.1
## 137 6.3 5.6
## 138 6.4 5.5
## 139 6.0 4.8
## 140 6.9 5.4
## 141 6.7 5.6
## 142 6.9 5.1
## 143 5.8 5.1
## 144 6.8 5.9
## 145 6.7 5.7
## 146 6.7 5.2
## 147 6.3 5.0
## 148 6.5 5.2
## 149 6.2 5.4
## 150 5.9 5.1
# matches(): select the columns whose names match a regular expression.
# "[pt]al" means a "p" or a "t" followed by "al": it finds "pal" in the
# Sepal.* names and "tal" in the Petal.* names.
select(iris, matches("[pt]al"))
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 5.1 3.5 1.4 0.2
## 2 4.9 3.0 1.4 0.2
## 3 4.7 3.2 1.3 0.2
## 4 4.6 3.1 1.5 0.2
## 5 5.0 3.6 1.4 0.2
## 6 5.4 3.9 1.7 0.4
## 7 4.6 3.4 1.4 0.3
## 8 5.0 3.4 1.5 0.2
## 9 4.4 2.9 1.4 0.2
## 10 4.9 3.1 1.5 0.1
## 11 5.4 3.7 1.5 0.2
## 12 4.8 3.4 1.6 0.2
## 13 4.8 3.0 1.4 0.1
## 14 4.3 3.0 1.1 0.1
## 15 5.8 4.0 1.2 0.2
## 16 5.7 4.4 1.5 0.4
## 17 5.4 3.9 1.3 0.4
## 18 5.1 3.5 1.4 0.3
## 19 5.7 3.8 1.7 0.3
## 20 5.1 3.8 1.5 0.3
## 21 5.4 3.4 1.7 0.2
## 22 5.1 3.7 1.5 0.4
## 23 4.6 3.6 1.0 0.2
## 24 5.1 3.3 1.7 0.5
## 25 4.8 3.4 1.9 0.2
## 26 5.0 3.0 1.6 0.2
## 27 5.0 3.4 1.6 0.4
## 28 5.2 3.5 1.5 0.2
## 29 5.2 3.4 1.4 0.2
## 30 4.7 3.2 1.6 0.2
## 31 4.8 3.1 1.6 0.2
## 32 5.4 3.4 1.5 0.4
## 33 5.2 4.1 1.5 0.1
## 34 5.5 4.2 1.4 0.2
## 35 4.9 3.1 1.5 0.2
## 36 5.0 3.2 1.2 0.2
## 37 5.5 3.5 1.3 0.2
## 38 4.9 3.6 1.4 0.1
## 39 4.4 3.0 1.3 0.2
## 40 5.1 3.4 1.5 0.2
## 41 5.0 3.5 1.3 0.3
## 42 4.5 2.3 1.3 0.3
## 43 4.4 3.2 1.3 0.2
## 44 5.0 3.5 1.6 0.6
## 45 5.1 3.8 1.9 0.4
## 46 4.8 3.0 1.4 0.3
## 47 5.1 3.8 1.6 0.2
## 48 4.6 3.2 1.4 0.2
## 49 5.3 3.7 1.5 0.2
## 50 5.0 3.3 1.4 0.2
## 51 7.0 3.2 4.7 1.4
## 52 6.4 3.2 4.5 1.5
## 53 6.9 3.1 4.9 1.5
## 54 5.5 2.3 4.0 1.3
## 55 6.5 2.8 4.6 1.5
## 56 5.7 2.8 4.5 1.3
## 57 6.3 3.3 4.7 1.6
## 58 4.9 2.4 3.3 1.0
## 59 6.6 2.9 4.6 1.3
## 60 5.2 2.7 3.9 1.4
## 61 5.0 2.0 3.5 1.0
## 62 5.9 3.0 4.2 1.5
## 63 6.0 2.2 4.0 1.0
## 64 6.1 2.9 4.7 1.4
## 65 5.6 2.9 3.6 1.3
## 66 6.7 3.1 4.4 1.4
## 67 5.6 3.0 4.5 1.5
## 68 5.8 2.7 4.1 1.0
## 69 6.2 2.2 4.5 1.5
## 70 5.6 2.5 3.9 1.1
## 71 5.9 3.2 4.8 1.8
## 72 6.1 2.8 4.0 1.3
## 73 6.3 2.5 4.9 1.5
## 74 6.1 2.8 4.7 1.2
## 75 6.4 2.9 4.3 1.3
## 76 6.6 3.0 4.4 1.4
## 77 6.8 2.8 4.8 1.4
## 78 6.7 3.0 5.0 1.7
## 79 6.0 2.9 4.5 1.5
## 80 5.7 2.6 3.5 1.0
## 81 5.5 2.4 3.8 1.1
## 82 5.5 2.4 3.7 1.0
## 83 5.8 2.7 3.9 1.2
## 84 6.0 2.7 5.1 1.6
## 85 5.4 3.0 4.5 1.5
## 86 6.0 3.4 4.5 1.6
## 87 6.7 3.1 4.7 1.5
## 88 6.3 2.3 4.4 1.3
## 89 5.6 3.0 4.1 1.3
## 90 5.5 2.5 4.0 1.3
## 91 5.5 2.6 4.4 1.2
## 92 6.1 3.0 4.6 1.4
## 93 5.8 2.6 4.0 1.2
## 94 5.0 2.3 3.3 1.0
## 95 5.6 2.7 4.2 1.3
## 96 5.7 3.0 4.2 1.2
## 97 5.7 2.9 4.2 1.3
## 98 6.2 2.9 4.3 1.3
## 99 5.1 2.5 3.0 1.1
## 100 5.7 2.8 4.1 1.3
## 101 6.3 3.3 6.0 2.5
## 102 5.8 2.7 5.1 1.9
## 103 7.1 3.0 5.9 2.1
## 104 6.3 2.9 5.6 1.8
## 105 6.5 3.0 5.8 2.2
## 106 7.6 3.0 6.6 2.1
## 107 4.9 2.5 4.5 1.7
## 108 7.3 2.9 6.3 1.8
## 109 6.7 2.5 5.8 1.8
## 110 7.2 3.6 6.1 2.5
## 111 6.5 3.2 5.1 2.0
## 112 6.4 2.7 5.3 1.9
## 113 6.8 3.0 5.5 2.1
## 114 5.7 2.5 5.0 2.0
## 115 5.8 2.8 5.1 2.4
## 116 6.4 3.2 5.3 2.3
## 117 6.5 3.0 5.5 1.8
## 118 7.7 3.8 6.7 2.2
## 119 7.7 2.6 6.9 2.3
## 120 6.0 2.2 5.0 1.5
## 121 6.9 3.2 5.7 2.3
## 122 5.6 2.8 4.9 2.0
## 123 7.7 2.8 6.7 2.0
## 124 6.3 2.7 4.9 1.8
## 125 6.7 3.3 5.7 2.1
## 126 7.2 3.2 6.0 1.8
## 127 6.2 2.8 4.8 1.8
## 128 6.1 3.0 4.9 1.8
## 129 6.4 2.8 5.6 2.1
## 130 7.2 3.0 5.8 1.6
## 131 7.4 2.8 6.1 1.9
## 132 7.9 3.8 6.4 2.0
## 133 6.4 2.8 5.6 2.2
## 134 6.3 2.8 5.1 1.5
## 135 6.1 2.6 5.6 1.4
## 136 7.7 3.0 6.1 2.3
## 137 6.3 3.4 5.6 2.4
## 138 6.4 3.1 5.5 1.8
## 139 6.0 3.0 4.8 1.8
## 140 6.9 3.1 5.4 2.1
## 141 6.7 3.1 5.6 2.4
## 142 6.9 3.1 5.1 2.3
## 143 5.8 2.7 5.1 1.9
## 144 6.8 3.2 5.9 2.3
## 145 6.7 3.3 5.7 2.5
## 146 6.7 3.0 5.2 2.3
## 147 6.3 2.5 5.0 1.9
## 148 6.5 3.0 5.2 2.0
## 149 6.2 3.4 5.4 2.3
## 150 5.9 3.0 5.1 1.8
#i. To select a range, use num_range()
library(tidyverse)
# Example data for num_range(): first 10 rows of `billboard`.
head(billboard, n = 10)
## # A tibble: 10 x 79
## artist track date.entered wk1 wk2 wk3 wk4 wk5 wk6 wk7 wk8
## <chr> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2 Pac Baby D~ 2000-02-26 87 82 72 77 87 94 99 NA
## 2 2Ge+her The Ha~ 2000-09-02 91 87 92 NA NA NA NA NA
## 3 3 Doors~ Krypto~ 2000-04-08 81 70 68 67 66 57 54 53
## 4 3 Doors~ Loser 2000-10-21 76 76 72 69 67 65 55 59
## 5 504 Boyz Wobble~ 2000-04-15 57 34 25 17 17 31 36 49
## 6 98^0 Give M~ 2000-08-19 51 39 34 26 26 19 2 2
## 7 A*Teens Dancin~ 2000-07-08 97 97 96 95 100 NA NA NA
## 8 Aaliyah I Don'~ 2000-01-29 84 62 51 41 38 35 35 38
## 9 Aaliyah Try Ag~ 2000-03-18 59 53 38 28 21 18 16 14
## 10 Adams, ~ Open M~ 2000-08-26 76 76 74 69 68 67 61 58
## # ... with 68 more variables: wk9 <dbl>, wk10 <dbl>, wk11 <dbl>, wk12 <dbl>,
## # wk13 <dbl>, wk14 <dbl>, wk15 <dbl>, wk16 <dbl>, wk17 <dbl>, wk18 <dbl>,
## # wk19 <dbl>, wk20 <dbl>, wk21 <dbl>, wk22 <dbl>, wk23 <dbl>, wk24 <dbl>,
## # wk25 <dbl>, wk26 <dbl>, wk27 <dbl>, wk28 <dbl>, wk29 <dbl>, wk30 <dbl>,
## # wk31 <dbl>, wk32 <dbl>, wk33 <dbl>, wk34 <dbl>, wk35 <dbl>, wk36 <dbl>,
## # wk37 <dbl>, wk38 <dbl>, wk39 <dbl>, wk40 <dbl>, wk41 <dbl>, wk42 <dbl>,
## # wk43 <dbl>, wk44 <dbl>, wk45 <dbl>, wk46 <dbl>, wk47 <dbl>, wk48 <dbl>,
## # wk49 <dbl>, wk50 <dbl>, wk51 <dbl>, wk52 <dbl>, wk53 <dbl>, wk54 <dbl>,
## # wk55 <dbl>, wk56 <dbl>, wk57 <dbl>, wk58 <dbl>, wk59 <dbl>, wk60 <dbl>,
## # wk61 <dbl>, wk62 <dbl>, wk63 <dbl>, wk64 <dbl>, wk65 <dbl>, wk66 <lgl>,
## # wk67 <lgl>, wk68 <lgl>, wk69 <lgl>, wk70 <lgl>, wk71 <lgl>, wk72 <lgl>,
## # wk73 <lgl>, wk74 <lgl>, wk75 <lgl>, wk76 <lgl>
# Select the columns named "wk" followed by a number from 2 to 5 (wk2..wk5).
select(billboard, num_range("wk", 2:5))
## # A tibble: 317 x 4
## wk2 wk3 wk4 wk5
## <dbl> <dbl> <dbl> <dbl>
## 1 82 72 77 87
## 2 87 92 NA NA
## 3 70 68 67 66
## 4 76 72 69 67
## 5 34 25 17 17
## 6 39 34 26 26
## 7 97 96 95 100
## 8 62 51 41 38
## 9 53 38 28 21
## 10 76 74 69 68
## # ... with 307 more rows
# j. where(): select the columns for which a predicate function returns TRUE.
# Here: the factor columns of `iris`, i.e. Species.
select(iris, where(is.factor))
## Species
## 1 setosa
## 2 setosa
## 3 setosa
## 4 setosa
## 5 setosa
## 6 setosa
## 7 setosa
## 8 setosa
## 9 setosa
## 10 setosa
## 11 setosa
## 12 setosa
## 13 setosa
## 14 setosa
## 15 setosa
## 16 setosa
## 17 setosa
## 18 setosa
## 19 setosa
## 20 setosa
## 21 setosa
## 22 setosa
## 23 setosa
## 24 setosa
## 25 setosa
## 26 setosa
## 27 setosa
## 28 setosa
## 29 setosa
## 30 setosa
## 31 setosa
## 32 setosa
## 33 setosa
## 34 setosa
## 35 setosa
## 36 setosa
## 37 setosa
## 38 setosa
## 39 setosa
## 40 setosa
## 41 setosa
## 42 setosa
## 43 setosa
## 44 setosa
## 45 setosa
## 46 setosa
## 47 setosa
## 48 setosa
## 49 setosa
## 50 setosa
## 51 versicolor
## 52 versicolor
## 53 versicolor
## 54 versicolor
## 55 versicolor
## 56 versicolor
## 57 versicolor
## 58 versicolor
## 59 versicolor
## 60 versicolor
## 61 versicolor
## 62 versicolor
## 63 versicolor
## 64 versicolor
## 65 versicolor
## 66 versicolor
## 67 versicolor
## 68 versicolor
## 69 versicolor
## 70 versicolor
## 71 versicolor
## 72 versicolor
## 73 versicolor
## 74 versicolor
## 75 versicolor
## 76 versicolor
## 77 versicolor
## 78 versicolor
## 79 versicolor
## 80 versicolor
## 81 versicolor
## 82 versicolor
## 83 versicolor
## 84 versicolor
## 85 versicolor
## 86 versicolor
## 87 versicolor
## 88 versicolor
## 89 versicolor
## 90 versicolor
## 91 versicolor
## 92 versicolor
## 93 versicolor
## 94 versicolor
## 95 versicolor
## 96 versicolor
## 97 versicolor
## 98 versicolor
## 99 versicolor
## 100 versicolor
## 101 virginica
## 102 virginica
## 103 virginica
## 104 virginica
## 105 virginica
## 106 virginica
## 107 virginica
## 108 virginica
## 109 virginica
## 110 virginica
## 111 virginica
## 112 virginica
## 113 virginica
## 114 virginica
## 115 virginica
## 116 virginica
## 117 virginica
## 118 virginica
## 119 virginica
## 120 virginica
## 121 virginica
## 122 virginica
## 123 virginica
## 124 virginica
## 125 virginica
## 126 virginica
## 127 virginica
## 128 virginica
## 129 virginica
## 130 virginica
## 131 virginica
## 132 virginica
## 133 virginica
## 134 virginica
## 135 virginica
## 136 virginica
## 137 virginica
## 138 virginica
## 139 virginica
## 140 virginica
## 141 virginica
## 142 virginica
## 143 virginica
## 144 virginica
## 145 virginica
## 146 virginica
## 147 virginica
## 148 virginica
## 149 virginica
## 150 virginica
#Pivot Longer : “memperpanjang” data, menambah jumlah baris dan mengurangi jumlah kolom. Transformasi kebalikannya adalah pivot_wider ()
library(tidyverse)
# Income by religion: first 10 rows of `relig_income`.
head(relig_income, n = 10)
## # A tibble: 10 x 11
## religion `<$10k` `$10-20k` `$20-30k` `$30-40k` `$40-50k` `$50-75k` `$75-100k`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Agnostic 27 34 60 81 76 137 122
## 2 Atheist 12 27 37 52 35 70 73
## 3 Buddhist 27 21 30 34 33 58 62
## 4 Catholic 418 617 732 670 638 1116 949
## 5 Don’t k~ 15 14 15 11 10 35 21
## 6 Evangel~ 575 869 1064 982 881 1486 949
## 7 Hindu 1 9 7 9 11 34 47
## 8 Histori~ 228 244 236 238 197 223 131
## 9 Jehovah~ 20 27 24 24 21 30 15
## 10 Jewish 19 19 25 25 30 95 69
## # ... with 3 more variables: $100-150k <dbl>, >150k <dbl>,
## # Don't know/refused <dbl>
# Keep `religion` as an identifier column. Every other column is stacked into
# rows: its name goes into the new `income` column and its value into `count`.
a <- pivot_longer(
  relig_income,
  cols = !religion,
  names_to = "income",
  values_to = "count"
)
head(a, n = 15)
## # A tibble: 15 x 3
## religion income count
## <chr> <chr> <dbl>
## 1 Agnostic <$10k 27
## 2 Agnostic $10-20k 34
## 3 Agnostic $20-30k 60
## 4 Agnostic $30-40k 81
## 5 Agnostic $40-50k 76
## 6 Agnostic $50-75k 137
## 7 Agnostic $75-100k 122
## 8 Agnostic $100-150k 109
## 9 Agnostic >150k 84
## 10 Agnostic Don't know/refused 96
## 11 Atheist <$10k 12
## 12 Atheist $10-20k 27
## 13 Atheist $20-30k 37
## 14 Atheist $30-40k 52
## 15 Atheist $40-50k 35
# Data containing missing values (NA): first 10 rows of `billboard`.
head(billboard, n = 10)
## # A tibble: 10 x 79
## artist track date.entered wk1 wk2 wk3 wk4 wk5 wk6 wk7 wk8
## <chr> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2 Pac Baby D~ 2000-02-26 87 82 72 77 87 94 99 NA
## 2 2Ge+her The Ha~ 2000-09-02 91 87 92 NA NA NA NA NA
## 3 3 Doors~ Krypto~ 2000-04-08 81 70 68 67 66 57 54 53
## 4 3 Doors~ Loser 2000-10-21 76 76 72 69 67 65 55 59
## 5 504 Boyz Wobble~ 2000-04-15 57 34 25 17 17 31 36 49
## 6 98^0 Give M~ 2000-08-19 51 39 34 26 26 19 2 2
## 7 A*Teens Dancin~ 2000-07-08 97 97 96 95 100 NA NA NA
## 8 Aaliyah I Don'~ 2000-01-29 84 62 51 41 38 35 35 38
## 9 Aaliyah Try Ag~ 2000-03-18 59 53 38 28 21 18 16 14
## 10 Adams, ~ Open M~ 2000-08-26 76 76 74 69 68 67 61 58
## # ... with 68 more variables: wk9 <dbl>, wk10 <dbl>, wk11 <dbl>, wk12 <dbl>,
## # wk13 <dbl>, wk14 <dbl>, wk15 <dbl>, wk16 <dbl>, wk17 <dbl>, wk18 <dbl>,
## # wk19 <dbl>, wk20 <dbl>, wk21 <dbl>, wk22 <dbl>, wk23 <dbl>, wk24 <dbl>,
## # wk25 <dbl>, wk26 <dbl>, wk27 <dbl>, wk28 <dbl>, wk29 <dbl>, wk30 <dbl>,
## # wk31 <dbl>, wk32 <dbl>, wk33 <dbl>, wk34 <dbl>, wk35 <dbl>, wk36 <dbl>,
## # wk37 <dbl>, wk38 <dbl>, wk39 <dbl>, wk40 <dbl>, wk41 <dbl>, wk42 <dbl>,
## # wk43 <dbl>, wk44 <dbl>, wk45 <dbl>, wk46 <dbl>, wk47 <dbl>, wk48 <dbl>,
## # wk49 <dbl>, wk50 <dbl>, wk51 <dbl>, wk52 <dbl>, wk53 <dbl>, wk54 <dbl>,
## # wk55 <dbl>, wk56 <dbl>, wk57 <dbl>, wk58 <dbl>, wk59 <dbl>, wk60 <dbl>,
## # wk61 <dbl>, wk62 <dbl>, wk63 <dbl>, wk64 <dbl>, wk65 <dbl>, wk66 <lgl>,
## # wk67 <lgl>, wk68 <lgl>, wk69 <lgl>, wk70 <lgl>, wk71 <lgl>, wk72 <lgl>,
## # wk73 <lgl>, wk74 <lgl>, wk75 <lgl>, wk76 <lgl>
# Stack the weekly columns into rows. The week number goes into a new column
# named `week-p` and the chart position into `rank`.
pivot_longer(
  billboard,
  cols = starts_with("wk"),  # reshape the columns whose names start with "wk"
  names_to = "week-p",       # non-syntactic name: needs backticks when used
  names_prefix = "wk",       # strip the "wk" prefix from the stored names
  values_to = "rank",
  values_drop_na = TRUE      # drop the rows whose value is NA
)
## # A tibble: 5,307 x 5
## artist track date.entered `week-p` rank
## <chr> <chr> <date> <chr> <dbl>
## 1 2 Pac Baby Don't Cry (Keep... 2000-02-26 1 87
## 2 2 Pac Baby Don't Cry (Keep... 2000-02-26 2 82
## 3 2 Pac Baby Don't Cry (Keep... 2000-02-26 3 72
## 4 2 Pac Baby Don't Cry (Keep... 2000-02-26 4 77
## 5 2 Pac Baby Don't Cry (Keep... 2000-02-26 5 87
## 6 2 Pac Baby Don't Cry (Keep... 2000-02-26 6 94
## 7 2 Pac Baby Don't Cry (Keep... 2000-02-26 7 99
## 8 2Ge+her The Hardest Part Of ... 2000-09-02 1 91
## 9 2Ge+her The Hardest Part Of ... 2000-09-02 2 87
## 10 2Ge+her The Hardest Part Of ... 2000-09-02 3 92
## # ... with 5,297 more rows
# Case: several variables are encoded in the column names.
# Display the `who` dataset.
who
## # A tibble: 7,240 x 60
## country iso2 iso3 year new_sp_m014 new_sp_m1524 new_sp_m2534 new_sp_m3544
## <chr> <chr> <chr> <int> <int> <int> <int> <int>
## 1 Afghani~ AF AFG 1980 NA NA NA NA
## 2 Afghani~ AF AFG 1981 NA NA NA NA
## 3 Afghani~ AF AFG 1982 NA NA NA NA
## 4 Afghani~ AF AFG 1983 NA NA NA NA
## 5 Afghani~ AF AFG 1984 NA NA NA NA
## 6 Afghani~ AF AFG 1985 NA NA NA NA
## 7 Afghani~ AF AFG 1986 NA NA NA NA
## 8 Afghani~ AF AFG 1987 NA NA NA NA
## 9 Afghani~ AF AFG 1988 NA NA NA NA
## 10 Afghani~ AF AFG 1989 NA NA NA NA
## # ... with 7,230 more rows, and 52 more variables: new_sp_m4554 <int>,
## # new_sp_m5564 <int>, new_sp_m65 <int>, new_sp_f014 <int>,
## # new_sp_f1524 <int>, new_sp_f2534 <int>, new_sp_f3544 <int>,
## # new_sp_f4554 <int>, new_sp_f5564 <int>, new_sp_f65 <int>,
## # new_sn_m014 <int>, new_sn_m1524 <int>, new_sn_m2534 <int>,
## # new_sn_m3544 <int>, new_sn_m4554 <int>, new_sn_m5564 <int>,
## # new_sn_m65 <int>, new_sn_f014 <int>, new_sn_f1524 <int>,
## # new_sn_f2534 <int>, new_sn_f3544 <int>, new_sn_f4554 <int>,
## # new_sn_f5564 <int>, new_sn_f65 <int>, new_ep_m014 <int>,
## # new_ep_m1524 <int>, new_ep_m2534 <int>, new_ep_m3544 <int>,
## # new_ep_m4554 <int>, new_ep_m5564 <int>, new_ep_m65 <int>,
## # new_ep_f014 <int>, new_ep_f1524 <int>, new_ep_f2534 <int>,
## # new_ep_f3544 <int>, new_ep_f4554 <int>, new_ep_f5564 <int>,
## # new_ep_f65 <int>, newrel_m014 <int>, newrel_m1524 <int>,
## # newrel_m2534 <int>, newrel_m3544 <int>, newrel_m4554 <int>,
## # newrel_m5564 <int>, newrel_m65 <int>, newrel_f014 <int>,
## # newrel_f1524 <int>, newrel_f2534 <int>, newrel_f3544 <int>,
## # newrel_f4554 <int>, newrel_f5564 <int>, newrel_f65 <int>
# Reshape the columns new_sp_m014 through newrel_f65 into rows and split each
# column name into three variables using the capture groups of names_pattern:
# diagnosis, gender (a single character), and age. The "_?" makes the first
# underscore optional, so both "new_sp_..." and "newrel_..." names match.
pivot_longer(
  who,
  cols = new_sp_m014:newrel_f65,
  names_to = c("diagnosis", "gender", "age"),
  names_pattern = "new_?(.*)_(.)(.*)",
  values_to = "count"
)
## # A tibble: 405,440 x 8
## country iso2 iso3 year diagnosis gender age count
## <chr> <chr> <chr> <int> <chr> <chr> <chr> <int>
## 1 Afghanistan AF AFG 1980 sp m 014 NA
## 2 Afghanistan AF AFG 1980 sp m 1524 NA
## 3 Afghanistan AF AFG 1980 sp m 2534 NA
## 4 Afghanistan AF AFG 1980 sp m 3544 NA
## 5 Afghanistan AF AFG 1980 sp m 4554 NA
## 6 Afghanistan AF AFG 1980 sp m 5564 NA
## 7 Afghanistan AF AFG 1980 sp m 65 NA
## 8 Afghanistan AF AFG 1980 sp f 014 NA
## 9 Afghanistan AF AFG 1980 sp f 1524 NA
## 10 Afghanistan AF AFG 1980 sp f 2534 NA
## # ... with 405,430 more rows
# Dataset illustrating different shapes of the relationship between x and y:
# four x columns (x1..x4) paired with four y columns (y1..y4).
anscombe
## x1 x2 x3 x4 y1 y2 y3 y4
## 1 10 10 10 8 8.04 9.14 7.46 6.58
## 2 8 8 8 8 6.95 8.14 6.77 5.76
## 3 13 13 13 8 7.58 8.74 12.74 7.71
## 4 9 9 9 8 8.81 8.77 7.11 8.84
## 5 11 11 11 8 8.33 9.26 7.81 8.47
## 6 14 14 14 8 9.96 8.10 8.84 7.04
## 7 6 6 6 8 7.24 6.13 6.08 5.25
## 8 4 4 4 19 4.26 3.10 5.39 12.50
## 9 12 12 12 8 10.84 9.13 8.15 5.56
## 10 7 7 7 8 4.82 7.26 6.42 7.91
## 11 5 5 5 8 5.68 4.74 5.73 6.89
# Pivot every column of anscombe at once. The pattern "(.)(.)" splits each
# column name into its letter and its digit: the letter feeds ".value" (so x
# and y remain value columns) and the digit is stored in the new "set" column.
pivot_longer(
  anscombe,
  cols = everything(),
  names_to = c(".value", "set"),
  names_pattern = "(.)(.)"
)
## # A tibble: 44 x 3
## set x y
## <chr> <dbl> <dbl>
## 1 1 10 8.04
## 2 2 10 9.14
## 3 3 10 7.46
## 4 4 8 6.58
## 5 1 8 6.95
## 6 2 8 8.14
## 7 3 8 6.77
## 8 4 8 5.76
## 9 1 13 7.58
## 10 2 13 8.74
## # ... with 34 more rows
# Preview the first five rows of iris.
head(iris, n = 5)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
# Stack Sepal.Length and Petal.Length into name/value pairs (default column
# names "name" and "value"); the other columns are repeated for each pair.
pivot_longer(iris, cols = c(Sepal.Length, Petal.Length))
## # A tibble: 300 x 5
## Sepal.Width Petal.Width Species name value
## <dbl> <dbl> <fct> <chr> <dbl>
## 1 3.5 0.2 setosa Sepal.Length 5.1
## 2 3.5 0.2 setosa Petal.Length 1.4
## 3 3 0.2 setosa Sepal.Length 4.9
## 4 3 0.2 setosa Petal.Length 1.4
## 5 3.2 0.2 setosa Sepal.Length 4.7
## 6 3.2 0.2 setosa Petal.Length 1.3
## 7 3.1 0.2 setosa Sepal.Length 4.6
## 8 3.1 0.2 setosa Petal.Length 1.5
## 9 3.6 0.2 setosa Sepal.Length 5
## 10 3.6 0.2 setosa Petal.Length 1.4
## # ... with 290 more rows
# arrange(): ordering rows.
# sort(): order values ascending or descending.
# Preview the first four rows of the swiss dataset.
head(swiss, n = 4)
## Fertility Agriculture Examination Education Catholic
## Courtelary 80.2 17.0 15 12 9.96
## Delemont 83.1 45.1 6 9 84.84
## Franches-Mnt 92.5 39.7 5 5 93.40
## Moutier 85.8 36.5 12 7 33.77
## Infant.Mortality
## Courtelary 22.2
## Delemont 22.2
## Franches-Mnt 20.2
## Moutier 20.3
# Take the Education values from rows 1 through 25 of swiss.
x <- head(swiss[["Education"]], n = 25)
x
## [1] 12 9 5 7 15 7 7 8 7 13 6 12 7 12 5 2 8 28 20 9 10 3 12 6 1
# Sort the values in ascending order.
sort(x, decreasing = FALSE)
## [1] 1 2 3 5 5 6 6 7 7 7 7 7 8 8 9 9 10 12 12 12 12 13 15 20 28
# desc(): transform a vector into a form that sorts in descending order.
# It is mainly useful inside arrange().
desc(seq_len(10))
## [1] -1 -2 -3 -4 -5 -6 -7 -8 -9 -10
# The built-in lowercase alphabet.
letters
## [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
## [20] "t" "u" "v" "w" "x" "y" "z"
# desc() also works on a factor; the result is numeric (see the output below).
letters %>%
  factor() %>%
  desc()
## [1] -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -15 -16 -17 -18 -19
## [20] -20 -21 -22 -23 -24 -25 -26
# Preview the first five rows of the starwars dataset.
head(starwars, n = 5)
## # A tibble: 5 x 14
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 Luke Sk~ 172 77 blond fair blue 19 male mascu~
## 2 C-3PO 167 75 <NA> gold yellow 112 none mascu~
## 3 R2-D2 96 32 <NA> white, blue red 33 none mascu~
## 4 Darth V~ 202 136 none white yellow 41.9 male mascu~
## 5 Leia Or~ 150 49 brown light brown 19 fema~ femin~
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Order the rows by mass, largest first, and show the first rows
# (head() is called without n, so its default row count applies).
starwars %>%
  arrange(desc(mass)) %>%
  head()
## # A tibble: 6 x 14
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 Jabba ~ 175 1358 <NA> green-tan,~ orange 600 herm~ mascu~
## 2 Grievo~ 216 159 none brown, whi~ green, y~ NA male mascu~
## 3 IG-88 200 140 none metal red 15 none mascu~
## 4 Darth ~ 202 136 none white yellow 41.9 male mascu~
## 5 Tarfful 234 136 brown brown blue NA male mascu~
## 6 Owen L~ 178 120 brown, grey light blue 52 male mascu~
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Preview the first fifteen rows of mtcars.
head(mtcars, n = 15)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
# Order by cyl first (acting as the classification), then by disp within
# each cyl value; show the first fifteen rows.
mtcars %>%
  arrange(cyl, disp) %>%
  head(n = 15)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
# Group mtcars by cyl.
by_cyl <- group_by(mtcars, cyl)
# Order by descending wt. No .by_group is given here, so the rows are ordered
# by wt alone and the cyl groups end up interleaved in the result.
by_cyl %>%
  arrange(desc(wt)) %>%
  head(n = 15)
## # A tibble: 15 x 11
## # Groups: cyl [2]
## mpg cyl disp hp drat wt qsec vs am gear carb
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 10.4 8 460 215 3 5.42 17.8 0 0 3 4
## 2 14.7 8 440 230 3.23 5.34 17.4 0 0 3 4
## 3 10.4 8 472 205 2.93 5.25 18.0 0 0 3 4
## 4 16.4 8 276. 180 3.07 4.07 17.4 0 0 3 3
## 5 19.2 8 400 175 3.08 3.84 17.0 0 0 3 2
## 6 13.3 8 350 245 3.73 3.84 15.4 0 0 3 4
## 7 15.2 8 276. 180 3.07 3.78 18 0 0 3 3
## 8 17.3 8 276. 180 3.07 3.73 17.6 0 0 3 3
## 9 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
## 10 15 8 301 335 3.54 3.57 14.6 0 1 5 8
## 11 15.5 8 318 150 2.76 3.52 16.9 0 0 3 2
## 12 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
## 13 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
## 14 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
## 15 17.8 6 168. 123 3.92 3.44 18.9 1 0 4 4
# With .by_group = TRUE the rows are ordered by the grouping column cyl
# first, and by descending wt inside each group.
by_cyl %>%
  arrange(desc(wt), .by_group = TRUE) %>%
  head(n = 15)
## # A tibble: 15 x 11
## # Groups: cyl [2]
## mpg cyl disp hp drat wt qsec vs am gear carb
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
## 2 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
## 3 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2
## 4 21.5 4 120. 97 3.7 2.46 20.0 1 0 3 1
## 5 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
## 6 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1
## 7 26 4 120. 91 4.43 2.14 16.7 0 1 5 2
## 8 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1
## 9 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1
## 10 30.4 4 75.7 52 4.93 1.62 18.5 1 1 4 2
## 11 30.4 4 95.1 113 3.77 1.51 16.9 1 1 5 2
## 12 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
## 13 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
## 14 17.8 6 168. 123 3.92 3.44 18.9 1 0 4 4
## 15 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
# Order ascending by every column whose name starts with "Sepal":
# Sepal.Length is the first key and Sepal.Width breaks ties.
iris %>%
  arrange(across(starts_with("Sepal"))) %>%
  head(n = 7)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 4.3 3.0 1.1 0.1 setosa
## 2 4.4 2.9 1.4 0.2 setosa
## 3 4.4 3.0 1.3 0.2 setosa
## 4 4.4 3.2 1.3 0.2 setosa
## 5 4.5 2.3 1.3 0.3 setosa
## 6 4.6 3.1 1.5 0.2 setosa
## 7 4.6 3.2 1.4 0.2 setosa
# Same ordering keys (the "Sepal" columns), but each one wrapped in desc() so
# the largest Sepal.Length comes first.
iris %>%
  arrange(across(starts_with("Sepal"), desc)) %>%
  head()
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 7.9 3.8 6.4 2.0 virginica
## 2 7.7 3.8 6.7 2.2 virginica
## 3 7.7 3.0 6.1 2.3 virginica
## 4 7.7 2.8 6.7 2.0 virginica
## 5 7.7 2.6 6.9 2.3 virginica
## 6 7.6 3.0 6.6 2.1 virginica
# mutate(): add new variables that are functions of existing variables.
# transmute(): add new variables and drop the existing ones.
# A new variable overwrites an existing variable of the same name, and a
# variable can be removed by setting its value to NULL.
library(tidyverse)
head(starwars, n = 3)
## # A tibble: 3 x 14
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 Luke Sk~ 172 77 blond fair blue 19 male mascu~
## 2 C-3PO 167 75 <NA> gold yellow 112 none mascu~
## 3 R2-D2 96 32 <NA> white, blue red 33 none mascu~
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Keep name, mass and species, then add mass_norm = mass divided by the
# overall mean mass (NA values ignored via na.rm); show the first four rows.
starwars %>%
  select(name, mass, species) %>%
  mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) %>%
  head(n = 4)
## # A tibble: 4 x 4
## name mass species mass_norm
## <chr> <dbl> <chr> <dbl>
## 1 Luke Skywalker 77 Human 0.791
## 2 C-3PO 75 Droid 0.771
## 3 R2-D2 32 Droid 0.329
## 4 Darth Vader 136 Human 1.40
# Same three columns, but grouped by species first, so mass_norm is mass
# divided by the mean mass of the row's own species (NA values ignored).
starwars %>%
  select(name, mass, species) %>%
  group_by(species) %>%
  mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) %>%
  head(n = 4)
## # A tibble: 4 x 4
## # Groups: species [2]
## name mass species mass_norm
## <chr> <dbl> <chr> <dbl>
## 1 Luke Skywalker 77 Human 0.930
## 2 C-3PO 75 Droid 1.08
## 3 R2-D2 32 Droid 0.459
## 4 Darth Vader 136 Human 1.64
# A newly created variable is available immediately: kuadrat is computed
# from kali2, which is defined earlier in the same mutate() call.
starwars %>%
  select(name, mass) %>%
  mutate(kali2 = 2 * mass, kuadrat = kali2 * kali2) %>%
  head(n = 4)
## # A tibble: 4 x 4
## name mass kali2 kuadrat
## <chr> <dbl> <dbl> <dbl>
## 1 Luke Skywalker 77 154 23716
## 2 C-3PO 75 150 22500
## 3 R2-D2 32 64 4096
## 4 Darth Vader 136 272 73984
# Convert every selected column except name (i.e. homeworld and species) to
# a factor. across() is covered in more detail further below.
starwars %>%
  select(name, homeworld, species) %>%
  mutate(across(c(homeworld, species), as.factor)) %>%
  head(n = 4)
## # A tibble: 4 x 3
## name homeworld species
## <chr> <fct> <fct>
## 1 Luke Skywalker Tatooine Human
## 2 C-3PO Tatooine Droid
## 3 R2-D2 Naboo Droid
## 4 Darth Vader Tatooine Human
# Rank characters by mass within their homeworld; desc() makes the heaviest
# character of each homeworld rank 1.
starwars %>%
  select(name, mass, homeworld) %>%
  group_by(homeworld) %>%
  mutate(rank = min_rank(desc(mass))) %>%
  head(n = 4)
## # A tibble: 4 x 4
## # Groups: homeworld [2]
## name mass homeworld rank
## <chr> <dbl> <chr> <int>
## 1 Luke Skywalker 77 Tatooine 5
## 2 C-3PO 75 Tatooine 6
## 3 R2-D2 32 Naboo 6
## 4 Darth Vader 136 Tatooine 1
# Small example tibble for the column-placement demos that follow. A column
# added by mutate() appears on the far right by default.
df <- tibble(
  Jaha = c(2, 3, 4),
  Daga = c(3, 6, 7)
)
df
## # A tibble: 3 x 2
## Jaha Daga
## <dbl> <dbl>
## 1 2 3
## 2 3 6
## 3 4 7
# Add logaritma = log(Jaha + Daga); with no placement argument the new
# column is appended after the existing ones.
mutate(df, logaritma = log(Jaha + Daga))
## # A tibble: 3 x 3
## Jaha Daga logaritma
## <dbl> <dbl> <dbl>
## 1 2 3 1.61
## 2 3 6 2.20
## 3 4 7 2.40
# Place the new logaritma column before column 1 (Jaha) using .before.
# The column holds log(Jaha + Daga), so its contents match its name and the
# values shown by the preceding logaritma example; only the position differs.
df %>% mutate(logaritma = log(Jaha + Daga), .before = 1)
## # A tibble: 3 x 3
## logaritma Jaha Daga
## <dbl> <dbl> <dbl>
## 1 1.61 2 3
## 2 2.20 3 6
## 3 2.40 4 7
# Insert the new column immediately after Jaha using .after. Median is the
# midpoint (Jaha + Daga) / 2 of the two values in each row.
mutate(df, Median = (Jaha + Daga) / 2, .after = Jaha)
## # A tibble: 3 x 3
## Jaha Median Daga
## <dbl> <dbl> <dbl>
## 1 2 2.5 3
## 2 3 4.5 6
## 3 4 5.5 7
# .keep controls which columns are shown after mutate(); by default every
# column is kept.
df <- tibble(
  x = c(4, 5, 6),
  y = c(6, 7, 3),
  a = c("A", "B", "C"),
  b = c("G", "J", "R")
)
mutate(df, Penjumlahan = x + y, .keep = "all") # the default
## # A tibble: 3 x 5
## x y a b Penjumlahan
## <dbl> <dbl> <chr> <chr> <dbl>
## 1 4 6 A G 10
## 2 5 7 B J 12
## 3 6 3 C R 9
# .keep = "used": show the new column together with the columns that were
# used to compute it (x and y here).
mutate(df, Penjumlahan = x + y, .keep = "used")
## # A tibble: 3 x 3
## x y Penjumlahan
## <dbl> <dbl> <dbl>
## 1 4 6 10
## 2 5 7 12
## 3 6 3 9
# .keep = "used" again: only y is used, so only y and Pangkat_2 are shown.
mutate(df, Pangkat_2 = y^2, .keep = "used")
## # A tibble: 3 x 2
## y Pangkat_2
## <dbl> <dbl>
## 1 6 36
## 2 7 49
## 3 3 9
# .keep = "unused": show the new column plus the columns that were NOT used
# in the computation (a and b here).
mutate(df, z = x + y, .keep = "unused")
## # A tibble: 3 x 3
## a b z
## <chr> <chr> <dbl>
## 1 A G 10
## 2 B J 12
## 3 C R 9
# .keep = "none": show only the newly computed column.
mutate(df, z = x + y, .keep = "none") # same as transmute()
## # A tibble: 3 x 1
## z
## <dbl>
## 1 10
## 2 12
## 3 9
# summarise(): reduce multiple values down to a single summary.
# summarize() is a synonym.
# tibble(): build a table from vectors.
a <- c(2, 3, 4)
# General form: tibble(x, y, z)
tibble(nilai = a, kali_dua = 2 * a, bagi_dua = a / 2)
## # A tibble: 3 x 3
## nilai kali_dua bagi_dua
## <dbl> <dbl> <dbl>
## 1 2 4 1
## 2 3 6 1.5
## 3 4 8 2
# Without grouping, summarise() returns a single row: here the mean of disp
# and the number of rows.
summarise(mtcars, mean = mean(disp), n = n())
## mean n
## 1 230.7219 32
# With grouping, summarise() returns one row per group: mean disp and row
# count for each cyl value.
summarise(
  group_by(mtcars, cyl),
  Average = mean(disp),
  Jumlah = n()
)
## # A tibble: 3 x 3
## cyl Average Jumlah
## <dbl> <dbl> <int>
## 1 4 105. 11
## 2 6 183. 7
## 3 8 353. 14
# Per cyl group, compute the 25% and 75% quantiles of disp. Each group
# contributes two rows, with prob recording which quantile a row holds.
# NOTE(review): newer dplyr releases (>= 1.1.0) deprecate multi-row
# summarise() results in favour of reframe() -- confirm before upgrading.
mtcars %>%
  group_by(cyl) %>%
  summarise(
    qs = quantile(disp, probs = c(0.25, 0.75)),
    prob = c(0.25, 0.75)
  )
## `summarise()` has grouped output by 'cyl'. You can override using the `.groups` argument.
## # A tibble: 6 x 3
## # Groups: cyl [3]
## cyl qs prob
## <dbl> <dbl> <dbl>
## 1 4 78.8 0.25
## 2 4 121. 0.75
## 3 6 160 0.25
## 4 6 196. 0.75
## 5 8 302. 0.25
## 6 8 390 0.75
# across(): makes it easy to apply the same transformation to several
# columns. Here: the mean of every column whose name starts with "Sepal",
# computed per Species.
summarise(
  group_by(iris, Species),
  across(starts_with("Sepal"), mean)
)
## # A tibble: 3 x 3
## Species Sepal.Length Sepal.Width
## <fct> <dbl> <dbl>
## 1 setosa 5.01 3.43
## 2 versicolor 5.94 2.77
## 3 virginica 6.59 2.97
# Same per-Species means, written with an explicit function so that extra
# arguments (na.rm = TRUE) can be passed to mean().
summarise(
  group_by(iris, Species),
  across(starts_with("Sepal"), function(v) mean(v, na.rm = TRUE))
)
## # A tibble: 3 x 3
## Species Sepal.Length Sepal.Width
## <fct> <dbl> <dbl>
## 1 setosa 5.01 3.43
## 2 versicolor 5.94 2.77
## 3 virginica 6.59 2.97
# Compute both the average and the standard deviation of the "Sepal"
# columns per Species; the list names become suffixes of the output columns.
summarise(
  group_by(iris, Species),
  across(starts_with("Sepal"), list(average = mean, sd = sd))
)
## # A tibble: 3 x 5
## Species Sepal.Length_avera~ Sepal.Length_sd Sepal.Width_aver~ Sepal.Width_sd
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 setosa 5.01 0.352 3.43 0.379
## 2 versicol~ 5.94 0.516 2.77 0.314
## 3 virginica 6.59 0.636 2.97 0.322
# Use the .names argument to control the output column names; "{col}" is
# replaced by the name of the input column.
summarise(
  group_by(iris, Species),
  across(starts_with("Sepal"), mean, .names = "mean_{col}")
)
## # A tibble: 3 x 3
## Species mean_Sepal.Length mean_Sepal.Width
## <fct> <dbl> <dbl>
## 1 setosa 5.01 3.43
## 2 versicolor 5.94 2.77
## 3 virginica 6.59 2.97
# .names with several functions: "{col}" is the input column name and "{fn}"
# the name of the function taken from the list.
summarise(
  group_by(iris, Species),
  across(
    starts_with("Sepal"),
    list(mean = mean, sd = sd),
    .names = "{col}.{fn}"
  )
)
## # A tibble: 3 x 5
## Species Sepal.Length.mean Sepal.Length.sd Sepal.Width.mean Sepal.Width.sd
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 setosa 5.01 0.352 3.43 0.379
## 2 versicolor 5.94 0.516 2.77 0.314
## 3 virginica 6.59 0.636 2.97 0.322
# c_across() is designed to work with rowwise() to make row-wise aggregation
# easy. Per the dplyr documentation it differs from c() in two ways: it uses
# tidy-select semantics to pick columns, and it combines values with
# vctrs::vec_c() for safer output types.
# runif(): draws from the continuous uniform distribution. No seed is set,
# so the values differ from run to run.
df <- tibble(
  id = 1:4,
  w = runif(4),
  x = runif(4),
  y = runif(4),
  z = runif(4)
)
df
## # A tibble: 4 x 5
## id w x y z
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.841 0.891 0.603 0.806
## 2 2 0.397 0.602 0.00763 0.898
## 3 3 0.924 0.308 0.542 0.108
## 4 4 0.586 0.0502 0.977 0.524
# Compute row by row: for each row, the sum and the standard deviation of
# the values in columns w through z.
mutate(
  rowwise(df),
  sum = sum(c_across(w:z)),
  sd = sd(c_across(w:z))
)
## # A tibble: 4 x 7
## # Rowwise:
## id w x y z sum sd
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.841 0.891 0.603 0.806 3.14 0.126
## 2 2 0.397 0.602 0.00763 0.898 1.90 0.374
## 3 3 0.924 0.308 0.542 0.108 1.88 0.351
## 4 4 0.586 0.0502 0.977 0.524 2.14 0.380