Dataframe terdiri dari banyak kolom dengan tiap kolom berisi list atau vector yang sama panjangnya. Setiap kolom merepresentasikan satu variabel, sedangkan setiap baris merepresentasikan satu observasi. Dataframe dapat berisi berbagai jenis data, seperti numerik, karakter, dan logika.
head(data_ssn)
## # A tibble: 6 × 118
## URUT R101 R102 R105 R401 R403 R404 R405 R407 R408 R409
## <dbl> <dbl+lb> <dbl> <dbl+l> <dbl> <dbl+l> <dbl+l> <dbl+l> <dbl> <dbl+l> <dbl>
## 1 1 18 [Lam… 7 2 [Per… 4 5 [Men… 2 [Kaw… 1 [Lak… 26 1 [Ya] 23
## 2 2 19 [Kep… 3 1 [Per… 5 3 [Ana… 1 [Bel… 1 [Lak… 9 NA NA
## 3 7 16 [Sum… 71 1 [Per… 1 1 [Kep… 2 [Kaw… 1 [Lak… 39 1 [Ya] 26
## 4 23 16 [Sum… 71 1 [Per… 4 3 [Ana… 1 [Bel… 2 [Per… 9 NA NA
## 5 27 15 [Jam… 5 2 [Per… 4 3 [Ana… 1 [Bel… 1 [Lak… 8 NA NA
## 6 28 52 [Nus… 6 2 [Per… 3 3 [Ana… 3 [Cer… 2 [Per… 37 NA 28
## # ℹ 107 more variables: R406A <dbl>, R406B <dbl>, R406C <dbl>, R410 <dbl>,
## # R601 <dbl>, R602 <dbl>, R603 <dbl>, R604 <dbl>, R605 <dbl+lbl>,
## # R606 <dbl+lbl>, R607 <dbl+lbl>, R608 <dbl+lbl>, R609 <dbl+lbl>,
## # R610 <dbl+lbl>, R611 <dbl+lbl>, R612 <dbl+lbl>, R613 <dbl+lbl>,
## # R614 <dbl+lbl>, R615 <dbl+lbl>, R616 <dbl+lbl>, R617 <dbl>, R618 <dbl>,
## # R619 <dbl+lbl>, R620 <dbl+lbl>, R621 <dbl+lbl>, R1101_A <chr>,
## # R1101_B <chr>, R1101_C <chr>, R1101_D <chr>, R1101_E <chr>, …
#str(data_ssn) # Melihat ringkasan struktur dari suatu objek data
#summary(data_ssn) # Melihat ringkasan statistik dasar
Missing Value dituliskan dengan NA. Untuk menguji NA, gunakan is.na().
colSums(is.na(df_ori))
sum(is.na(df_ori$varA))
sum(is.na(data_ssn$R105))
## [1] 0
Untuk menjaga data asli, lebih baik membuat kolom baru daripada menimpa kolom asli. Membuat variabel baru dalam data frame dapat dilakukan seperti membuat vektor.
base
df_ori$var_baru <- ekspresi
df_ori[,"var_baru"] <- ekspresi
dplyr
library(dplyr)
df_baru <- df_ori %>%
mutate(var_baru = ifelse(condition, "Label 1", "Label 2"))
print(df_baru[,c("var_baru")])
Menggunakan syntax base
# Base R `$` assignment: adds a column named "tambah"; the single value 1 is
# recycled to every row.
data_ssn$tambah <- 1
# Preview the first 8 rows of the new column.
head(data_ssn[,"tambah"],8)
## # A tibble: 8 × 1
## tambah
## <dbl>
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
## 7 1
## 8 1
# Base R bracket assignment: same effect as `$`, with the column name given as
# a string.
data_ssn[,"Var_baru"] <- 1
# Preview the first 6 rows of the new column.
head(data_ssn[,"Var_baru"],6)
## # A tibble: 6 × 1
## Var_baru
## <dbl>
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
Menggunakan mutate()
Library dplyr menyediakan fungsi mutate() untuk menambah/memodifikasi kolom dalam data frame.
Fungsi ini memungkinkan untuk melakukan penghitungan/transformasi pada kolom yang ada, dan menambahkan hasilnya sebagai kolom baru ke dalam data frame.
library(dplyr)
# Derive a new column `status` from R407: values below 18 become "Anak-anak",
# everything else "Dewasa". The result is stored in `data`, so `data_ssn`
# itself is not modified.
data <- data_ssn %>%
  mutate(status = ifelse(R407 < 18, "Anak-anak", "Dewasa"))
print(data[, "status"])
## # A tibble: 48,797 × 1
## status
## <chr>
## 1 Dewasa
## 2 Anak-anak
## 3 Dewasa
## 4 Anak-anak
## 5 Anak-anak
## 6 Dewasa
## 7 Dewasa
## 8 Dewasa
## 9 Dewasa
## 10 Dewasa
## # ℹ 48,787 more rows
Dilakukan untuk mengurutkan data berdasarkan beberapa variabel tertentu
Pada base R, pengurutan dilakukan dengan membuat vektor indeks urutan baris melalui order(), lalu memakai vektor tersebut untuk menyusun ulang baris data
Fungsi yang sering digunakan: arrange(...), order(...)
Dalam library dplyr, fungsi arrange() otomatis mengurutkan secara ascending tanpa memerlukan fungsi tambahan seperti asc(). Hanya perlu menggunakan desc() untuk mengurutkan secara descending.
base
df_sorted_asc <- df_ori[order(df_ori$varA, decreasing = FALSE), ]
df_sorted_desc <- df_ori[order(df_ori$varA, decreasing = TRUE), ]
dplyr
library(dplyr)
# Template: arrange() sorts in ascending order by default.
df_sort <- df_ori %>%
  arrange(varA) # Ascending
library(dplyr)
# Template: wrap the column in desc() for descending order. The call needs two
# closing parentheses, one for desc() and one for arrange().
df_sort <- df_ori %>%
  arrange(desc(varA)) # Descending
1. Menyortir 1 Variabel dengan arrange()
library(dplyr)
# Sort the rows by column R101 in ascending order (arrange()'s default).
data_sorted111 <- data_ssn %>%
  arrange(R101)
print(data_sorted111)
## # A tibble: 48,797 × 120
## URUT R101 R102 R105 R401 R403 R404 R405 R407 R408
## <dbl> <dbl+lbl> <dbl> <dbl+lbl> <dbl> <dbl+l> <dbl+l> <dbl+l> <dbl> <dbl+l>
## 1 300062 11 [Aceh] 3 2 [Perdes… 2 2 [Ist… 2 [Kaw… 2 [Per… 58 1 [Ya]
## 2 300235 11 [Aceh] 74 1 [Perkot… 4 3 [Ana… 1 [Bel… 2 [Per… 17 NA
## 3 300240 11 [Aceh] 74 2 [Perdes… 6 3 [Ana… 1 [Bel… 2 [Per… 3 NA
## 4 300411 11 [Aceh] 15 2 [Perdes… 4 3 [Ana… 1 [Bel… 1 [Lak… 0 NA
## 5 300576 11 [Aceh] 14 2 [Perdes… 1 1 [Kep… 2 [Kaw… 1 [Lak… 61 1 [Ya]
## 6 300606 11 [Aceh] 4 1 [Perkot… 2 2 [Ist… 2 [Kaw… 2 [Per… 37 1 [Ya]
## 7 300632 11 [Aceh] 3 2 [Perdes… 4 5 [Men… 2 [Kaw… 2 [Per… 38 1 [Ya]
## 8 300748 11 [Aceh] 75 2 [Perdes… 2 2 [Ist… 2 [Kaw… 2 [Per… 44 1 [Ya]
## 9 301149 11 [Aceh] 9 2 [Perdes… 4 3 [Ana… 1 [Bel… 1 [Lak… 19 NA
## 10 301179 11 [Aceh] 17 2 [Perdes… 4 3 [Ana… 1 [Bel… 2 [Per… 25 NA
## # ℹ 48,787 more rows
## # ℹ 110 more variables: R409 <dbl>, R406A <dbl>, R406B <dbl>, R406C <dbl>,
## # R410 <dbl>, R601 <dbl>, R602 <dbl>, R603 <dbl>, R604 <dbl>, R605 <dbl+lbl>,
## # R606 <dbl+lbl>, R607 <dbl+lbl>, R608 <dbl+lbl>, R609 <dbl+lbl>,
## # R610 <dbl+lbl>, R611 <dbl+lbl>, R612 <dbl+lbl>, R613 <dbl+lbl>,
## # R614 <dbl+lbl>, R615 <dbl+lbl>, R616 <dbl+lbl>, R617 <dbl>, R618 <dbl>,
## # R619 <dbl+lbl>, R620 <dbl+lbl>, R621 <dbl+lbl>, R1101_A <chr>, …
# Sort the rows by column R101 in descending order via desc().
data_sorted_desc112 <- data_ssn %>%
  arrange(desc(R101))
print(data_sorted_desc112)
## # A tibble: 48,797 × 120
## URUT R101 R102 R105 R401 R403 R404 R405 R407 R408
## <dbl> <dbl+lbl> <dbl> <dbl+lbl> <dbl> <dbl+l> <dbl+l> <dbl+l> <dbl> <dbl+l>
## 1 300131 94 [Papua] 19 2 [Perde… 5 3 [Ana… 1 [Bel… 2 [Per… 10 NA
## 2 301416 94 [Papua] 12 1 [Perko… 3 3 [Ana… 1 [Bel… 2 [Per… 20 NA
## 3 301449 94 [Papua] 1 2 [Perde… 2 2 [Ist… 2 [Kaw… 2 [Per… 40 1 [Ya]
## 4 301540 94 [Papua] 12 1 [Perko… 3 3 [Ana… 1 [Bel… 1 [Lak… 10 NA
## 5 301758 94 [Papua] 4 2 [Perde… 2 2 [Ist… 2 [Kaw… 2 [Per… 47 1 [Ya]
## 6 302124 94 [Papua] 1 1 [Perko… 1 1 [Kep… 3 [Cer… 1 [Lak… 62 NA
## 7 302233 94 [Papua] 12 1 [Perko… 3 3 [Ana… 1 [Bel… 1 [Lak… 22 NA
## 8 302405 94 [Papua] 4 1 [Perko… 3 3 [Ana… 1 [Bel… 2 [Per… 18 NA
## 9 302501 94 [Papua] 12 2 [Perde… 3 3 [Ana… 1 [Bel… 1 [Lak… 25 NA
## 10 303359 94 [Papua] 20 2 [Perde… 1 1 [Kep… 2 [Kaw… 1 [Lak… 40 1 [Ya]
## # ℹ 48,787 more rows
## # ℹ 110 more variables: R409 <dbl>, R406A <dbl>, R406B <dbl>, R406C <dbl>,
## # R410 <dbl>, R601 <dbl>, R602 <dbl>, R603 <dbl>, R604 <dbl>, R605 <dbl+lbl>,
## # R606 <dbl+lbl>, R607 <dbl+lbl>, R608 <dbl+lbl>, R609 <dbl+lbl>,
## # R610 <dbl+lbl>, R611 <dbl+lbl>, R612 <dbl+lbl>, R613 <dbl+lbl>,
## # R614 <dbl+lbl>, R615 <dbl+lbl>, R616 <dbl+lbl>, R617 <dbl>, R618 <dbl>,
## # R619 <dbl+lbl>, R620 <dbl+lbl>, R621 <dbl+lbl>, R1101_A <chr>, …
glimpse(data_sorted_desc112)
## Rows: 48,797
## Columns: 120
## $ URUT <dbl> 300131, 301416, 301449, 301540, 301758, 302124, 302233, 30240…
## $ R101 <dbl+lbl> 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 9…
## $ R102 <dbl> 19, 12, 1, 12, 4, 1, 12, 4, 12, 20, 29, 10, 32, 16, 16, 16, 1…
## $ R105 <dbl+lbl> 2, 1, 2, 1, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ R401 <dbl> 5, 3, 2, 3, 2, 1, 3, 3, 3, 1, 5, 1, 6, 1, 2, 2, 4, 7, 2, 1, 5…
## $ R403 <dbl+lbl> 3, 3, 2, 3, 2, 1, 3, 3, 3, 1, 3, 1, 3, 1, 3, 2, 3, 3, 2, …
## $ R404 <dbl+lbl> 1, 1, 2, 1, 2, 3, 1, 1, 1, 2, 1, 2, 1, 4, 1, 2, 1, 1, 2, …
## $ R405 <dbl+lbl> 2, 2, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, …
## $ R407 <dbl> 10, 20, 40, 10, 47, 62, 22, 18, 25, 40, 3, 39, 9, 43, 22, 43,…
## $ R408 <dbl+lbl> NA, NA, 1, NA, 1, NA, NA, NA, NA, 1, NA, 1, NA, NA, N…
## $ R409 <dbl> NA, NA, 24, NA, 20, 24, NA, NA, NA, 24, NA, 30, NA, 20, NA, 1…
## $ R406A <dbl> 28, 15, 9, 11, 7, 26, 15, 31, 11, 2, 29, 3, 28, 1, 1, 17, 4, …
## $ R406B <dbl> 7, 12, 8, 5, 5, 5, 10, 8, 12, 7, 8, 5, 7, 7, 7, 7, 6, 2, 10, …
## $ R406C <dbl> 2012, 2002, 1982, 2012, 1975, 1960, 2000, 2004, 1997, 1982, 2…
## $ R410 <dbl> 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2…
## $ R601 <dbl> 94, 94, 53, 91, 73, 73, 73, 73, 81, 94, 94, 94, 94, 94, 94, 9…
## $ R602 <dbl> 19, 12, 9, 1, 71, 71, 8, 8, 72, 2, 29, 10, 32, 16, 16, 16, 13…
## $ R603 <dbl> 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, NA, 94, 94, 94, 94, 9…
## $ R604 <dbl> 19, 12, 1, 12, 4, 1, 12, 4, 12, 20, NA, 10, 32, 16, 16, 16, 1…
## $ R605 <dbl+lbl> 3, NA, NA, 4, NA, NA, NA, NA, NA, NA, 4, NA, 4, NA, N…
## $ R606 <dbl+lbl> 4, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ R607 <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA, 1, 1, 5, …
## $ R608 <dbl+lbl> 5, 5, 5, 1, 5, 5, 5, 5, 5, 5, NA, 5, 5, 5, …
## $ R609 <dbl+lbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, NA, 5, 5, 5, …
## $ R610 <dbl+lbl> 2, 2, 3, 2, 3, 3, 2, 2, 3, 3, NA, 1, 2, 1, …
## $ R611 <dbl+lbl> 1, 2, NA, 1, NA, NA, 1, 1, NA, NA, NA, NA, 1, NA, N…
## $ R612 <dbl+lbl> 3, 13, 13, 3, 3, 8, 21, 21, 8, 3, NA, NA, 3, NA, N…
## $ R613 <dbl+lbl> 5, 3, 8, 5, 8, 8, 2, 2, 8, 8, NA, NA, 4, NA, N…
## $ R614 <dbl+lbl> 25, 8, 13, 25, 3, 8, 13, 13, 8, 3, NA, NA, 25, NA, N…
## $ R615 <dbl+lbl> 5, 5, NA, 5, NA, NA, 5, 5, 5, NA, NA, NA, 2, NA, N…
## $ R616 <dbl+lbl> 5, 5, NA, 5, NA, NA, 5, 5, 5, NA, NA, NA, 1, NA, N…
## $ R617 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 225000, NA, N…
## $ R618 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA…
## $ R619 <dbl+lbl> 2, 2, NA, 2, NA, NA, 2, 2, NA, NA, NA, NA, 2, NA, N…
## $ R620 <dbl+lbl> 3, 13, NA, 3, NA, NA, 21, 21, NA, NA, NA, NA, 3, NA, N…
## $ R621 <dbl+lbl> 4, 2, NA, 4, NA, NA, 1, 1, NA, NA, NA, NA, 3, NA, N…
## $ R1101_A <chr> "", "", "A", "", "A", "", "", "", "A", "A", "", "", "", "", "…
## $ R1101_B <chr> "", "", "", "B", "", "", "", "B", "", "", "", "", "", "", "",…
## $ R1101_C <chr> "C", "", "", "", "", "", "", "", "", "", "C", "", "C", "C", "…
## $ R1101_D <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1101_E <chr> "", "", "", "", "", "", "E", "", "", "", "", "", "", "", "", …
## $ R1101_X <chr> "", "X", "", "", "", "X", "", "", "", "", "", "X", "", "", ""…
## $ R1102 <dbl+lbl> 5, 5, 5, 5, 5, 5, 5, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, …
## $ R1103 <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, 5, NA, NA, NA, NA, NA, NA, N…
## $ R1104 <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, N…
## $ R1105 <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, 5, NA, NA, NA, NA, NA, NA, N…
## $ R1106 <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, 5, NA, NA, NA, NA, NA, NA, N…
## $ R1107_A <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1107_B <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1107_C <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1107_D <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1107_E <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1107_F <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1107_G <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1107_H <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1108 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ R1109_A <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1109_B <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1109_C <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1109_D <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1109_E <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1109_X <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1110_A <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1110_B <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1110_C <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1110_D <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1110_E <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1110_F <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1110_G <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1110_H <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1110_I <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1201 <dbl+lbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, …
## $ R1202_A <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1202_B <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1202_C <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1202_D <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1202_E <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1202_F <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1202_G <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1203 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ R1204_A <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1204_B <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1204_C <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1204_D <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1204_E <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1204_X <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1205_A <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1205_B <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1205_C <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1205_D <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1205_E <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1205_F <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1205_G <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1205_H <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1205_I <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ R1206 <dbl+lbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, NA, 5, 5, 5, …
## $ R1207 <dbl+lbl> 5, 5, 5, 5, 5, 1, 5, 5, 8, 5, NA, 5, 5, 5, …
## $ R1208 <dbl> NA, NA, NA, NA, NA, 84, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ R1209 <dbl+lbl> 5, 5, 5, 5, 5, 1, 5, 5, NA, 5, NA, 5, 5, 5, …
## $ FWT <dbl> 18.677762, 8.387459, 89.970590, 90.586172, 54.131840, 235.579…
## $ R301 <dbl> 5, 6, 6, 3, 4, 2, 4, 5, 5, 2, 5, 2, 7, 3, 3, 4, 6, 8, 4, 10, …
## $ R302 <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0…
## $ R303 <dbl> 5, 5, 5, 3, 4, 2, 4, 5, 5, 2, 4, 2, 7, 3, 3, 4, 5, 8, 4, 9, 5…
## $ R304 <dbl> 5, 4, 4, 3, 3, 2, 4, 4, 5, 2, 3, 2, 5, 3, 3, 4, 2, 8, 4, 8, 5…
## $ R305 <dbl> 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ EXPEND <dbl> 20158990, 6701536, 6334490, 4396231, 6277762, 7281107, 125883…
## $ KAPITA <dbl> 4031798.1, 1116922.6, 1055748.4, 1465410.3, 1569440.5, 364055…
## $ WERT <dbl> 18.677762, 8.387459, 89.970590, 90.586172, 54.131840, 235.579…
## $ WEIND <dbl> 93.38881, 50.32476, 539.82354, 271.75852, 216.52736, 471.1592…
## $ NKS <chr> "100080", "150402", "100396", "150090", "100146", "150137", "…
## $ NURT <dbl> 3, 13, 12, 15, 6, 7, 12, 3, 11, 6, 2, 8, 4, 6, 6, 1, 10, 10, …
## $ GK <dbl> 661946, 743289, 661946, 743289, 661946, 743289, 743289, 74328…
## $ MISKIN <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2, …
## $ MISKINWB <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ KABU <dbl+lbl> 9419, 9412, 9401, 9412, 9404, 9401, 9412, 9404, 9412, 942…
## $ Z101 <dbl+lbl> 94, 96, 95, 96, 96, 95, 96, 96, 96, 94, 97, 96, 97, 97, 9…
## $ Z102 <dbl> 19, 1, 1, 1, 4, 1, 1, 4, 1, 20, 1, 5, 6, 7, 7, 7, 2, 6, 1, 4,…
## $ zkabu <dbl+lbl> 9419, 9601, 9501, 9601, 9604, 9501, 9601, 9604, 9601, 942…
## $ tambah <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Var_baru <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
# Using order() from base R: order() returns the row positions in ascending
# order of R405, and those positions are used to rearrange the rows.
data_sorted121 <- data_ssn[order(data_ssn$R405), ]
head(data_sorted121)
## # A tibble: 6 × 120
## URUT R101 R102 R105 R401 R403 R404 R405 R407 R408 R409
## <dbl> <dbl+lb> <dbl> <dbl+l> <dbl> <dbl+l> <dbl+l> <dbl+l> <dbl> <dbl+l> <dbl>
## 1 1 18 [Lam… 7 2 [Per… 4 5 [Men… 2 [Kaw… 1 [Lak… 26 1 [Ya] 23
## 2 2 19 [Kep… 3 1 [Per… 5 3 [Ana… 1 [Bel… 1 [Lak… 9 NA NA
## 3 7 16 [Sum… 71 1 [Per… 1 1 [Kep… 2 [Kaw… 1 [Lak… 39 1 [Ya] 26
## 4 27 15 [Jam… 5 2 [Per… 4 3 [Ana… 1 [Bel… 1 [Lak… 8 NA NA
## 5 51 18 [Lam… 7 2 [Per… 3 3 [Ana… 1 [Bel… 1 [Lak… 23 NA NA
## 6 61 52 [Nus… 5 1 [Per… 3 3 [Ana… 1 [Bel… 1 [Lak… 20 NA NA
## # ℹ 109 more variables: R406A <dbl>, R406B <dbl>, R406C <dbl>, R410 <dbl>,
## # R601 <dbl>, R602 <dbl>, R603 <dbl>, R604 <dbl>, R605 <dbl+lbl>,
## # R606 <dbl+lbl>, R607 <dbl+lbl>, R608 <dbl+lbl>, R609 <dbl+lbl>,
## # R610 <dbl+lbl>, R611 <dbl+lbl>, R612 <dbl+lbl>, R613 <dbl+lbl>,
## # R614 <dbl+lbl>, R615 <dbl+lbl>, R616 <dbl+lbl>, R617 <dbl>, R618 <dbl>,
## # R619 <dbl+lbl>, R620 <dbl+lbl>, R621 <dbl+lbl>, R1101_A <chr>,
## # R1101_B <chr>, R1101_C <chr>, R1101_D <chr>, R1101_E <chr>, …
# Same base R approach, with decreasing = TRUE to sort R405 in descending order.
data_sorted_desc122 <- data_ssn[order(data_ssn$R405, decreasing = TRUE), ]
head(data_sorted_desc122)
## # A tibble: 6 × 120
## URUT R101 R102 R105 R401 R403 R404 R405 R407 R408 R409
## <dbl> <dbl+lb> <dbl> <dbl+l> <dbl> <dbl+l> <dbl+l> <dbl+l> <dbl> <dbl+l> <dbl>
## 1 23 16 [Sum… 71 1 [Per… 4 3 [Ana… 1 [Bel… 2 [Per… 9 NA NA
## 2 28 52 [Nus… 6 2 [Per… 3 3 [Ana… 3 [Cer… 2 [Per… 37 NA 28
## 3 51 18 [Lam… 7 2 [Per… 2 2 [Ist… 2 [Kaw… 2 [Per… 52 1 [Ya] 15
## 4 58 21 [Kep… 1 1 [Per… 2 2 [Ist… 2 [Kaw… 2 [Per… 46 1 [Ya] 23
## 5 82 16 [Sum… 10 1 [Per… 2 2 [Ist… 2 [Kaw… 2 [Per… 52 1 [Ya] 20
## 6 84 63 [Kal… 4 1 [Per… 5 3 [Ana… 1 [Bel… 2 [Per… 13 NA NA
## # ℹ 109 more variables: R406A <dbl>, R406B <dbl>, R406C <dbl>, R410 <dbl>,
## # R601 <dbl>, R602 <dbl>, R603 <dbl>, R604 <dbl>, R605 <dbl+lbl>,
## # R606 <dbl+lbl>, R607 <dbl+lbl>, R608 <dbl+lbl>, R609 <dbl+lbl>,
## # R610 <dbl+lbl>, R611 <dbl+lbl>, R612 <dbl+lbl>, R613 <dbl+lbl>,
## # R614 <dbl+lbl>, R615 <dbl+lbl>, R616 <dbl+lbl>, R617 <dbl>, R618 <dbl>,
## # R619 <dbl+lbl>, R620 <dbl+lbl>, R621 <dbl+lbl>, R1101_A <chr>,
## # R1101_B <chr>, R1101_C <chr>, R1101_D <chr>, R1101_E <chr>, …
2. Menyortir berdasarkan 2 Variabel
library(dplyr)
# Sort by R101 ascending; rows that share the same R101 value are then ordered
# by R610 descending.
data_sorted211 <- data_ssn %>%
  arrange(R101, desc(R610))
print(data_sorted211[, c("R101", "R610")])
## # A tibble: 48,797 × 2
## R101 R610
## <dbl+lbl> <dbl+lbl>
## 1 11 [Aceh] 3 [Tidak bersekolah lagi]
## 2 11 [Aceh] 3 [Tidak bersekolah lagi]
## 3 11 [Aceh] 3 [Tidak bersekolah lagi]
## 4 11 [Aceh] 3 [Tidak bersekolah lagi]
## 5 11 [Aceh] 3 [Tidak bersekolah lagi]
## 6 11 [Aceh] 3 [Tidak bersekolah lagi]
## 7 11 [Aceh] 3 [Tidak bersekolah lagi]
## 8 11 [Aceh] 3 [Tidak bersekolah lagi]
## 9 11 [Aceh] 3 [Tidak bersekolah lagi]
## 10 11 [Aceh] 3 [Tidak bersekolah lagi]
## # ℹ 48,787 more rows
# Using order() from base R: sort by R101 ascending, then by R407 descending.
# Negating the numeric column R407 reverses its sort direction.
data_sorted212 <- data_ssn[order(data_ssn$R101, -data_ssn$R407), ]
print(data_sorted212[,c("R101","R407")])
## # A tibble: 48,797 × 2
## R101 R407
## <dbl+lbl> <dbl>
## 1 11 [Aceh] 91
## 2 11 [Aceh] 88
## 3 11 [Aceh] 87
## 4 11 [Aceh] 83
## 5 11 [Aceh] 83
## 6 11 [Aceh] 82
## 7 11 [Aceh] 82
## 8 11 [Aceh] 81
## 9 11 [Aceh] 81
## 10 11 [Aceh] 79
## # ℹ 48,787 more rows
3. Menyortir Berdasarkan 3 atau lebih Variabel
library(dplyr)
# Three sort keys: R101 ascending, then R610 descending, then R404 ascending.
data_sorted311 <- data_ssn %>%
  arrange(R101, desc(R610), R404)
print(data_sorted311[, c("R101", "R610", "R404")])
## # A tibble: 48,797 × 3
## R101 R610 R404
## <dbl+lbl> <dbl+lbl> <dbl+lbl>
## 1 11 [Aceh] 3 [Tidak bersekolah lagi] 1 [Belum kawin]
## 2 11 [Aceh] 3 [Tidak bersekolah lagi] 1 [Belum kawin]
## 3 11 [Aceh] 3 [Tidak bersekolah lagi] 1 [Belum kawin]
## 4 11 [Aceh] 3 [Tidak bersekolah lagi] 1 [Belum kawin]
## 5 11 [Aceh] 3 [Tidak bersekolah lagi] 1 [Belum kawin]
## 6 11 [Aceh] 3 [Tidak bersekolah lagi] 1 [Belum kawin]
## 7 11 [Aceh] 3 [Tidak bersekolah lagi] 1 [Belum kawin]
## 8 11 [Aceh] 3 [Tidak bersekolah lagi] 1 [Belum kawin]
## 9 11 [Aceh] 3 [Tidak bersekolah lagi] 1 [Belum kawin]
## 10 11 [Aceh] 3 [Tidak bersekolah lagi] 1 [Belum kawin]
## # ℹ 48,787 more rows
# Using order() from base R with three keys: R101 ascending, R401 descending
# (negated), then R407 ascending.
data_sorted312 <- data_ssn[order(data_ssn$R101, -data_ssn$R401, data_ssn$R407), ]
print(data_sorted312[,c("R101","R401", "R407")])
## # A tibble: 48,797 × 3
## R101 R401 R407
## <dbl+lbl> <dbl> <dbl>
## 1 11 [Aceh] 15 0
## 2 11 [Aceh] 10 6
## 3 11 [Aceh] 10 44
## 4 11 [Aceh] 9 16
## 5 11 [Aceh] 9 34
## 6 11 [Aceh] 8 0
## 7 11 [Aceh] 8 0
## 8 11 [Aceh] 8 1
## 9 11 [Aceh] 8 2
## 10 11 [Aceh] 8 5
## # ℹ 48,787 more rows
Manipulasi data select variables digunakan untuk memilih kolom/variabel.
base
data_selected <- df_ori[, c("var 1","var 2","var 3","...","var-n")]
data_selected <- df_ori[, c(posisi A, posisi B, ..., posisi-n)]
dplyr
library(dplyr)
data_selected1 <- df_ori %>%
select(var1, var2, ..., var-n)
data_selected2 <- df_ori %>%
select(-c(var1, var2, ..., var-n))
data_selected3 <- df_ori %>%
select(starts_with("partname"))
data_selected4 <- df_ori %>%
select(contains("partname"))
Dalam operasi ini bisa dilakukan proses drop and keep variabel yang digunakan.
1. Menyebut Nama-Nama Kolom
Memilih variabel berdasarkan nama kolom/variabel.
# Select specific columns by name (base R bracket subsetting with a character vector).
data_selected1 <- data_ssn[, c("R101","R105","R407","FWT","R1102")]
head(data_selected1)
## # A tibble: 6 × 5
## R101 R105 R407 FWT R1102
## <dbl+lbl> <dbl+lbl> <dbl> <dbl> <dbl+lbl>
## 1 18 [Lampung] 2 [Perdesaan] 26 293. 5 [Tidak]
## 2 19 [Kepulauan Bangka Belitung] 1 [Perkotaan] 9 40.5 5 [Tidak]
## 3 16 [Sumatera Selatan] 1 [Perkotaan] 39 521. 5 [Tidak]
## 4 16 [Sumatera Selatan] 1 [Perkotaan] 9 239. 5 [Tidak]
## 5 15 [Jambi] 2 [Perdesaan] 8 277. 5 [Tidak]
## 6 52 [Nusa Tenggara Barat] 2 [Perdesaan] 37 143. 5 [Tidak]
2. Menggunakan Indeks Kolom
Memilih variabel berdasarkan posisi urutan kolom variabel tersebut.
# Select columns by their position in the data frame.
data_selected2 <- data_ssn[, c(1, 2, 3, 5)] # columns 1, 2, 3 and 5
head(data_selected2)
## # A tibble: 6 × 4
## URUT R101 R102 R401
## <dbl> <dbl+lbl> <dbl> <dbl>
## 1 1 18 [Lampung] 7 4
## 2 2 19 [Kepulauan Bangka Belitung] 3 5
## 3 7 16 [Sumatera Selatan] 71 1
## 4 23 16 [Sumatera Selatan] 71 4
## 5 27 15 [Jambi] 5 4
## 6 28 52 [Nusa Tenggara Barat] 6 3
3. Menggunakan Operator select()
Menggunakan fungsi select() untuk menyeleksi kolom dengan cara yang lebih efisien dan elegan.
library(dplyr)
# Keep only the listed columns, in the order given.
data_selected3 <- data_ssn %>%
  select(R101, R105, R407, FWT, R1102)
head(data_selected3)
## # A tibble: 6 × 5
## R101 R105 R407 FWT R1102
## <dbl+lbl> <dbl+lbl> <dbl> <dbl> <dbl+lbl>
## 1 18 [Lampung] 2 [Perdesaan] 26 293. 5 [Tidak]
## 2 19 [Kepulauan Bangka Belitung] 1 [Perkotaan] 9 40.5 5 [Tidak]
## 3 16 [Sumatera Selatan] 1 [Perkotaan] 39 521. 5 [Tidak]
## 4 16 [Sumatera Selatan] 1 [Perkotaan] 9 239. 5 [Tidak]
## 5 15 [Jambi] 2 [Perdesaan] 8 277. 5 [Tidak]
## 6 52 [Nusa Tenggara Barat] 2 [Perdesaan] 37 143. 5 [Tidak]
4. Mengecualikan Variabel Tertentu
Mengecualikan variabel tertentu dengan memberi tanda minus (-)
# Keep every column except MISKIN (a leading minus excludes a column).
data_selected4 <- data_ssn %>%
  select(-MISKIN)
head(data_selected4)
## # A tibble: 6 × 119
## URUT R101 R102 R105 R401 R403 R404 R405 R407 R408 R409
## <dbl> <dbl+lb> <dbl> <dbl+l> <dbl> <dbl+l> <dbl+l> <dbl+l> <dbl> <dbl+l> <dbl>
## 1 1 18 [Lam… 7 2 [Per… 4 5 [Men… 2 [Kaw… 1 [Lak… 26 1 [Ya] 23
## 2 2 19 [Kep… 3 1 [Per… 5 3 [Ana… 1 [Bel… 1 [Lak… 9 NA NA
## 3 7 16 [Sum… 71 1 [Per… 1 1 [Kep… 2 [Kaw… 1 [Lak… 39 1 [Ya] 26
## 4 23 16 [Sum… 71 1 [Per… 4 3 [Ana… 1 [Bel… 2 [Per… 9 NA NA
## 5 27 15 [Jam… 5 2 [Per… 4 3 [Ana… 1 [Bel… 1 [Lak… 8 NA NA
## 6 28 52 [Nus… 6 2 [Per… 3 3 [Ana… 3 [Cer… 2 [Per… 37 NA 28
## # ℹ 108 more variables: R406A <dbl>, R406B <dbl>, R406C <dbl>, R410 <dbl>,
## # R601 <dbl>, R602 <dbl>, R603 <dbl>, R604 <dbl>, R605 <dbl+lbl>,
## # R606 <dbl+lbl>, R607 <dbl+lbl>, R608 <dbl+lbl>, R609 <dbl+lbl>,
## # R610 <dbl+lbl>, R611 <dbl+lbl>, R612 <dbl+lbl>, R613 <dbl+lbl>,
## # R614 <dbl+lbl>, R615 <dbl+lbl>, R616 <dbl+lbl>, R617 <dbl>, R618 <dbl>,
## # R619 <dbl+lbl>, R620 <dbl+lbl>, R621 <dbl+lbl>, R1101_A <chr>,
## # R1101_B <chr>, R1101_C <chr>, R1101_D <chr>, R1101_E <chr>, …
5. Memilih Variabel dengan Pola Nama
# Select the columns whose names start with "R10".
data_selected5 <- data_ssn %>%
  select(starts_with("R10"))
head(data_selected5)
## # A tibble: 6 × 3
## R101 R102 R105
## <dbl+lbl> <dbl> <dbl+lbl>
## 1 18 [Lampung] 7 2 [Perdesaan]
## 2 19 [Kepulauan Bangka Belitung] 3 1 [Perkotaan]
## 3 16 [Sumatera Selatan] 71 1 [Perkotaan]
## 4 16 [Sumatera Selatan] 71 1 [Perkotaan]
## 5 15 [Jambi] 5 2 [Perdesaan]
## 6 52 [Nusa Tenggara Barat] 6 2 [Perdesaan]
# Select the columns whose names contain the text "R4".
data_selected6 <- data_ssn %>%
  select(contains("R4"))
head(data_selected6)
## # A tibble: 6 × 11
## R401 R403 R404 R405 R407 R408 R409 R406A R406B R406C R410
## <dbl> <dbl+lbl> <dbl+l> <dbl+l> <dbl> <dbl+l> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 4 5 [Menantu] 2 [Kaw… 1 [Lak… 26 1 [Ya] 23 12 9 1996 2
## 2 5 3 [Anak kan… 1 [Bel… 1 [Lak… 9 NA NA 16 9 2013 1
## 3 1 1 [Kepala r… 2 [Kaw… 1 [Lak… 39 1 [Ya] 26 27 10 1983 2
## 4 4 3 [Anak kan… 1 [Bel… 2 [Per… 9 NA NA 6 2 2014 1
## 5 4 3 [Anak kan… 1 [Bel… 1 [Lak… 8 NA NA 10 1 2015 2
## 6 3 3 [Anak kan… 3 [Cer… 2 [Per… 37 NA 28 1 1 1986 2
base
df_ori[df_ori$varX == 1,] # Misalnya memilih yang berkode 1 dari var X
df_ori[df_ori$varX == 2 | df_ori$varX > 1500,] # Misalnya memilih yang berkode 2 atau >1500 dari var X
dplyr
Select Single
library(dplyr)
# Template: subset() (base R) takes a logical condition as its second argument,
# written with bare column names, e.g. var == 1.
df_select <- subset(df_ori, kondisi)
# Template: the dplyr equivalent with filter().
df_select <- df_ori %>%
  filter(kondisi)
Select Multiple
pilihan <- c(32, 11, 62)
df_select <- df_ori %>%
filter(var-n %in%
pilihan)
1. Menggunakan Operator Subsetting Dasar [ ]
# Base R: keep the rows where R101 equals 32; the empty slot after the comma keeps all columns.
data_jawa_barat <- data_ssn[data_ssn$R101==32, ]
2. Menggunakan subset()
# subset(): same selection, with the column referred to by its bare name.
data_jawa_barat <- subset(data_ssn, R101 == 32)
3. Menggunakan filter ()
library(dplyr)
# dplyr: keep the rows where R101 equals 32.
data_jawa_barat <- data_ssn %>%
  filter(R101 == 32)
4. Memilih berdasarkan >1 kriteria
# Two criteria combined with AND: rows with R101 == 32 and R403 == 1.
# NOTE(review): the object name suggests a Bandung subset, but R403 is printed
# in this document with household-relationship labels (e.g. "1 [Kep…"), not a
# regency/city code — confirm which column and value were intended.
data_jabar_bandung <- data_ssn %>%
filter(R101 == 32 & R403 == 1)
# Two criteria combined with OR: rows with R101 equal to 32 or 33.
data_jawa_barat_tengah <- data_ssn %>%
filter(R101 == 32 | R101 == 33)
1. Menggunakan Operator Subsetting Dasar [ ] dengan %in%
# Province codes to keep.
provinsi_pilihan <- c(32, 11, 62)
# Keep the rows whose R101 value is one of the listed codes.
data_provinsi_pilihan1 <- data_ssn[data_ssn$R101 %in%
provinsi_pilihan, ]
2. Menggunakan fungsi subset() dengan %in%
# subset() with %in%: keep the rows whose R101 value appears in provinsi_pilihan.
data_provinsi_pilihan2 <- subset(data_ssn, R101 %in%
provinsi_pilihan)
3. Menggunakan dplyr dengan filter() dan %in%
library(dplyr)
# filter() with %in%: keep the rows whose R101 value appears in provinsi_pilihan.
data_provinsi_pilihan3 <- data_ssn %>%
  filter(R101 %in% provinsi_pilihan)
head(data_provinsi_pilihan3, 10)
## # A tibble: 10 × 120
## URUT R101 R102 R105 R401 R403 R404 R405 R407 R408
## <dbl> <dbl+lbl> <dbl> <dbl+l> <dbl> <dbl+l> <dbl+l> <dbl+l> <dbl> <dbl+l>
## 1 172 62 [Kalimant… 71 1 [Per… 3 3 [Ana… 1 [Bel… 1 [Lak… 17 NA
## 2 248 62 [Kalimant… 5 2 [Per… 4 3 [Ana… 1 [Bel… 1 [Lak… 7 NA
## 3 301 62 [Kalimant… 2 1 [Per… 5 3 [Ana… 1 [Bel… 1 [Lak… 9 NA
## 4 360 62 [Kalimant… 9 2 [Per… 1 1 [Kep… 2 [Kaw… 1 [Lak… 47 1 [Ya]
## 5 378 62 [Kalimant… 4 2 [Per… 4 3 [Ana… 1 [Bel… 1 [Lak… 13 NA
## 6 793 62 [Kalimant… 13 1 [Per… 1 1 [Kep… 2 [Kaw… 1 [Lak… 48 1 [Ya]
## 7 1050 62 [Kalimant… 71 1 [Per… 3 3 [Ana… 1 [Bel… 2 [Per… 17 NA
## 8 1340 62 [Kalimant… 7 2 [Per… 6 6 [Cuc… 1 [Bel… 2 [Per… 4 NA
## 9 1384 62 [Kalimant… 8 2 [Per… 2 2 [Ist… 2 [Kaw… 2 [Per… 33 1 [Ya]
## 10 1450 62 [Kalimant… 2 1 [Per… 2 2 [Ist… 2 [Kaw… 2 [Per… 26 1 [Ya]
## # ℹ 110 more variables: R409 <dbl>, R406A <dbl>, R406B <dbl>, R406C <dbl>,
## # R410 <dbl>, R601 <dbl>, R602 <dbl>, R603 <dbl>, R604 <dbl>, R605 <dbl+lbl>,
## # R606 <dbl+lbl>, R607 <dbl+lbl>, R608 <dbl+lbl>, R609 <dbl+lbl>,
## # R610 <dbl+lbl>, R611 <dbl+lbl>, R612 <dbl+lbl>, R613 <dbl+lbl>,
## # R614 <dbl+lbl>, R615 <dbl+lbl>, R616 <dbl+lbl>, R617 <dbl>, R618 <dbl>,
## # R619 <dbl+lbl>, R620 <dbl+lbl>, R621 <dbl+lbl>, R1101_A <chr>,
## # R1101_B <chr>, R1101_C <chr>, R1101_D <chr>, R1101_E <chr>, …
4. Menggunakan Logika OR
library(dplyr)
# The same kind of multi-value selection written as explicit OR comparisons.
data_provinsi_pilihan4 <- data_ssn %>%
  filter(
    R101 == 32 | R101 == 33 | R101 == 31
  )
head(data_provinsi_pilihan4, 10)
## # A tibble: 10 × 120
## URUT R101 R102 R105 R401 R403 R404 R405 R407 R408
## <dbl> <dbl+lbl> <dbl> <dbl+l> <dbl> <dbl+l> <dbl+l> <dbl+l> <dbl> <dbl+l>
## 1 70 31 [DKI Jaka… 71 1 [Per… 5 3 [Ana… 1 [Bel… 1 [Lak… 13 NA
## 2 71 31 [DKI Jaka… 72 1 [Per… 1 1 [Kep… 2 [Kaw… 1 [Lak… 50 1 [Ya]
## 3 244 33 [Jawa Ten… 15 2 [Per… 1 1 [Kep… 2 [Kaw… 1 [Lak… 51 1 [Ya]
## 4 300 33 [Jawa Ten… 25 1 [Per… 2 2 [Ist… 2 [Kaw… 2 [Per… 54 1 [Ya]
## 5 303 33 [Jawa Ten… 75 1 [Per… 3 3 [Ana… 1 [Bel… 2 [Per… 16 NA
## 6 339 31 [DKI Jaka… 72 1 [Per… 1 1 [Kep… 4 [Cer… 2 [Per… 74 NA
## 7 370 31 [DKI Jaka… 72 1 [Per… 2 2 [Ist… 2 [Kaw… 2 [Per… 38 1 [Ya]
## 8 371 33 [Jawa Ten… 2 1 [Per… 2 2 [Ist… 2 [Kaw… 2 [Per… 33 1 [Ya]
## 9 375 33 [Jawa Ten… 27 2 [Per… 4 3 [Ana… 1 [Bel… 2 [Per… 6 NA
## 10 401 33 [Jawa Ten… 14 2 [Per… 5 3 [Ana… 1 [Bel… 2 [Per… 3 NA
## # ℹ 110 more variables: R409 <dbl>, R406A <dbl>, R406B <dbl>, R406C <dbl>,
## # R410 <dbl>, R601 <dbl>, R602 <dbl>, R603 <dbl>, R604 <dbl>, R605 <dbl+lbl>,
## # R606 <dbl+lbl>, R607 <dbl+lbl>, R608 <dbl+lbl>, R609 <dbl+lbl>,
## # R610 <dbl+lbl>, R611 <dbl+lbl>, R612 <dbl+lbl>, R613 <dbl+lbl>,
## # R614 <dbl+lbl>, R615 <dbl+lbl>, R616 <dbl+lbl>, R617 <dbl>, R618 <dbl>,
## # R619 <dbl+lbl>, R620 <dbl+lbl>, R621 <dbl+lbl>, R1101_A <chr>,
## # R1101_B <chr>, R1101_C <chr>, R1101_D <chr>, R1101_E <chr>, …
base
# Template: `kondisi` stands for a logical expression on a column,
# e.g. df_ori$var >= 15.
data_filter <- df_ori[kondisi, ]
# Template: inside subset() the column is referred to by its bare name,
# e.g. var >= 15.
data_filter <- subset(df_ori, kondisi)
dplyr
library(dplyr)
df_filter <- df_ori %>%
filter(kondisi)
library(dplyr)
pilihan <- c(32, 11, 62)
df_filter <- df_ori %>%
filter(var-n %in%
pilihan)
Berikut merupakan cara untuk memfilter data menurut kondisi tertentu
1. Menggunakan Subset Operator [ ]
# Keep the rows where age (R407) is 15 or above.
# which() converts the logical test into row positions, so a row whose tested
# value is NA is dropped (as subset() and filter() do) instead of coming back
# as an all-NA row.
data_filtered11 <- data_ssn[which(data_ssn$R407 >= 15), ]
# Keep the rows whose R614 code lies between 5 and 7 (inclusive).
data_filtered12 <- data_ssn[which(data_ssn$R614 >= 5 & data_ssn$R614 <= 7), ]
2. Menggunakan subset()
# Keep the rows where age (R407) is 15 or above.
data_filtered21 <- subset(data_ssn, R407 >= 15)
# Keep the rows whose R614 code lies between 15 and 17 (inclusive).
data_filtered22 <- subset(data_ssn, R614 >= 15 & R614 <= 17)
# Keep the rows with age above 30 and expenditure above 5 million.
# EXPEND holds full amounts (its values run into the millions), so the
# threshold is written out as 5000000 rather than 5000.
data_filtered23 <- subset(data_ssn, R407 > 30 & EXPEND > 5000000)
3. Menggunakan filter()
library(dplyr)
# Keep the rows where age (R407) is above 30.
data_filtered31 <- data_ssn %>%
  filter(R407 > 30)
# Keep the rows whose R614 code lies between 1 and 4 (inclusive).
data_filtered32 <- data_ssn %>%
  filter(R614 >= 1 & R614 <= 4)
# Keep the rows with age above 30 and expenditure above 5 million; conditions
# separated by a comma inside filter() must all hold.
data_filtered33 <- data_ssn %>%
  filter(R407 > 30, EXPEND > 5000000)
4. Menggabungkan Kondisi dengan Operator Logika
# Age above 30 AND an R614 code between 1 and 4 (described in this tutorial as
# primary school / equivalent).
data_filtered41 <- data_ssn %>%
  filter(R407 > 30 & (R614 >= 1 & R614 <= 4))
# Age above 30 OR an R614 code between 1 and 4. `&` binds tighter than `|`;
# the parentheses only make that grouping explicit.
data_filtered42 <- data_ssn %>%
  filter(R407 > 30 | (R614 >= 1 & R614 <= 4))
# Negation: rows where age is not above 30.
data_filtered43 <- data_ssn %>%
  filter(!(R407 > 30))
Recoding berfungsi untuk membuat nilai baru dari nilai variabel yang sudah ada.
base
df_ori$recode <- ifelse(df_ori$kondisi_var, "Label 1","Label 2")
car
library(car)
recode(df_ori$varA,'kondisi1=nilai1; else=nilai2')
dplyr
library(dplyr)
df_recode <- df_ori %>%
mutate(kelum = case_when(
condition1 ~ value1,
condition2 ~ value2,
TRUE ~ default_value
))
Digunakan untuk membuat nilai baru dari variabel yang sudah ada
Fungsi yang biasa digunakan: ifelse(...), recode(...), case_when(...)
1. Menggunakan ifelse()
Fungsi ifelse() adalah fungsi yang sering digunakan untuk merecode variabel dalam satu baris kode.
# Collapse the R614 codes into three bands with nested ifelse():
#   1-4 -> "SD", 5-8 -> "SMP", 9 and above -> "SMA+", anything else -> NA.
data_ssn$educ_recoded <- ifelse(
  data_ssn$R614 >= 1 & data_ssn$R614 <= 4, "SD",
  ifelse(
    data_ssn$R614 >= 5 & data_ssn$R614 <= 8, "SMP",
    ifelse(data_ssn$R614 >= 9, "SMA+", NA)
  )
)
# Frequency of each band (table() leaves out NA by default).
table(data_ssn$educ_recoded)
##
## SD SMA+ SMP
## 10648 23945 7471
2. Menggunakan recode()
Jika memiliki nilai yang ingin direkode satu per satu:
library(dplyr)
# Recode R610 value by value. dplyr::recode() is called with its namespace
# because car (also loaded in this document) exports a different recode().
# NOTE(review): as_factor() may use the value labels as factor levels; confirm
# that the levels of R610 really are "1", "2" and "3" before relying on this.
data_recode1 <- data_ssn %>%
  mutate(R610 = as_factor(R610)) %>% # convert to factor
  mutate(pendidikan_recoded = dplyr::recode(R610,
    `1` = 1,
    `2` = 2,
    `3` = 1
  ))
print(data_recode1)
## # A tibble: 48,797 × 122
## URUT R101 R102 R105 R401 R403 R404 R405 R407 R408
## <dbl> <dbl+lbl> <dbl> <dbl+l> <dbl> <dbl+l> <dbl+l> <dbl+l> <dbl> <dbl+l>
## 1 1 18 [Lampung] 7 2 [Per… 4 5 [Men… 2 [Kaw… 1 [Lak… 26 1 [Ya]
## 2 2 19 [Kepulaua… 3 1 [Per… 5 3 [Ana… 1 [Bel… 1 [Lak… 9 NA
## 3 7 16 [Sumatera… 71 1 [Per… 1 1 [Kep… 2 [Kaw… 1 [Lak… 39 1 [Ya]
## 4 23 16 [Sumatera… 71 1 [Per… 4 3 [Ana… 1 [Bel… 2 [Per… 9 NA
## 5 27 15 [Jambi] 5 2 [Per… 4 3 [Ana… 1 [Bel… 1 [Lak… 8 NA
## 6 28 52 [Nusa Ten… 6 2 [Per… 3 3 [Ana… 3 [Cer… 2 [Per… 37 NA
## 7 51 18 [Lampung] 7 2 [Per… 2 2 [Ist… 2 [Kaw… 2 [Per… 52 1 [Ya]
## 8 51 18 [Lampung] 7 2 [Per… 3 3 [Ana… 1 [Bel… 1 [Lak… 23 NA
## 9 58 21 [Kepulaua… 1 1 [Per… 2 2 [Ist… 2 [Kaw… 2 [Per… 46 1 [Ya]
## 10 61 52 [Nusa Ten… 5 1 [Per… 3 3 [Ana… 1 [Bel… 1 [Lak… 20 NA
## # ℹ 48,787 more rows
## # ℹ 112 more variables: R409 <dbl>, R406A <dbl>, R406B <dbl>, R406C <dbl>,
## # R410 <dbl>, R601 <dbl>, R602 <dbl>, R603 <dbl>, R604 <dbl>, R605 <dbl+lbl>,
## # R606 <dbl+lbl>, R607 <dbl+lbl>, R608 <dbl+lbl>, R609 <dbl+lbl>, R610 <fct>,
## # R611 <dbl+lbl>, R612 <dbl+lbl>, R613 <dbl+lbl>, R614 <dbl+lbl>,
## # R615 <dbl+lbl>, R616 <dbl+lbl>, R617 <dbl>, R618 <dbl>, R619 <dbl+lbl>,
## # R620 <dbl+lbl>, R621 <dbl+lbl>, R1101_A <chr>, R1101_B <chr>, …
3. Menggunakan Paket car
Sama seperti recode() dari dplyr, fungsi recode() dari paket car juga dapat digunakan untuk recoding langsung.
library(car)
# Small stand-alone example; it is kept in its own object so that the survey
# data in data_ssn is not overwritten
contoh_status <- data.frame(
  R404 = c("Belum kawin", "Kawin", "Cerai Hidup", "Cerai Mati")
)
# Recode column 'R404'; car::recode() is namespaced because dplyr also
# exports a function called recode()
contoh_status$status_recoded <- car::recode(
  contoh_status$R404,
  "'Belum kawin' = 1; 'Kawin' = 2; 'Cerai Hidup' = 3; 'Cerai Mati' = 4"
)
print(contoh_status$status_recoded)
## [1] 1 2 3 4
4. Menggunakan case_when()
Digunakan jika ingin melakukan rekoding berdasarkan rentang nilai, kondisi logis yang kompleks, atau gabungan dari berbagai kondisi.
Fungsi case_when() digunakan dalam fungsi mutate(), dan setiap kondisi dalam case_when() harus menggunakan formula dua sisi (dengan kondisi di sebelah kiri dan hasil di sebelah kanan).
library(dplyr)
# Derive TAMAT from the completed-education code R614
data_TAMAT <- data_ssn %>%
  mutate(TAMAT = case_when(
    is.na(R614) ~ 1, # R614 missing (SYSMIS) -> TAMAT = 1
    R614 == 25 ~ 1, # R614 equal to 25 -> TAMAT = 1
    R614 >= 1 & R614 <= 5 ~ 2, # R614 from 1 to 5 -> TAMAT = 2
    R614 >= 6 & R614 <= 10 ~ 3, # R614 from 6 to 10 -> TAMAT = 3
    R614 >= 11 & R614 <= 17 ~ 4, # R614 from 11 to 17 -> TAMAT = 4
    R614 >= 18 & R614 <= 24 ~ 5 # R614 from 18 to 24 -> TAMAT = 5
  ))
# TAMAT only exists in data_TAMAT (data_ssn is unchanged), so preview that
head(data_TAMAT[, c("URUT", "TAMAT")], 5)
base
# Template: rename column "VarA" to "VarA_baru" by matching on its name
names(df_ori)[names(df_ori) == "VarA"] <- "VarA_baru"
dplyr
library(dplyr)
# Template: rename(new_name = old_name), called directly on the placeholder
# data frame df_ori
data_ssn1 <- rename(df_ori, VarA_baru = VarA)
library(dplyr)
# The same rename written with the pipe
data_ssn2 <- df_ori %>%
  rename(VarA_baru = VarA)
library(dplyr)
# Several columns can be renamed in a single call
data_ssn3 <- df_ori %>%
  rename(VarA_baru = VarA, VarB_baru = VarB)
1. Menggunakan names() atau colnames()
## [1] "URUT" "R101" "R102" "R105" "R401" "R403"
## [7] "R404" "R405" "R407" "R408" "R409" "R406A"
## [13] "R406B" "R406C" "R410" "R601" "R602" "R603"
## [19] "R604" "R605" "R606" "R607" "R608" "R609"
## [25] "R610" "R611" "R612" "R613" "R614" "R615"
## [31] "R616" "R617" "R618" "R619" "R620" "R621"
## [37] "R1101_A" "R1101_B" "R1101_C" "R1101_D" "R1101_E" "R1101_X"
## [43] "R1102" "R1103" "R1104" "R1105" "R1106" "R1107_A"
## [49] "R1107_B" "R1107_C" "R1107_D" "R1107_E" "R1107_F" "R1107_G"
## [55] "R1107_H" "R1108" "R1109_A" "R1109_B" "R1109_C" "R1109_D"
## [61] "R1109_E" "R1109_X" "R1110_A" "R1110_B" "R1110_C" "R1110_D"
## [67] "R1110_E" "R1110_F" "R1110_G" "R1110_H" "R1110_I" "R1201"
## [73] "R1202_A" "R1202_B" "R1202_C" "R1202_D" "R1202_E" "R1202_F"
## [79] "R1202_G" "R1203" "R1204_A" "R1204_B" "R1204_C" "R1204_D"
## [85] "R1204_E" "R1204_X" "R1205_A" "R1205_B" "R1205_C" "R1205_D"
## [91] "R1205_E" "R1205_F" "R1205_G" "R1205_H" "R1205_I" "R1206"
## [97] "R1207" "R1208" "R1209" "FWT" "R301" "R302"
## [103] "R303" "R304" "R305" "EXPEND" "KAPITA" "WERT"
## [109] "WEIND" "NKS" "NURT" "GK" "MISKIN" "MISKINWB"
## [115] "KABU" "Z101" "Z102" "zkabu"
# Rename column "R101" to "PROV", then list the column names
names(data_ssn) <- replace(names(data_ssn), names(data_ssn) == "R101", "PROV")
names(data_ssn)
## [1] "URUT" "PROV" "R102" "R105" "R401" "R403"
## [7] "R404" "R405" "R407" "R408" "R409" "R406A"
## [13] "R406B" "R406C" "R410" "R601" "R602" "R603"
## [19] "R604" "R605" "R606" "R607" "R608" "R609"
## [25] "R610" "R611" "R612" "R613" "R614" "R615"
## [31] "R616" "R617" "R618" "R619" "R620" "R621"
## [37] "R1101_A" "R1101_B" "R1101_C" "R1101_D" "R1101_E" "R1101_X"
## [43] "R1102" "R1103" "R1104" "R1105" "R1106" "R1107_A"
## [49] "R1107_B" "R1107_C" "R1107_D" "R1107_E" "R1107_F" "R1107_G"
## [55] "R1107_H" "R1108" "R1109_A" "R1109_B" "R1109_C" "R1109_D"
## [61] "R1109_E" "R1109_X" "R1110_A" "R1110_B" "R1110_C" "R1110_D"
## [67] "R1110_E" "R1110_F" "R1110_G" "R1110_H" "R1110_I" "R1201"
## [73] "R1202_A" "R1202_B" "R1202_C" "R1202_D" "R1202_E" "R1202_F"
## [79] "R1202_G" "R1203" "R1204_A" "R1204_B" "R1204_C" "R1204_D"
## [85] "R1204_E" "R1204_X" "R1205_A" "R1205_B" "R1205_C" "R1205_D"
## [91] "R1205_E" "R1205_F" "R1205_G" "R1205_H" "R1205_I" "R1206"
## [97] "R1207" "R1208" "R1209" "FWT" "R301" "R302"
## [103] "R303" "R304" "R305" "EXPEND" "KAPITA" "WERT"
## [109] "WEIND" "NKS" "NURT" "GK" "MISKIN" "MISKINWB"
## [115] "KABU" "Z101" "Z102" "zkabu"
# names() and colnames() give the same result for this data frame
colnames(data_ssn)
## [1] "URUT" "PROV" "R102" "R105" "R401" "R403"
## [7] "R404" "R405" "R407" "R408" "R409" "R406A"
## [13] "R406B" "R406C" "R410" "R601" "R602" "R603"
## [19] "R604" "R605" "R606" "R607" "R608" "R609"
## [25] "R610" "R611" "R612" "R613" "R614" "R615"
## [31] "R616" "R617" "R618" "R619" "R620" "R621"
## [37] "R1101_A" "R1101_B" "R1101_C" "R1101_D" "R1101_E" "R1101_X"
## [43] "R1102" "R1103" "R1104" "R1105" "R1106" "R1107_A"
## [49] "R1107_B" "R1107_C" "R1107_D" "R1107_E" "R1107_F" "R1107_G"
## [55] "R1107_H" "R1108" "R1109_A" "R1109_B" "R1109_C" "R1109_D"
## [61] "R1109_E" "R1109_X" "R1110_A" "R1110_B" "R1110_C" "R1110_D"
## [67] "R1110_E" "R1110_F" "R1110_G" "R1110_H" "R1110_I" "R1201"
## [73] "R1202_A" "R1202_B" "R1202_C" "R1202_D" "R1202_E" "R1202_F"
## [79] "R1202_G" "R1203" "R1204_A" "R1204_B" "R1204_C" "R1204_D"
## [85] "R1204_E" "R1204_X" "R1205_A" "R1205_B" "R1205_C" "R1205_D"
## [91] "R1205_E" "R1205_F" "R1205_G" "R1205_H" "R1205_I" "R1206"
## [97] "R1207" "R1208" "R1209" "FWT" "R301" "R302"
## [103] "R303" "R304" "R305" "EXPEND" "KAPITA" "WERT"
## [109] "WEIND" "NKS" "NURT" "GK" "MISKIN" "MISKINWB"
## [115] "KABU" "Z101" "Z102" "zkabu"
# Mengganti nama kolom dengan colnames
#colnames(data_ssn)[colnames(data_ssn) == "R102"] <- "KAB_KOTA"
#colnames(data_ssn)
2. Menggunakan rename()
library(dplyr)
# Rename R403 to HUB (new_name = old_name) and show the resulting names
data_ssnA <- data_ssn %>%
  rename(HUB = R403)
names(data_ssnA)
## [1] "URUT" "PROV" "R102" "R105" "R401" "HUB"
## [7] "R404" "R405" "R407" "R408" "R409" "R406A"
## [13] "R406B" "R406C" "R410" "R601" "R602" "R603"
## [19] "R604" "R605" "R606" "R607" "R608" "R609"
## [25] "R610" "R611" "R612" "R613" "R614" "R615"
## [31] "R616" "R617" "R618" "R619" "R620" "R621"
## [37] "R1101_A" "R1101_B" "R1101_C" "R1101_D" "R1101_E" "R1101_X"
## [43] "R1102" "R1103" "R1104" "R1105" "R1106" "R1107_A"
## [49] "R1107_B" "R1107_C" "R1107_D" "R1107_E" "R1107_F" "R1107_G"
## [55] "R1107_H" "R1108" "R1109_A" "R1109_B" "R1109_C" "R1109_D"
## [61] "R1109_E" "R1109_X" "R1110_A" "R1110_B" "R1110_C" "R1110_D"
## [67] "R1110_E" "R1110_F" "R1110_G" "R1110_H" "R1110_I" "R1201"
## [73] "R1202_A" "R1202_B" "R1202_C" "R1202_D" "R1202_E" "R1202_F"
## [79] "R1202_G" "R1203" "R1204_A" "R1204_B" "R1204_C" "R1204_D"
## [85] "R1204_E" "R1204_X" "R1205_A" "R1205_B" "R1205_C" "R1205_D"
## [91] "R1205_E" "R1205_F" "R1205_G" "R1205_H" "R1205_I" "R1206"
## [97] "R1207" "R1208" "R1209" "FWT" "R301" "R302"
## [103] "R303" "R304" "R305" "EXPEND" "KAPITA" "WERT"
## [109] "WEIND" "NKS" "NURT" "GK" "MISKIN" "MISKINWB"
## [115] "KABU" "Z101" "Z102" "zkabu"
library(dplyr)
# Rename R405 to Sex (new_name = old_name) and show the resulting names
data_ssn1 <- rename(data_ssn, Sex = R405)
names(data_ssn1)
## [1] "URUT" "PROV" "R102" "R105" "R401" "R403"
## [7] "R404" "Sex" "R407" "R408" "R409" "R406A"
## [13] "R406B" "R406C" "R410" "R601" "R602" "R603"
## [19] "R604" "R605" "R606" "R607" "R608" "R609"
## [25] "R610" "R611" "R612" "R613" "R614" "R615"
## [31] "R616" "R617" "R618" "R619" "R620" "R621"
## [37] "R1101_A" "R1101_B" "R1101_C" "R1101_D" "R1101_E" "R1101_X"
## [43] "R1102" "R1103" "R1104" "R1105" "R1106" "R1107_A"
## [49] "R1107_B" "R1107_C" "R1107_D" "R1107_E" "R1107_F" "R1107_G"
## [55] "R1107_H" "R1108" "R1109_A" "R1109_B" "R1109_C" "R1109_D"
## [61] "R1109_E" "R1109_X" "R1110_A" "R1110_B" "R1110_C" "R1110_D"
## [67] "R1110_E" "R1110_F" "R1110_G" "R1110_H" "R1110_I" "R1201"
## [73] "R1202_A" "R1202_B" "R1202_C" "R1202_D" "R1202_E" "R1202_F"
## [79] "R1202_G" "R1203" "R1204_A" "R1204_B" "R1204_C" "R1204_D"
## [85] "R1204_E" "R1204_X" "R1205_A" "R1205_B" "R1205_C" "R1205_D"
## [91] "R1205_E" "R1205_F" "R1205_G" "R1205_H" "R1205_I" "R1206"
## [97] "R1207" "R1208" "R1209" "FWT" "R301" "R302"
## [103] "R303" "R304" "R305" "EXPEND" "KAPITA" "WERT"
## [109] "WEIND" "NKS" "NURT" "GK" "MISKIN" "MISKINWB"
## [115] "KABU" "Z101" "Z102" "zkabu"
1. Mengganti Nama Banyak Kolom Sekaligus
# Rename several columns at once. match() pairs each old name with its new
# name explicitly, so the result does not depend on the column order in
# data_ssn (an %in% mask assigns the new names in column order instead).
names(data_ssn)[match(c("R403", "R404"), names(data_ssn))] <-
  c("HB_KRT", "STATUS_KWN")
2. Menggunakan rename()
library(dplyr)
# Rename two columns in one call: R610 -> SEKO and R611 -> JENJANG
data_ssn3 <- rename(data_ssn, SEKO = R610, JENJANG = R611)
names(data_ssn3)
## [1] "URUT" "PROV" "R102" "R105" "R401"
## [6] "HB_KRT" "STATUS_KWN" "R405" "R407" "R408"
## [11] "R409" "R406A" "R406B" "R406C" "R410"
## [16] "R601" "R602" "R603" "R604" "R605"
## [21] "R606" "R607" "R608" "R609" "SEKO"
## [26] "JENJANG" "R612" "R613" "R614" "R615"
## [31] "R616" "R617" "R618" "R619" "R620"
## [36] "R621" "R1101_A" "R1101_B" "R1101_C" "R1101_D"
## [41] "R1101_E" "R1101_X" "R1102" "R1103" "R1104"
## [46] "R1105" "R1106" "R1107_A" "R1107_B" "R1107_C"
## [51] "R1107_D" "R1107_E" "R1107_F" "R1107_G" "R1107_H"
## [56] "R1108" "R1109_A" "R1109_B" "R1109_C" "R1109_D"
## [61] "R1109_E" "R1109_X" "R1110_A" "R1110_B" "R1110_C"
## [66] "R1110_D" "R1110_E" "R1110_F" "R1110_G" "R1110_H"
## [71] "R1110_I" "R1201" "R1202_A" "R1202_B" "R1202_C"
## [76] "R1202_D" "R1202_E" "R1202_F" "R1202_G" "R1203"
## [81] "R1204_A" "R1204_B" "R1204_C" "R1204_D" "R1204_E"
## [86] "R1204_X" "R1205_A" "R1205_B" "R1205_C" "R1205_D"
## [91] "R1205_E" "R1205_F" "R1205_G" "R1205_H" "R1205_I"
## [96] "R1206" "R1207" "R1208" "R1209" "FWT"
## [101] "R301" "R302" "R303" "R304" "R305"
## [106] "EXPEND" "KAPITA" "WERT" "WEIND" "NKS"
## [111] "NURT" "GK" "MISKIN" "MISKINWB" "KABU"
## [116] "Z101" "Z102" "zkabu"
Untuk melakukan agregasi, digunakan fungsi summarize() dan group_by().
Ingin melihat hasil agregasi menurut provinsi:
# Mean of KAPITA (missing values ignored) for each province code R101
aggr1 <- summarise(
  group_by(data_ssn, R101),
  mean_theta = mean(KAPITA, na.rm = TRUE)
)
print(aggr1)
## # A tibble: 34 × 2
## R101 mean_theta
## <dbl+lbl> <dbl>
## 1 11 [Aceh] 1298305.
## 2 12 [Sumatera Utara] 1260795.
## 3 13 [Sumatera Barat] 1425005.
## 4 14 [Riau] 1536939.
## 5 15 [Jambi] 1470432.
## 6 16 [Sumatera Selatan] 1196369.
## 7 17 [Bengkulu] 1324300.
## 8 18 [Lampung] 1170050.
## 9 19 [Kepulauan Bangka Belitung] 1822676.
## 10 21 [Kepulauan Riau] 1799927.
## # ℹ 24 more rows
Ingin melihat hasil agregasi menurut provinsi dan tipe daerah:
# Mean of KAPITA (missing values ignored) for each combination of
# province (R101) and area type (R105)
aggr2 <- summarise(
  group_by(data_ssn, R101, R105),
  mean_theta = mean(KAPITA, na.rm = TRUE)
)
print(aggr2)
## # A tibble: 67 × 3
## # Groups: R101 [34]
## R101 R105 mean_theta
## <dbl+lbl> <dbl+lbl> <dbl>
## 1 11 [Aceh] 1 [Perkotaan] 1607555.
## 2 11 [Aceh] 2 [Perdesaan] 1161717.
## 3 12 [Sumatera Utara] 1 [Perkotaan] 1482937.
## 4 12 [Sumatera Utara] 2 [Perdesaan] 1088072.
## 5 13 [Sumatera Barat] 1 [Perkotaan] 1683709.
## 6 13 [Sumatera Barat] 2 [Perdesaan] 1225497.
## 7 14 [Riau] 1 [Perkotaan] 1811815.
## 8 14 [Riau] 2 [Perdesaan] 1386664.
## 9 15 [Jambi] 1 [Perkotaan] 1797469.
## 10 15 [Jambi] 2 [Perdesaan] 1341387.
## # ℹ 57 more rows
# Cross-tabulate R101 by R105, with one layer per value of R405
tab <- xtabs(formula = ~ R101 + R105 + R405, data = data_ssn)
print(tab)
## , , R405 = 1
##
## R105
## R101 1 2
## 11 295 685
## 12 670 885
## 13 395 519
## 14 200 425
## 15 140 350
## 16 266 551
## 17 129 286
## 18 203 563
## 19 138 139
## 21 225 88
## 31 357 0
## 32 1162 564
## 33 1026 891
## 34 156 65
## 35 1099 950
## 36 370 173
## 51 285 203
## 52 188 266
## 53 181 877
## 61 166 436
## 62 172 396
## 63 220 305
## 64 277 174
## 65 111 103
## 71 240 377
## 72 105 422
## 73 377 726
## 74 152 505
## 75 70 181
## 76 55 204
## 81 168 381
## 82 85 320
## 91 110 425
## 94 210 939
##
## , , R405 = 2
##
## R105
## R101 1 2
## 11 307 678
## 12 721 904
## 13 397 508
## 14 239 378
## 15 137 352
## 16 256 551
## 17 114 216
## 18 215 490
## 19 137 136
## 21 212 71
## 31 371 0
## 32 1144 545
## 33 1111 905
## 34 178 84
## 35 1081 959
## 36 345 186
## 51 315 198
## 52 192 278
## 53 178 823
## 61 170 447
## 62 182 346
## 63 237 291
## 64 253 158
## 65 118 94
## 71 223 355
## 72 138 426
## 73 406 772
## 74 146 595
## 75 71 174
## 76 51 217
## 81 160 369
## 82 104 319
## 91 121 460
## 94 204 901
# Count the rows for every combination of R101, R105 and R405
tab1 <- count(data_ssn, R101, R105, R405)
print(tab1)
## # A tibble: 134 × 4
## R101 R105 R405 n
## <dbl+lbl> <dbl+lbl> <dbl+lbl> <int>
## 1 11 [Aceh] 1 [Perkotaan] 1 [Laki-laki] 295
## 2 11 [Aceh] 1 [Perkotaan] 2 [Perempuan] 307
## 3 11 [Aceh] 2 [Perdesaan] 1 [Laki-laki] 685
## 4 11 [Aceh] 2 [Perdesaan] 2 [Perempuan] 678
## 5 12 [Sumatera Utara] 1 [Perkotaan] 1 [Laki-laki] 670
## 6 12 [Sumatera Utara] 1 [Perkotaan] 2 [Perempuan] 721
## 7 12 [Sumatera Utara] 2 [Perdesaan] 1 [Laki-laki] 885
## 8 12 [Sumatera Utara] 2 [Perdesaan] 2 [Perempuan] 904
## 9 13 [Sumatera Barat] 1 [Perkotaan] 1 [Laki-laki] 395
## 10 13 [Sumatera Barat] 1 [Perkotaan] 2 [Perempuan] 397
## # ℹ 124 more rows
library(dplyr)
library(tidyr)
library(gt)
# Build the aggregate table: mean KAPITA per province (R101) and sex (R405)
tabel <- data_ssn %>%
  group_by(R101, R405) %>% # order matters: it sets the position in the table
  summarise(Mean_theta = mean(KAPITA, na.rm = TRUE), .groups = "drop")
# Pivot to wide format (one column per R405 value) for the custom table layout
tabel_wide <- tabel %>%
  pivot_wider(names_from = R405, values_from = Mean_theta)
# Add the row total. The key column R101 is excluded explicitly: it holds
# numeric province codes, so where(is.numeric) would add the code to the sum.
# NOTE(review): this adds up group means; confirm that is the intended total.
tabel_wide <- tabel_wide %>%
  mutate(Total_Baris = rowSums(select(., -R101), na.rm = TRUE))
# Build the table with gt
tabel_gt <- tabel_wide %>%
  gt() %>%
  tab_header(
    title = "Tabel 1. Rata-rata Kapita menurut Provinsi dan Jenis Kelamin, 2023",
    subtitle = "Sumber: Data 2023"
  ) %>%
  cols_label(
    R101 = "Provinsi",
    Total_Baris = "Total"
  )
# Save the table to an HTML file
gtsave(tabel_gt, "tabel 2 arah.html")
# Also export the wide table to Excel
library(writexl)
write_xlsx(tabel_wide, "tabel_2_arah.xlsx")
library(dplyr)
library(tidyr)
library(gt)
# Build the aggregate table: mean KAPITA per province (R101),
# regency/city (KABU) and sex (R405)
tabel <- data_ssn %>%
  group_by(R101, KABU, R405) %>% # order matters: it sets the table position
  summarise(Mean_theta = mean(KAPITA, na.rm = TRUE), .groups = "drop")
# Pivot to wide format (one column per R405 value) for the custom table layout
tabel_wide <- tabel %>%
  pivot_wider(names_from = R405, values_from = Mean_theta)
# Add the row total. The key columns R101 and KABU are excluded explicitly so
# that numeric area codes picked up by where(is.numeric) cannot enter the sum.
# NOTE(review): this adds up group means; confirm that is the intended total.
tabel_wide <- tabel_wide %>%
  mutate(Total_Baris = rowSums(select(., -c(R101, KABU)), na.rm = TRUE))
# Build the table with gt
tabel_gt <- tabel_wide %>%
  gt() %>%
  tab_header(
    title = "Tabel 1. Rata-rata Kapita menurut Provinsi, Kabupaten/kota dan Jenis Kelamin, 2023",
    subtitle = "Sumber: Data 2023"
  ) %>%
  cols_label(
    KABU = "Kabupaten/Kota",
    R101 = "Provinsi",
    Total_Baris = "Total"
  )
# Save the table to an HTML file
gtsave(tabel_gt, "tabel 3 arah.html")
# Also export the wide table to Excel
library(writexl)
write_xlsx(tabel_wide, "tabel_3_arah.xlsx")
https://rpubs.com/kaz_yos/rboot02
Direktorat Statistik Kesejahteraan Rakyat, BPS, saptahas@bps.go.id