1 Check Missing Values


# Number of missing (NA) values in each column of df_ori.
colSums(is.na(df_ori))

# Number of missing (NA) values in the single column varA.
sum(is.na(df_ori$varA))

Membuang baris yang mengandung NA
# na.omit() keeps only the rows of df_ori that have no NA in any column;
# the result is stored in a new object, df_ori itself is left unchanged.
df_ori_clean <- na.omit(df_ori)

Mengubah nilai tertentu menjadi NA
# Replace every varA value equal to 8888 with NA (modifies df_ori in place).
df_ori$varA[df_ori$varA == 8888] <- NA

2 Making New Variable


base
# Add (or overwrite) the column var_baru; `ekspresi` appears to be a
# placeholder for the expression that produces the new values.
df_ori$var_baru <- ekspresi

# The same assignment written with bracket indexing by column name.
df_ori[,"var_baru"] <- ekspresi

dplyr
library(dplyr)
# Build df_baru from df_ori with an extra column varBaru: "Lab1" where the
# placeholder condition `cond` is TRUE, "Lab2" where it is FALSE.
df_baru <- mutate(
  df_ori,
  varBaru = ifelse(cond, "Lab1", "Lab2")
)
# Show only the newly created column.
print(df_baru[, "varBaru"])

3 Sort Cases


base
# Sort the rows of df_ori by varA in ascending order.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
df_asc <- df_ori[order(df_ori$varA, decreasing = FALSE), ]

# Sort the rows of df_ori by varA in descending order.
df_desc <- df_ori[order(df_ori$varA, decreasing = TRUE), ]

dplyr
library(dplyr)
# Ascending: arrange() sorts from smallest to largest by default.
df_sort <- df_ori %>%
  arrange(varA)
library(dplyr)
# Descending: wrap the column in desc().
df_sort <- df_ori %>%
  arrange(desc(varA))

4 Select Variables


base
# Select columns by name; "..." presumably stands for the remaining column
# names and must be replaced with real names before running.
data_selected <- df_ori[, c("var1","...","varN")]

# Select columns by position; posisiA ... posisiN look like placeholders for
# the integer column indices.
data_selected <- df_ori[, c(posisiA, ..., posisiN)]

dplyr
library(dplyr)
# Keep only the listed columns. `...` is a placeholder for the columns in
# between; the last one is written varN because `var-n` would be parsed as
# the subtraction var - n.
data_selected1 <- df_ori %>%
  select(var1, var2, ..., varN)

# Drop the listed columns and keep everything else.
data_selected2 <- df_ori %>%
  select(-c(var1, var2, ..., varN))

# Keep the columns whose names begin with "partname".
data_selected3 <- df_ori %>%
  select(starts_with("partname"))

# Keep the columns whose names contain "partname" anywhere.
data_selected4 <- df_ori %>%
  select(contains("partname"))

5 Select Cases


base
Misalnya memilih yang berkode 1 dari var X
# Rows where varX equals 1. which() discards NA comparisons, so rows with a
# missing varX are not returned as all-NA rows.
df_ori[which(df_ori$varX == 1), ]

Misalnya memilih yang berkode 2 atau >1500 dari var X
# Rows where varX equals 2 or is greater than 1500. which() discards NA
# comparisons, so rows with a missing varX are not returned as all-NA rows.
df_ori[which(df_ori$varX == 2 | df_ori$varX > 1500), ]

dplyr
Select Single
library(dplyr)
# Base R alternative: subset() needs a logical condition, so it takes the
# same `kondisi` placeholder that filter() uses below.
df_select <- subset(df_ori, kondisi)

# dplyr: keep the rows for which `kondisi` is TRUE.
df_select <- df_ori %>%
  filter(kondisi)

Select Multiple
# Keep the rows whose varN value is one of the codes in `pilihan`.
# The column is written varN because `var-n %in% pilihan` would be parsed as
# var - (n %in% pilihan).
pilihan <- c(32, 11, 62)
df_select <- df_ori %>%
  filter(varN %in% pilihan)

6 Filter Data


base
# `kondisi` is a placeholder for a logical expression on a column,
# e.g. df_ori$var > 10.
data_filter <- df_ori[kondisi, ]

# Inside subset() the column can be named directly, e.g. var > 10.
data_filter <- subset(df_ori, kondisi)

dplyr
library(dplyr)
# Keep the rows for which the placeholder condition `kondisi` is TRUE.
df_filter <- df_ori %>%
  filter(kondisi)
library(dplyr)
# Keep the rows whose varN value is one of the codes in `pilihan`.
# The column is written varN because `var-n %in% pilihan` would be parsed as
# var - (n %in% pilihan).
pilihan <- c(32, 11, 62)
df_filter <- df_ori %>%
  filter(varN %in% pilihan)

7 Recode Variables

Recoding berfungsi untuk membuat nilai baru dari nilai variabel yang sudah ada.


base
# New column rec: "Lab1" where kondisi_var is TRUE and "Lab2" where it is
# FALSE. kondisi_var looks like a placeholder for a logical condition on a
# column of df_ori.
df_ori$rec <- ifelse(df_ori$kondisi_var,"Lab1","Lab2")

car
library(car)
# Recode varA with a car recode specification: values matching `kondisi1`
# become `nilai1`, everything else becomes `nilai2` (both are placeholders).
# The call is namespace-qualified because dplyr also exports recode(), and
# whichever package is attached last would otherwise win.
car::recode(df_ori$varA, 'kondisi1=nilai1; else=nilai2')

dplyr
library(dplyr)
# Create column kelum from the first matching rule: condition1 gives value1,
# condition2 gives value2, and any remaining row gets default_value.
df_recode <- mutate(
  df_ori,
  kelum = case_when(
    condition1 ~ value1,
    condition2 ~ value2,
    TRUE ~ default_value
  )
)

8 Rename Columns


base
# Rename column VarA to VarA_baru: find its position among the column names
# and overwrite that name.
names(df_ori)[names(df_ori) == "VarA"] <- "VarA_baru"

dplyr
library(dplyr)
# rename() takes new_name = old_name pairs.
data_ssn1 <- rename(df_ori, VarA_baru = VarA)

library(dplyr)
# The same rename written with the pipe; it starts from df_ori like the
# example above (data_ssn was never defined).
data_ssn2 <- df_ori %>%
  rename(VarA_baru = VarA)

library(dplyr)
# Several columns can be renamed in one call.
data_ssn3 <- df_ori %>%
  rename(VarA_baru = VarA, VarB_baru = VarB)

9 Aggregating


library(dplyr)
# Mean of varX for each value of varA, ignoring missing values.
aggr1 <- df_ori %>%
  group_by(varA) %>%
  summarize(mean_theta = mean(varX, na.rm = TRUE))
print(aggr1)

# Mean of varX for each combination of varA and varB, ignoring missing values.
aggr2 <- df_ori %>%
  group_by(varA, varB) %>%
  summarize(mean_theta = mean(varX, na.rm = TRUE))
print(aggr2)



Direktorat Statistik Kesejahteraan Rakyat, BPS,