# Count the NA values in every column of df_ori (one count per column).
colSums(is.na(df_ori))
# Count the NA values in the single column varA.
sum(is.na(df_ori$varA))
Membuang baris yang mengandung NA
# Drop every row of df_ori that contains at least one NA; the result is stored
# in df_ori_clean and df_ori itself is left unchanged.
df_ori_clean <- na.omit(df_ori)
Mengubah nilai tertentu pada variabel menjadi NA
# Replace every varA value equal to 8888 with NA, in place.
# NOTE(review): 8888 is presumably a missing-value code — confirm against the
# data dictionary.
df_ori$varA[df_ori$varA == 8888] <- NA
base
# Add (or overwrite) the column var_baru using `$`. `ekspresi` is presumably a
# placeholder for the expression that produces the new values.
df_ori$var_baru <- ekspresi
# The same assignment written with bracket indexing by column name.
df_ori[,"var_baru"] <- ekspresi
dplyr
library(dplyr)
# Derive a two-level label column: "Lab1" where `cond` is TRUE, "Lab2" where it
# is FALSE.
df_baru <- mutate(df_ori, varBaru = ifelse(cond, "Lab1", "Lab2"))
# Show only the newly created column.
print(df_baru[, "varBaru"])
base
# Sort the rows of df_ori by varA in ascending order.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can be
# reassigned.
df_asc <- df_ori[order(df_ori$varA, decreasing = FALSE), ]
# Sort the rows of df_ori by varA in descending order.
df_desc <- df_ori[order(df_ori$varA, decreasing = TRUE), ]
dplyr
library(dplyr)
# Sort the rows of df_ori by varA, smallest first.
df_sort <- df_ori %>%
  arrange(varA) # Ascending
# Sort the rows of df_ori by varA, largest first. desc() must be closed before
# arrange() is closed.
df_sort <- df_ori %>%
  arrange(desc(varA)) # Descending
base
# Select columns by name. The "..." entry presumably stands for the remaining
# column names (template placeholder — replace it before running).
data_selected <- df_ori[, c("var1","...","varN")]
# Select columns by position. posisiA ... posisiN are presumably placeholders
# for column indices; the bare ... must be replaced with real positions.
data_selected <- df_ori[, c(posisiA, ..., posisiN)]
dplyr
library(dplyr)
# Keep only the listed columns. The bare ... marks where further column names
# go (template placeholder — replace it before running). The last column is
# written varN because `var-n` would be parsed as the subtraction var - n.
data_selected1 <- df_ori %>%
  select(var1, var2, ..., varN)
# Drop the listed columns and keep everything else.
data_selected2 <- df_ori %>%
  select(-c(var1, var2, ..., varN))
# Keep the columns whose names begin with "partname".
data_selected3 <- df_ori %>%
  select(starts_with("partname"))
# Keep the columns whose names contain "partname" anywhere.
data_selected4 <- df_ori %>%
  select(contains("partname"))
base
Misalnya memilih yang berkode 1 dari var X
# Keep the rows where varX equals 1. which() discards the NA results of the
# comparison; a raw logical index containing NA would add all-NA rows to the
# output.
df_ori[which(df_ori$varX == 1), ]
Misalnya memilih yang berkode 2 atau >1500 dari var X
# Keep the rows where varX equals 2 or is greater than 1500. which() discards
# the NA results of the comparison; a raw logical index containing NA would add
# all-NA rows to the output.
df_ori[which(df_ori$varX == 2 | df_ori$varX > 1500), ]
dplyr
Select Single
library(dplyr)
# Base R: keep the rows where the logical test `kondisi` is TRUE. subset()
# requires a logical condition here, not an arithmetic expression.
df_select <- subset(df_ori, kondisi)
# dplyr form of the same row filter.
df_select <- df_ori %>%
  filter(kondisi)
Select Multiple
# Codes to keep.
pilihan <- c(32, 11, 62)
# Keep the rows whose varN value is one of the codes in pilihan. The column is
# written varN because `var-n %in% pilihan` would be parsed as
# var - (n %in% pilihan).
df_select <- df_ori %>%
  filter(varN %in% pilihan)
base
# Template only — not valid R as written. `kondisi df_ori$var` presumably stands
# for a logical test on df_ori$var (for example df_ori$var > 10); replace it
# with a real condition before running.
data_filter <- df_ori[kondisi df_ori$var, ]
# The same filter via subset(); `kondisi var` presumably stands for a logical
# test on the column var.
data_filter <- subset(df_ori, kondisi var)
dplyr
library(dplyr)
# Keep the rows where the logical test `kondisi` is TRUE.
df_filter <- df_ori %>%
  filter(kondisi)
# Codes to keep.
pilihan <- c(32, 11, 62)
# Keep the rows whose varN value is one of the codes in pilihan. The column is
# written varN because `var-n %in% pilihan` would be parsed as
# var - (n %in% pilihan).
df_filter <- df_ori %>%
  filter(varN %in% pilihan)
Recoding berfungsi untuk membuat nilai baru dari nilai variabel yang sudah ada.
base
# Create column rec: "Lab1" where df_ori$kondisi_var is TRUE, "Lab2" where it is
# FALSE; NA inputs stay NA. kondisi_var is presumably a placeholder for a
# logical test on a column — confirm.
df_ori$rec <- ifelse(df_ori$kondisi_var,"Lab1","Lab2")
car
library(car)
# Recode varA with car's recode specification string: values matching kondisi1
# become nilai1, everything else becomes nilai2. The call is namespace-qualified
# because dplyr also exports a recode() with a different interface, and which
# one a bare recode() reaches depends on the order the packages were attached.
car::recode(df_ori$varA, 'kondisi1=nilai1; else=nilai2')
dplyr
library(dplyr)
# Build column kelum from the first matching rule; rows that match neither
# condition1 nor condition2 fall through to default_value.
df_recode <- mutate(
  df_ori,
  kelum = case_when(
    condition1 ~ value1,
    condition2 ~ value2,
    TRUE ~ default_value
  )
)
base
# Rename column "VarA" to "VarA_baru": compare the column names (not the data
# frame itself) against "VarA" and overwrite the matching name.
names(df_ori)[names(df_ori) == "VarA"] <- "VarA_baru"
dplyr
library(dplyr)
# Rename VarA to VarA_baru (new_name = old_name), direct call form.
data_ssn1 <- rename(df_ori, VarA_baru = VarA)
# The same rename written with the pipe, on the same input data frame.
data_ssn2 <- df_ori %>%
  rename(VarA_baru = VarA)
# Rename several columns in one call.
data_ssn3 <- df_ori %>%
  rename(VarA_baru = VarA, VarB_baru = VarB)
# Mean of varX within each varA group, ignoring NA values.
aggr1 <- summarize(
  group_by(df_ori, varA),
  mean_theta = mean(varX, na.rm = TRUE)
)
print(aggr1)
# Mean of varX within each combination of varA and varB, ignoring NA values.
# The second grouping column is written varB: a bare name cannot contain a
# space.
aggr2 <- df_ori %>%
  group_by(varA, varB) %>%
  summarize(mean_theta = mean(varX, na.rm = TRUE))
print(aggr2)
Direktorat Statistik Kesejahteraan Rakyat, BPS, saptahas@bps.go.id