Cleaning Data
# Mengganti String Kosong menjadi NA
data[data == ""] <- NA
nrow(data)
## [1] 3019
# Menghapus Baris Jika Rating bernilai NA
data1 <- data %>%
filter(!is.na(Rating))
nrow(data1)
## [1] 2651
# Menghapus Baris Jika Harga bernilai NA
data2 <- data1 %>%
filter(!is.na(Harga))
nrow(data2)
## [1] 2649
# Menghapus Baris Jika Kota bernilai NA dan Show on Map
data3 <- data2 %>%
filter(!is.na(Kota) & Kota != "Show on Map")
View(data3)
nrow(data3)
## [1] 1813
# Mengubah Data sesuai Kata Kunci pada Kolom Room
data4 <- data3 %>%
mutate(
Room = ifelse(grepl("apartment", Room, ignore.case = TRUE), "Apartment", Room),
Room = ifelse(grepl("villa|vila|house|home|rumah", Room, ignore.case = TRUE), "Villa", Room),
Room = ifelse(grepl("superior deluxe|suite|premier|premiere|first class", Room, ignore.case = TRUE), "Superior Deluxe Room", Room),
Room = ifelse(grepl("superior|royal|executive", Room, ignore.case = TRUE), "Superior Room", Room),
Room = ifelse(grepl("deluxe|premium|exclusive", Room, ignore.case = TRUE), "Deluxe Room", Room),
Room = ifelse(grepl("studio", Room, ignore.case = TRUE), "Studio", Room),
Room = ifelse(grepl("family", Room, ignore.case = TRUE), "Family Room", Room),
Room = ifelse(grepl(("twin|double|standar|classic|pavilion|simple|singly|single"), Room, ignore.case = TRUE), "Standard Room", Room),
)
# Menampilkan hasil
View(data4)
# Filter Kolom Room
room_types <- c("Apartment", "Villa", "Superior Deluxe Room", "Superior Room",
"Deluxe Room", "Studio", "Family Room", "Standard Room")
# Menghapus baris yang tidak sesuai dengan daftar room_types
data5 <- data4 %>%
filter(Room %in% room_types)
# Menampilkan hasil
View(data5)
nrow(data5)
## [1] 1630
# Menghapus Baris Jika Bintang bernilai NA
data6 <- data5 %>%
filter(!is.na(Bintang))
nrow(data5)
## [1] 1630
# Menghapus Baris Jika Review bernilai NA
data7 <- data6 %>%
filter(!is.na(Review))
nrow(data7)
## [1] 1284
View(data7)
# Mengubah Data sesuai Kata Kunci pada Kolom Tag
data8 <- data7 %>%
mutate(
Tag = ifelse(grepl("4-hour Cancellation Window", Tag, ignore.case = TRUE), "Free Cancellation", Tag),
Tag = ifelse(grepl("Breakfast & dinner included", Tag, ignore.case = TRUE), "Breakfast included", Tag),
)
# Mengubah Data NA pada Kolom Tag
data9 <- data8 %>%
mutate(Tag = ifelse(is.na(Tag), "Nothing", Tag))
View(data9)
data9 %>%
count(`Bintang`)
## # A tibble: 5 × 2
## Bintang n
## <chr> <int>
## 1 Amazing 165
## 2 Good 117
## 3 Great 282
## 4 Outstanding 79
## 5 Very Good 641
data9 %>%
count(`Kota`)
## # A tibble: 120 × 2
## Kota n
## <chr> <int>
## 1 Andir 1
## 2 Arjuna 3
## 3 Babakan Ciamis 3
## 4 Babakan Penghulu 1
## 5 Bandarharjo 1
## 6 Bandung City 58
## 7 Bandung Regency 4
## 8 Bandung Wetan 13
## 9 Bandungan 4
## 10 Banyukuning 2
## # ℹ 110 more rows
data9 %>%
count(`Room`)
## # A tibble: 7 × 2
## Room n
## <chr> <int>
## 1 Apartment 12
## 2 Deluxe Room 347
## 3 Family Room 7
## 4 Standard Room 281
## 5 Studio 28
## 6 Superior Room 448
## 7 Villa 161
data9 %>%
count(`Tag`)
## # A tibble: 3 × 2
## Tag n
## <chr> <int>
## 1 Breakfast included 238
## 2 Free Cancellation 693
## 3 Nothing 353