Data cleaning
- Select the variables that will be used
# Keep only the raw listing columns that the cleaning steps below read.
clothes <- select(
  clothes,
  produk_name,
  produk_harga,
  produk_kuantitas,
  produk_penilaian,
  produk_bintang,
  produk_toko,
  produk_pengikut
)
- Remove non-numeric text (labels, unit suffixes, separators) from the numeric variables
# Turn the scraped text fields into digit-only strings. The new columns are
# still character here; they are converted to numeric in a later step.
#
# Counts are abbreviated with the suffixes "RB" (expanded to thousands) and
# "JT" (expanded to millions) and may contain a comma followed by a decimal
# digit, e.g. "1,2RB". When a comma is present the digit after it already
# fills one place, so the suffix expands to one zero fewer ("RB" -> "00",
# "JT" -> "00000") and the comma is dropped; without a comma the full number
# of zeros is appended ("RB" -> "000", "JT" -> "000000").
# NOTE(review): this assumes at most ONE digit after the comma; a value such
# as "1,25RB" would come out ten times too large -- confirm against the data.
#
# A missing input stays NA because neither case_when() condition is TRUE.
clothes <- clothes %>%
  mutate(
    # Units sold: also strip the "Terjual" label.
    kuantitas = case_when(
      str_detect(produk_kuantitas, ",") ~
        str_replace_all(
          produk_kuantitas,
          c("RB" = "00", "Terjual" = "", "," = "")
        ),
      !str_detect(produk_kuantitas, ",") ~
        str_replace_all(produk_kuantitas, c("RB" = "000", "Terjual" = ""))
    ),
    # Number of ratings.
    penilaian = case_when(
      str_detect(produk_penilaian, ",") ~
        str_replace_all(produk_penilaian, c("RB" = "00", "," = "")),
      !str_detect(produk_penilaian, ",") ~
        str_replace_all(produk_penilaian, "RB", "000")
    ),
    # Shop followers. "JT" is expanded in BOTH branches so that a whole-million
    # value such as "2JT" does not turn into NA when converted to numeric.
    pengikut = case_when(
      str_detect(produk_pengikut, ",") ~
        str_replace_all(
          produk_pengikut,
          c("RB" = "00", "," = "", "JT" = "00000")
        ),
      !str_detect(produk_pengikut, ",") ~
        str_replace_all(produk_pengikut, c("RB" = "000", "JT" = "000000"))
    ),
    # Price: drop the "Rp" prefix, turn "-" into a space so that only the
    # first price of a range survives word(1), then remove the "." characters
    # (presumably thousands separators).
    harga = produk_harga %>%
      str_replace_all(c("Rp" = "", "-" = " ")) %>%
      word(1) %>%
      str_remove_all(fixed("."))
  )
- Select the cleaned variables and convert their data types
# Keep the cleaned columns under their final names (renaming happens inside
# select()), then convert the digit strings to numeric and the shop name to a
# factor.
clothes <- clothes %>%
  select(
    nama_produk = produk_name,
    penjual = produk_toko,
    bintang = produk_bintang,
    kuantitas,
    penilaian,
    pengikut,
    harga
  ) %>%
  mutate(
    kuantitas = as.numeric(kuantitas),
    penilaian = as.numeric(penilaian),
    pengikut = as.numeric(pengikut),
    harga = as.numeric(harga),
    penjual = as.factor(penjual)
  )
# Manual correction of one follower count: every 738400 is replaced by 738500.
# NOTE(review): the reason for this override is not visible here -- presumably
# it reconciles two slightly different scraped counts for the same shop;
# confirm before reusing on new data.
#
# The column is already numeric and holds whole numbers parsed from digit
# strings, so an exact numeric comparison is safe and no conversion to
# character and back is needed. which() keeps NA out of the index.
clothes$pengikut[which(clothes$pengikut == 738400)] <- 738500
# Number of distinct values in each column, returned as a named list.
map(clothes, n_distinct)
## $nama_produk
## [1] 187
##
## $penjual
## [1] 127
##
## $bintang
## [1] 10
##
## $kuantitas
## [1] 137
##
## $penilaian
## [1] 158
##
## $pengikut
## [1] 119
##
## $harga
## [1] 149
# Number of missing values in each column, returned as a named list.
map(clothes, function(column) sum(is.na(column)))
## $nama_produk
## [1] 0
##
## $penjual
## [1] 0
##
## $bintang
## [1] 1
##
## $kuantitas
## [1] 0
##
## $penilaian
## [1] 1
##
## $pengikut
## [1] 0
##
## $harga
## [1] 0
# Shops with the highest follower counts.
# distinct() keeps one row per (shop, follower count) pair, so a shop listed
# with two different counts would appear twice. top_n() is called without a
# weight and therefore ranks by the last column (pengikut); ties are kept, so
# more than ten rows are possible. The shop factor is finally reordered by
# descending follower count.
follower <- clothes %>%
  distinct(penjual, pengikut) %>%
  arrange(desc(pengikut)) %>%
  top_n(n = 10) %>%
  mutate(penjual = fct_reorder(penjual, desc(pengikut)))
## Selecting by pengikut
# Bar chart of follower counts per shop, with value labels on the bars.
follower %>%
  hchart(
    type = "bar",
    hcaes(x = penjual, y = pengikut),
    name = "pengikut",
    dataLabels = list(enabled = TRUE)
  ) %>%
  hc_title(text = "10 Toko Online Dengan Follower Terbanyak") %>%
  hc_add_theme(hc_theme_sandsignika())
# Treemap of the shops with the largest total quantity sold.
# Quantities are summed per shop; top_n() without a weight ranks by the last
# column (kuantitas_penjualan) and keeps ties. Tile colour follows the same
# total through a plasma colour axis.
clothes %>%
  group_by(penjual) %>%
  summarise(kuantitas_penjualan = sum(kuantitas)) %>%
  arrange(desc(kuantitas_penjualan)) %>%
  top_n(n = 10) %>%
  hchart(
    type = "treemap",
    hcaes(
      x = penjual,
      value = kuantitas_penjualan,
      color = kuantitas_penjualan
    )
  ) %>%
  hc_colorAxis(stops = color_stops(colors = viridis::plasma(10))) %>%
  hc_title(text = "Toko Online Dengan Kuantitas Penjualan Terbanyak")
## `summarise()` ungrouping output (override with `.groups` argument)
## Selecting by kuantitas_penjualan
# Revenue per shop, computed as the sum of quantity sold times price over the
# shop's listings. top_n() without a weight ranks by the last column
# (revenue) and keeps ties.
revenue <- clothes %>%
  group_by(penjual) %>%
  summarise(revenue = sum(kuantitas * harga)) %>%
  arrange(desc(revenue)) %>%
  top_n(n = 10)
## `summarise()` ungrouping output (override with `.groups` argument)
## Selecting by revenue
# Bar chart of revenue per shop, with value labels on the bars.
revenue %>%
  hchart(
    type = "bar",
    hcaes(x = penjual, y = revenue),
    name = "revenue",
    dataLabels = list(enabled = TRUE)
  ) %>%
  hc_add_theme(hc_theme_flat()) %>%
  hc_title(text = "10 Toko Online Dengan Revenue Terbesar")
# Ten listings with the highest quantity sold, drawn as bars and split into
# one series per shop (group = penjual).
#
# The ranking is taken over all rows, so no group_by() is used: arrange() and
# head() ignore grouping, and grouping would only hand a grouped tibble to
# hchart().
#
# Disabled static alternative (ggplot2) that was applied to the same ten rows:
#ggplot(aes(x = reorder(str_wrap(nama_produk, 40), kuantitas), y = kuantitas, fill = penjual)) +
#geom_bar(stat = "identity") +
#coord_flip() +
#theme(legend.position = "bottom") +
#scale_y_continuous(labels = comma) +
#geom_text(aes(label = scales::comma(kuantitas)), hjust=1.6, color="white", size=4.5) +
#ggthemes::scale_fill_tableau() +
#xlab("nama produk")
clothes %>%
  select(nama_produk, penjual, kuantitas) %>%
  arrange(desc(kuantitas)) %>%
  head(10) %>%
  hchart(
    type = "bar",
    hcaes(x = nama_produk, y = kuantitas, group = penjual),
    dataLabels = list(enabled = TRUE),
    pointWidth = 15
  ) %>%
  hc_title(text = "10 Produk Dengan Penjualan Terbanyak")
# Revenue per product for the shop "girlfashionstory", largest first.
# summarise() collapses the data to ONE row per product name, so a product
# that is listed several times gets a single bar with its combined revenue
# (quantity sold times price) instead of one repeated bar per listing.
# .groups = "drop" returns an ungrouped tibble.
clothes %>%
  filter(penjual == "girlfashionstory") %>%
  group_by(nama_produk) %>%
  summarise(gross = sum(kuantitas * harga), .groups = "drop") %>%
  arrange(desc(gross)) %>%
  hchart(
    type = "bar",
    hcaes(x = nama_produk, y = gross),
    name = "gross",
    dataLabels = list(enabled = TRUE)
  ) %>%
  hc_title(text = "Daftar Produk girlfashionstory berdasarkan revenuenya") %>%
  hc_add_theme(hc_theme_smpl())