library(tidyverse)
library(tidymodels)
library(highcharter)
# Load the clothing product listings. Per the column specification echoed
# below, every column is read as text except produk_bintang (double).
clothes <- read_csv(file = "C:/Users/LENOVO/Documents/clothesdata2.csv")
## Parsed with column specification:
## cols(
##   produk_name = col_character(),
##   produk_url = col_character(),
##   produk_harga = col_character(),
##   produk_harga_url = col_character(),
##   produk_kuantitas = col_character(),
##   produk_kuantitas_url = col_character(),
##   produk_penilaian = col_character(),
##   produk_bintang = col_double(),
##   produk_toko = col_character(),
##   produk_pengikut = col_character()
## )
# Preview the first rows of the raw table.
head(clothes)
## # A tibble: 6 x 10
##   produk_name produk_url produk_harga produk_harga_url produk_kuantitas
##   <chr>       <chr>      <chr>        <chr>            <chr>           
## 1 SUPER SALE~ https://s~ Rp30.000     https://shopee.~ 3,3RB Terjual   
## 2 pretty tur~ https://s~ Rp34.500     https://shopee.~ 7RB Terjual     
## 3 COD CP CIB~ https://s~ Rp39.900     https://shopee.~ 2,2RB Terjual   
## 4 DaveCollec~ https://s~ Rp9.775      https://shopee.~ 736 Terjual     
## 5 Celana Dal~ https://s~ Rp15.500     https://shopee.~ 1,6RB Terjual   
## 6 Rx Fashion~ https://s~ Rp26.300     https://shopee.~ 5,3RB Terjual   
## # ... with 5 more variables: produk_kuantitas_url <chr>,
## #   produk_penilaian <chr>, produk_bintang <dbl>, produk_toko <chr>,
## #   produk_pengikut <chr>

# Data cleaning

# Keep the name, price, quantity, rating, star, shop and follower fields;
# the three *_url columns are not needed for the analysis.
clothes <- select(
  clothes,
  produk_name,
  produk_harga,
  produk_kuantitas,
  produk_penilaian,
  produk_bintang,
  produk_toko,
  produk_pengikut
)
# Convert an abbreviated count such as "3,3RB Terjual", "7RB", "736 Terjual"
# or "1,2JT" into a plain number.
#
# The previous implementation padded digits textually ("RB" -> "00" or "000"),
# which only worked for exactly one decimal digit and left "JT" values without
# a comma unparsed. Scaling by a multiplier handles any number of decimals and
# both suffixes uniformly.
#
# @param x Character vector of raw counts. "RB" scales by 1e3, "JT" scales by
#   1e6, "," is the decimal mark, and an optional "Terjual" label is ignored.
# @return Numeric vector the same length as `x`; NA where `x` is NA or is not
#   a number after the suffixes are removed.
parse_abbreviated_count <- function(x) {
  # NA inputs fall through to multiplier 1 and end up NA via as.numeric().
  multiplier <- case_when(
    str_detect(x, "JT") ~ 1e6,
    str_detect(x, "RB") ~ 1e3,
    TRUE ~ 1
  )
  mantissa <- x %>%
    str_remove_all("RB|JT|Terjual") %>%
    str_replace(",", ".") %>%
    str_trim()
  # round() removes floating-point noise from products such as 2.2 * 1000.
  round(as.numeric(mantissa) * multiplier)
}

# Derive numeric quantity sold, rating count, follower count and price from
# the raw text columns.
clothes <- clothes %>%
  mutate(
    kuantitas = parse_abbreviated_count(produk_kuantitas),
    penilaian = parse_abbreviated_count(produk_penilaian),
    pengikut = parse_abbreviated_count(produk_pengikut),
    # Price: drop the "Rp" prefix, turn "-" into a space so that only the
    # first price of an "a - b" style value is kept, then strip the "."
    # thousands separators before converting.
    harga = str_replace_all(produk_harga, c("Rp" = "", "-" = " ")),
    harga = word(harga, 1),
    harga = as.numeric(str_remove_all(harga, fixed(".")))
  )
# Keep the cleaned columns under their final names and fix the column types:
# the four measures become numeric and the seller becomes a factor.
clothes <- clothes %>%
  select(
    nama_produk = produk_name,
    penjual = produk_toko,
    bintang = produk_bintang,
    kuantitas,
    penilaian,
    pengikut,
    harga
  ) %>%
  mutate(
    across(c(kuantitas, penilaian, pengikut, harga), as.numeric),
    penjual = as.factor(penjual)
  )
# Manual correction: a follower count of 738400 is rewritten to 738500.
# NOTE(review): the reason is not visible here - presumably it unifies two
# readings of the same seller so that distinct() yields one row; confirm.
clothes <- clothes %>%
  mutate(
    pengikut = as.character(pengikut),
    pengikut = if_else(pengikut == "738400", "738500", pengikut),
    pengikut = as.numeric(pengikut)
  )
# Number of distinct values in each column.
map(clothes, n_distinct)
## $nama_produk
## [1] 187
## 
## $penjual
## [1] 127
## 
## $bintang
## [1] 10
## 
## $kuantitas
## [1] 137
## 
## $penilaian
## [1] 158
## 
## $pengikut
## [1] 119
## 
## $harga
## [1] 149
# Number of missing values in each column.
map(clothes, function(column) sum(is.na(column)))
## $nama_produk
## [1] 0
## 
## $penjual
## [1] 0
## 
## $bintang
## [1] 1
## 
## $kuantitas
## [1] 0
## 
## $penilaian
## [1] 1
## 
## $pengikut
## [1] 0
## 
## $harga
## [1] 0
# One row per (seller, follower count) pair, reduced to the ten largest
# follower counts. top_n() is called without `wt`, so it ranks by the last
# column (pengikut) and reports that choice in a message.
follower <- clothes %>%
  distinct(penjual, pengikut) %>%
  arrange(desc(pengikut)) %>%
  top_n(n = 10) %>%
  # Order the factor levels by descending follower count.
  mutate(penjual = fct_reorder(penjual, desc(pengikut)))
## Selecting by pengikut
# Horizontal bar chart of the follower table with value labels on the bars.
follower %>%
  hchart(
    type = "bar",
    mapping = hcaes(x = penjual, y = pengikut),
    name = "pengikut",
    dataLabels = list(enabled = TRUE)
  ) %>%
  hc_title(text = "10 Toko Online Dengan Follower Terbanyak") %>%
  hc_add_theme(hc_theme_sandsignika())
# Treemap of total quantity sold per seller, limited to the ten largest
# totals. top_n() without `wt` ranks by the last column, i.e. the total.
clothes %>%
  group_by(penjual) %>%
  summarise(kuantitas_penjualan = sum(kuantitas)) %>%
  arrange(desc(kuantitas_penjualan)) %>%
  top_n(n = 10) %>%
  hchart(
    type = "treemap",
    mapping = hcaes(
      x = penjual,
      value = kuantitas_penjualan,
      color = kuantitas_penjualan
    )
  ) %>%
  # Colour the tiles along the plasma palette according to the total.
  hc_colorAxis(stops = color_stops(colors = viridis::plasma(10))) %>%
  hc_title(text = "Toko Online Dengan Kuantitas Penjualan Terbanyak")
## `summarise()` ungrouping output (override with `.groups` argument)
## Selecting by kuantitas_penjualan
# Revenue per seller, computed as the sum of quantity sold times price over
# the seller's listings, limited to the ten largest values. top_n() without
# `wt` ranks by the last column (revenue).
revenue <- clothes %>%
  group_by(penjual) %>%
  summarise(revenue = sum(kuantitas * harga)) %>%
  arrange(desc(revenue)) %>%
  top_n(n = 10)
## `summarise()` ungrouping output (override with `.groups` argument)
## Selecting by revenue
# Horizontal bar chart of the revenue table with value labels on the bars.
revenue %>%
  hchart(
    type = "bar",
    mapping = hcaes(x = penjual, y = revenue),
    name = "revenue",
    dataLabels = list(enabled = TRUE)
  ) %>%
  hc_add_theme(hc_theme_flat()) %>%
  hc_title(text = "10 Toko Online Dengan Revenue Terbesar")
# Bar chart of the ten listings with the highest quantity sold, with one
# series per seller.
#
# The former group_by(nama_produk) was removed: arrange() and head() ignore
# grouping, so it never changed the selected rows, but it passed a grouped
# tibble on to hchart().
#
# Earlier static ggplot2 version of the same chart, kept for reference:
#ggplot(aes(x = reorder(str_wrap(nama_produk, 40), kuantitas), y = kuantitas, fill = penjual)) +
#geom_bar(stat = "identity") +
#coord_flip() +
#theme(legend.position = "bottom") +
#scale_y_continuous(labels = comma) +
#geom_text(aes(label = scales::comma(kuantitas)), hjust=1.6, color="white", size=4.5) +
#ggthemes::scale_fill_tableau() +
#xlab("nama produk")
clothes %>%
  select(nama_produk, penjual, kuantitas) %>%
  arrange(desc(kuantitas)) %>%
  head(10) %>%
  hchart(
    type = "bar",
    hcaes(x = nama_produk, y = kuantitas, group = penjual),
    dataLabels = list(enabled = TRUE),
    pointWidth = 15
  ) %>%
  hc_title(text = "10 Produk Dengan Penjualan Terbanyak")
# Bar chart of revenue (quantity sold times price) per product for the
# seller "girlfashionstory", sorted from highest to lowest.
#
# summarise() is used instead of group_by() + mutate(): mutate() kept one row
# per listing, so a product listed more than once produced several bars that
# each showed the product total. summarise() yields exactly one bar per
# product and is identical when every product appears once.
clothes %>%
  filter(penjual == "girlfashionstory") %>%
  group_by(nama_produk) %>%
  summarise(gross = sum(kuantitas * harga), .groups = "drop") %>%
  arrange(desc(gross)) %>%
  hchart(
    type = "bar",
    hcaes(x = nama_produk, y = gross),
    name = "gross",
    dataLabels = list(enabled = TRUE)
  ) %>%
  hc_title(text = "Daftar Produk girlfashionstory berdasarkan revenuenya") %>%
  hc_add_theme(hc_theme_smpl())

# Korelasi antara variabel-variabel numerik

# Correlation matrix plot of the numeric columns, with the coefficients
# printed on the tiles. The numeric columns are selected explicitly because
# ggcorr() otherwise warns about, and drops, the non-numeric columns
# (nama_produk, penjual).
clothes %>%
  select(where(is.numeric)) %>%
  GGally::ggcorr(label = TRUE)