## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'ggthemes' was built under R version 4.4.3
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3

EKSPLORASI DATA AWAL

The LEGO sales data are in ‘lego_sales’.

# Load the `lego_sales` dataset into the workspace, then print a compact
# column-by-column preview (name, type, first values).
data(lego_sales)
lego_sales %>% glimpse()
## Rows: 620
## Columns: 14
## $ first_name   <chr> "Kimberly", "Neel", "Neel", "Chelsea", "Chelsea", "Chelse…
## $ last_name    <chr> "Beckstead", "Garvin", "Garvin", "Bouchard", "Bouchard", …
## $ age          <dbl> 24, 35, 35, 41, 41, 41, 19, 19, 37, 37, 19, 19, 20, 36, 3…
## $ phone_number <chr> "216-555-2549", "819-555-3189", "819-555-3189", NA, NA, N…
## $ set_id       <dbl> 24701, 25626, 24665, 24695, 25626, 24721, 24797, 24701, 2…
## $ number       <chr> "76062", "70595", "21031", "31048", "70595", "10831", "75…
## $ theme        <chr> "DC Comics Super Heroes", "Ninjago", "Architecture", "Cre…
## $ subtheme     <chr> "Mighty Micros", "Rise of the Villains", NA, NA, "Rise of…
## $ year         <dbl> 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 201…
## $ name         <chr> "Robin vs. Bane", "Ultra Stealth Raider", "Burj Khalifa",…
## $ pieces       <dbl> 77, 1093, 333, 368, 1093, 19, 233, 77, 108, NA, 13, 15, 6…
## $ us_price     <dbl> 9.99, 119.99, 39.99, 29.99, 119.99, 9.99, 24.99, 9.99, 9.…
## $ image_url    <chr> "http://images.brickset.com/sets/images/76062-1.jpg", "ht…
## $ quantity     <dbl> 1, 1, 1, 1, 1, 1, 1, 3, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, …
# Show the internal structure of the table, including the column
# specification attribute attached to it.
lego_sales %>% str()
## spc_tbl_ [620 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ first_name  : chr [1:620] "Kimberly" "Neel" "Neel" "Chelsea" ...
##  $ last_name   : chr [1:620] "Beckstead" "Garvin" "Garvin" "Bouchard" ...
##  $ age         : num [1:620] 24 35 35 41 41 41 19 19 37 37 ...
##  $ phone_number: chr [1:620] "216-555-2549" "819-555-3189" "819-555-3189" NA ...
##  $ set_id      : num [1:620] 24701 25626 24665 24695 25626 ...
##  $ number      : chr [1:620] "76062" "70595" "21031" "31048" ...
##  $ theme       : chr [1:620] "DC Comics Super Heroes" "Ninjago" "Architecture" "Creator" ...
##  $ subtheme    : chr [1:620] "Mighty Micros" "Rise of the Villains" NA NA ...
##  $ year        : num [1:620] 2018 2018 2018 2018 2018 ...
##  $ name        : chr [1:620] "Robin vs. Bane" "Ultra Stealth Raider" "Burj Khalifa" "Lakeside Lodge" ...
##  $ pieces      : num [1:620] 77 1093 333 368 1093 ...
##  $ us_price    : num [1:620] 9.99 119.99 39.99 29.99 119.99 ...
##  $ image_url   : chr [1:620] "http://images.brickset.com/sets/images/76062-1.jpg" "http://images.brickset.com/sets/images/70595-1.jpg" "http://images.brickset.com/sets/images/21031-1.jpg" "http://images.brickset.com/sets/images/31048-1.jpg" ...
##  $ quantity    : num [1:620] 1 1 1 1 1 1 1 3 1 2 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   first_name = col_character(),
##   ..   last_name = col_character(),
##   ..   age = col_double(),
##   ..   phone_number = col_character(),
##   ..   set_id = col_double(),
##   ..   number = col_character(),
##   ..   theme = col_character(),
##   ..   subtheme = col_character(),
##   ..   year = col_double(),
##   ..   name = col_character(),
##   ..   pieces = col_double(),
##   ..   us_price = col_double(),
##   ..   image_url = col_character(),
##   ..   quantity = col_double()
##   .. )
# Per-column summary: five-number summary, mean and NA count for numeric
# columns; length/class/mode for character columns.
lego_sales %>% summary()
##   first_name         last_name              age        phone_number      
##  Length:620         Length:620         Min.   :16.00   Length:620        
##  Class :character   Class :character   1st Qu.:25.00   Class :character  
##  Mode  :character   Mode  :character   Median :33.00   Mode  :character  
##                                        Mean   :34.36                     
##                                        3rd Qu.:41.00                     
##                                        Max.   :68.00                     
##                                                                          
##      set_id         number             theme             subtheme        
##  Min.   :24548   Length:620         Length:620         Length:620        
##  1st Qu.:24725   Class :character   Class :character   Class :character  
##  Median :24805   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :25125                                                           
##  3rd Qu.:25640                                                           
##  Max.   :26060                                                           
##                                                                          
##       year          name               pieces          us_price     
##  Min.   :2018   Length:620         Min.   :  13.0   Min.   :  3.99  
##  1st Qu.:2018   Class :character   1st Qu.:  70.0   1st Qu.:  9.99  
##  Median :2018   Mode  :character   Median : 114.0   Median : 19.99  
##  Mean   :2018                      Mean   : 254.2   Mean   : 29.04  
##  3rd Qu.:2018                      3rd Qu.: 313.0   3rd Qu.: 29.99  
##  Max.   :2018                      Max.   :4634.0   Max.   :349.99  
##                                    NA's   :69                       
##   image_url            quantity    
##  Length:620         Min.   :1.000  
##  Class :character   1st Qu.:1.000  
##  Mode  :character   Median :1.000  
##                     Mean   :1.437  
##                     3rd Qu.:2.000  
##                     Max.   :5.000  
## 

Cek missing values dan hapus duplikat

# Total number of missing cells across every column of the table.
lego_sales %>%
  is.na() %>%
  sum()
## [1] 392
# Keep one copy of each row; rows are duplicates only when all columns match.
# NOTE(review): fully identical rows could be legitimate repeat purchases --
# confirm that dropping them is intended.
lego_sales <- distinct(lego_sales)

VISUALISASI WAJIB BUAT MINIMAL 5 VISUALISASI dari KATEGORI BERIKUT

  1. 10 Customer dengan Jumlah Transaksi Terbanyak
# Horizontal bar chart of the 10 customers with the most rows in `lego_sales`.
# A customer is identified by first name + last name.
lego_sales %>%
  mutate(customer = paste(first_name, last_name)) %>%
  # count() replaces group_by() + summarise(n()) and returns ungrouped data.
  count(customer, name = "Jumlah_Transaksi") %>%
  # slice_max() already orders by the count, so no arrange() is needed.
  # with_ties = FALSE caps the result at exactly 10 rows, matching the title;
  # the default would keep every customer tied with the 10th one.
  slice_max(Jumlah_Transaksi, n = 10, with_ties = FALSE) %>%
  ggplot(aes(x = reorder(customer, Jumlah_Transaksi), y = Jumlah_Transaksi)) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity").
  geom_col(fill = "purple") +
  # Flip so that customer names are readable on the vertical axis.
  coord_flip() +
  labs(
    title = "10 Customer dengan Jumlah Transaksi Terbanyak",
    x = "Customer",
    y = "Jumlah Transaksi"
  ) +
  theme_minimal()

  2. 10 Tema LEGO Terpopuler Berdasarkan Penjualan
# Horizontal bar chart of the 10 themes with the most units sold
# (sum of `quantity` per theme).
lego_sales %>%
  group_by(theme) %>%
  # na.rm = TRUE matches the aggregation used for the sales-by-age chart.
  summarise(Total_Quantity = sum(quantity, na.rm = TRUE)) %>%
  # slice_max() already orders by the total, so no arrange() is needed.
  # with_ties = FALSE caps the result at exactly 10 themes, matching the title.
  slice_max(Total_Quantity, n = 10, with_ties = FALSE) %>%
  ggplot(aes(x = reorder(theme, Total_Quantity), y = Total_Quantity)) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity").
  geom_col(fill = "skyblue") +
  # Flip so that theme names are readable on the vertical axis.
  coord_flip() +
  labs(
    title = "10 Tema LEGO Terpopuler Berdasarkan Penjualan",
    x = "Tema",
    y = "Unit Terjual"
  ) +
  theme_light()

  3. Sebaran Jumlah Pieces dan Harga
# Scatter plot of US price against piece count, one point per row.
# Rows with a missing `pieces` value cannot be drawn; ggplot2 drops them and
# reports how many were removed in a warning.
lego_sales %>%
  ggplot(mapping = aes(x = pieces, y = us_price)) +
  geom_point(color = "purple", alpha = 0.6) +
  labs(
    x = "Jumlah Pieces",
    y = "Harga",
    title = "Sebaran Jumlah Pieces dan Harga"
  ) +
  theme_minimal()
## Warning: Removed 69 rows containing missing values or values outside the scale range
## (`geom_point()`).

  4. Komposisi Penjualan Berdasarkan Usia
# Bar chart of total units sold for each customer age.
lego_sales %>%
  group_by(age) %>%
  summarise(Total_Quantity = sum(quantity, na.rm = TRUE)) %>%
  # No arrange() here: bars are positioned by the levels of factor(age), i.e. in
  # ascending age order, so sorting the summary rows has no effect on the plot.
  ggplot(aes(x = factor(age), y = Total_Quantity)) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity").
  geom_col(fill = "red") +
  labs(
    title = "Komposisi Penjualan Berdasarkan Usia",
    x = "Usia",
    y = "Unit Terjual"
  ) +
  theme_minimal()

  5. Heatmap Korelasi Antar Variabel Numerik
# Numeric table used for the correlation heatmap.
# `revenue` (unit price x quantity purchased) is derived here because the
# narrative section discusses how price and quantity relate to revenue; without
# this column the heatmap cannot show those relationships.
lego_numeric <- lego_sales %>%
  mutate(revenue = us_price * quantity) %>%
  select(pieces, us_price, quantity, revenue) %>%
  # cor() returns NA for any pair involving a missing value, so incomplete
  # rows are removed first.
  drop_na()

# Pairwise correlation matrix of the numeric columns.
cor_matrix <- cor(lego_numeric)

# Upper-triangle colour heatmap with the coefficient printed in each cell.
corrplot(
  cor_matrix,
  method = "color",
  type = "upper",
  tl.cex = 0.8,
  addCoef.col = "black",
  tl.col = "black",
  tl.srt = 45,
  col = colorRampPalette(c("red", "white", "blue"))(200),
  # Extra top margin so the title is not clipped.
  mar = c(0, 0, 1, 0),
  title = "Heatmap Korelasi Antar Variabel Numerik"
)

INSIGHT dan NARASI

  1. Tema Star Wars mendominasi penjualan. Berdasarkan visualisasi 10 tema terpopuler (diukur dari unit terjual), Star Wars menempati posisi teratas dibandingkan tema lain (misalnya City atau Friends).
  2. Jumlah pieces berkorelasi dengan harga. Visualisasi scatter plot antara Pieces dan Price menunjukkan korelasi positif.
  3. 10 pelanggan teratas berkontribusi besar terhadap total penjualan. Dari visualisasi 10 customer teratas, terlihat bahwa segelintir pelanggan menyumbang volume transaksi yang sangat signifikan.
  4. Korelasi tinggi antara quantity dan revenue. Heatmap korelasi antar variabel numerik mengungkapkan bahwa Quantity dan Price sangat mempengaruhi total revenue. Selain itu, Pieces juga menunjukkan korelasi sedang dengan Price.