Analisis Penjualan LEGO

Dataset Lego Sales Data

Nomor 1

Eksplorasi Data Awal

# Install required packages.
# requireNamespace() only checks whether a package is installed; unlike
# require(), it does not attach the package as a side effect of the check.
if (!requireNamespace("dsbox", quietly = TRUE)) {
  # devtools is only needed for the GitHub install, so install it on demand.
  if (!requireNamespace("devtools", quietly = TRUE)) {
    install.packages("devtools")
  }
  devtools::install_github("rstudio-education/dsbox")
}

# Attach the dsbox package
library("dsbox")

# Load the lego_sales dataset into the workspace
data(lego_sales)

# Open the help page for the dataset
help("lego_sales")
# List the column names of the dataset
names(lego_sales)
##  [1] "first_name"   "last_name"    "age"          "phone_number" "set_id"      
##  [6] "number"       "theme"        "subtheme"     "year"         "name"        
## [11] "pieces"       "us_price"     "image_url"    "quantity"
# Show the structure of the data: each column's type and its first few values
str(lego_sales)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 620 obs. of  14 variables:
##  $ first_name  : chr  "Kimberly" "Neel" "Neel" "Chelsea" ...
##  $ last_name   : chr  "Beckstead" "Garvin" "Garvin" "Bouchard" ...
##  $ age         : num  24 35 35 41 41 41 19 19 37 37 ...
##  $ phone_number: chr  "216-555-2549" "819-555-3189" "819-555-3189" NA ...
##  $ set_id      : num  24701 25626 24665 24695 25626 ...
##  $ number      : chr  "76062" "70595" "21031" "31048" ...
##  $ theme       : chr  "DC Comics Super Heroes" "Ninjago" "Architecture" "Creator" ...
##  $ subtheme    : chr  "Mighty Micros" "Rise of the Villains" NA NA ...
##  $ year        : num  2018 2018 2018 2018 2018 ...
##  $ name        : chr  "Robin vs. Bane" "Ultra Stealth Raider" "Burj Khalifa" "Lakeside Lodge" ...
##  $ pieces      : num  77 1093 333 368 1093 ...
##  $ us_price    : num  9.99 119.99 39.99 29.99 119.99 ...
##  $ image_url   : chr  "http://images.brickset.com/sets/images/76062-1.jpg" "http://images.brickset.com/sets/images/70595-1.jpg" "http://images.brickset.com/sets/images/21031-1.jpg" "http://images.brickset.com/sets/images/31048-1.jpg" ...
##  $ quantity    : num  1 1 1 1 1 1 1 3 1 2 ...
##  - attr(*, "spec")=List of 3
##   ..$ cols   :List of 14
##   .. ..$ first_name  : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
##   .. ..$ last_name   : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
##   .. ..$ age         : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
##   .. ..$ phone_number: list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
##   .. ..$ set_id      : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
##   .. ..$ number      : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
##   .. ..$ theme       : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
##   .. ..$ subtheme    : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
##   .. ..$ year        : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
##   .. ..$ name        : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
##   .. ..$ pieces      : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
##   .. ..$ us_price    : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
##   .. ..$ image_url   : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
##   .. ..$ quantity    : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
##   ..$ default: list()
##   .. ..- attr(*, "class")= chr [1:2] "collector_guess" "collector"
##   ..$ skip   : num 1
##   ..- attr(*, "class")= chr "col_spec"
# Summary statistics for every column of the dataset
summary(lego_sales)
##   first_name         last_name              age        phone_number      
##  Length:620         Length:620         Min.   :16.00   Length:620        
##  Class :character   Class :character   1st Qu.:25.00   Class :character  
##  Mode  :character   Mode  :character   Median :33.00   Mode  :character  
##                                        Mean   :34.36                     
##                                        3rd Qu.:41.00                     
##                                        Max.   :68.00                     
##                                                                          
##      set_id         number             theme             subtheme        
##  Min.   :24548   Length:620         Length:620         Length:620        
##  1st Qu.:24725   Class :character   Class :character   Class :character  
##  Median :24805   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :25125                                                           
##  3rd Qu.:25640                                                           
##  Max.   :26060                                                           
##                                                                          
##       year          name               pieces          us_price     
##  Min.   :2018   Length:620         Min.   :  13.0   Min.   :  3.99  
##  1st Qu.:2018   Class :character   1st Qu.:  70.0   1st Qu.:  9.99  
##  Median :2018   Mode  :character   Median : 114.0   Median : 19.99  
##  Mean   :2018                      Mean   : 254.2   Mean   : 29.04  
##  3rd Qu.:2018                      3rd Qu.: 313.0   3rd Qu.: 29.99  
##  Max.   :2018                      Max.   :4634.0   Max.   :349.99  
##                                    NA's   :69                       
##   image_url            quantity    
##  Length:620         Min.   :1.000  
##  Class :character   1st Qu.:1.000  
##  Mode  :character   Median :1.000  
##                     Mean   :1.437  
##                     3rd Qu.:2.000  
##                     Max.   :5.000  
## 
# Count the missing values (NA) in each column
colSums(is.na(lego_sales))
##   first_name    last_name          age phone_number       set_id       number 
##            0            0            0           92            0            0 
##        theme     subtheme         year         name       pieces     us_price 
##            0          172            0            0           69            0 
##    image_url     quantity 
##           59            0
# Count rows that are exact duplicates of an earlier row
sum(duplicated(lego_sales))
## [1] 0
# Remove exact duplicate rows
lego_sales <- lego_sales[!duplicated(lego_sales), ]

# Remove rows with missing values, but only in the columns the analysis uses.
# A blanket na.omit() would also discard every sale whose phone_number,
# subtheme or image_url is missing, even though none of those columns feeds
# a plot or statistic below, which needlessly shrinks the sales totals.
analysis_cols <- c("age", "theme", "year", "pieces", "us_price", "quantity")
lego_sales <- lego_sales[complete.cases(lego_sales[, analysis_cols]), ]

Nomor 2

Visualisasi Wajib: Buat minimal 5 visualisasi dari kategori berikut:

a.) Tren Penjualan LEGO per Tahun (Trend Line Plot)

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
# Total sales value (us_price * quantity) per year, drawn as a trend line.
# Note: the summary output above shows year is 2018 for every row, so this
# produces a single point and geom_line() has no segment to draw.
lego_sales %>%
  group_by(year) %>%
  summarise(total_sales = sum(us_price * quantity, na.rm = TRUE)) %>%
  ggplot(aes(x = year, y = total_sales)) +
  # `linewidth` replaces `size` for lines (`size` deprecated in ggplot2 3.4.0)
  geom_line(color = "steelblue", linewidth = 1.2) +
  geom_point(color = "darkred", size = 2) +
  labs(
    title = "Tren Penjualan LEGO per Tahun",
    x = "Tahun",
    y = "Total Penjualan (USD)"
  ) +
  theme_minimal()
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

b.) 10 Tema LEGO Terpopuler Berdasarkan Penjualan (Bar Chart - TOP 10 Themes)

library(ggplot2)
library(dplyr)

# Top 10 themes by total units sold, shown as a horizontal bar chart.
lego_sales %>%
  group_by(theme) %>%
  summarise(total_quantity = sum(quantity, na.rm = TRUE)) %>%
  # slice_max() already ranks by total_quantity, so no arrange() is needed.
  # with_ties = FALSE caps the result at 10 themes even when several themes
  # tie at the cut-off, matching the "10 Tema" title.
  slice_max(total_quantity, n = 10, with_ties = FALSE) %>%
  # reorder() sorts the bars by units sold instead of alphabetically
  ggplot(aes(x = reorder(theme, total_quantity), y = total_quantity)) +
  geom_col(fill = "#434343") +
  coord_flip() +
  labs(title = "10 Tema LEGO Terpopuler Berdasarkan Penjualan",
       x = "Tema", y = "Jumlah Unit Terjual") +
  theme_minimal()

c.) Sebaran Jumlah Pieces dan Harga (Scatter Plot)

# Scatter plot of us_price against pieces, one semi-transparent point per row
lego_sales %>%
  ggplot(mapping = aes(x = pieces, y = us_price)) +
  geom_point(color = "darkgreen", alpha = 0.6) +
  labs(
    title = "Sebaran Jumlah Pieces dan Harga",
    x = "Jumlah Pieces",
    y = "Harga (USD)"
  ) +
  theme_minimal()

d.) Komposisi Penjualan Berdasarkan Usia (Pie Chart)

# Add an age_group column derived from age.
# case_when() uses the first matching condition, so the bands are:
#   age <= 19 -> "Anak-anak", 20-29 -> "Remaja", above 29 -> "Dewasa".
# The last band is `age > 29` (not `age > 30`) so that age 30 is classified
# as "Dewasa" instead of falling through to the fallback label.
lego_sales <- lego_sales %>%
  mutate(age_group = case_when(
    age <= 19 ~ "Anak-anak",
    age <= 29 ~ "Remaja",
    age > 29 ~ "Dewasa",
    # Only reached when age is NA
    TRUE ~ "Tidak diketahui"
  ))

# Pie chart: share of units sold per age group.
# count() with `wt` sums quantity within each age_group; a single stacked
# bar is then bent into a circle with polar coordinates.
lego_sales %>%
  count(age_group, wt = quantity, name = "total_sales") %>%
  ggplot(mapping = aes(x = "", y = total_sales, fill = age_group)) +
  geom_col() +
  coord_polar(theta = "y", start = 0) +
  labs(title = "Komposisi Penjualan Berdasarkan Usia") +
  theme_void()

e.) Heatmap Korelasi Antar Variabel Numerik

library(corrplot)
## corrplot 0.95 loaded
# Keep only numeric columns that actually vary.
# A constant column (year is 2018 for every row in the summary above) has a
# standard deviation of zero, which makes cor() warn and return NA for it.
lego_numeric <- lego_sales %>%
  select(where(function(col) {
    is.numeric(col) && isTRUE(stats::sd(col, na.rm = TRUE) > 0)
  }))

# Compute pairwise correlations using rows with no missing values
cor_matrix <- cor(lego_numeric, use = "complete.obs")

# Visualise the correlations; `mar` reserves top margin so that the title
# is not clipped by the plotting region.
corrplot(cor_matrix, method = "color", type = "upper",
         tl.col = "black", tl.srt = 45,
         addCoef.col = "red", number.cex = 0.7,
         title = "Heatmap Korelasi Antar Variabel Numerik",
         mar = c(0, 0, 2, 0))

Nomor 3

Insight dan Narasi

Jawab:

a.) Tren Penjualan per Tahun:

b.) Tema LEGO Terpopuler:

c.) Sebaran Jumlah Pieces dan Harga:

d.) Komposisi Penjualan Berdasarkan Usia:

e.) Korelasi Antar Variabel: