library(dsbox)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.2
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'stringr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.4.3
library(scales)
## Warning: package 'scales' was built under R version 4.4.3
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(ggcorrplot)
## Warning: package 'ggcorrplot' was built under R version 4.4.3

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

NOMOR 1: Eksplorasi Data Awal

- Tampilkan informasi umum dataset (jumlah data, kolom, tipe data).
- Lakukan pembersihan data sederhana jika diperlukan (null values, duplikat, dll.).

# Take a working copy of the LEGO sales data set provided by dsbox, then print
# its structure: number of rows, number of columns, and each column's type.
data <- dsbox::lego_sales
dplyr::glimpse(data)
## Rows: 620
## Columns: 14
## $ first_name   <chr> "Kimberly", "Neel", "Neel", "Chelsea", "Chelsea", "Chelse…
## $ last_name    <chr> "Beckstead", "Garvin", "Garvin", "Bouchard", "Bouchard", …
## $ age          <dbl> 24, 35, 35, 41, 41, 41, 19, 19, 37, 37, 19, 19, 20, 36, 3…
## $ phone_number <chr> "216-555-2549", "819-555-3189", "819-555-3189", NA, NA, N…
## $ set_id       <dbl> 24701, 25626, 24665, 24695, 25626, 24721, 24797, 24701, 2…
## $ number       <chr> "76062", "70595", "21031", "31048", "70595", "10831", "75…
## $ theme        <chr> "DC Comics Super Heroes", "Ninjago", "Architecture", "Cre…
## $ subtheme     <chr> "Mighty Micros", "Rise of the Villains", NA, NA, "Rise of…
## $ year         <dbl> 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 201…
## $ name         <chr> "Robin vs. Bane", "Ultra Stealth Raider", "Burj Khalifa",…
## $ pieces       <dbl> 77, 1093, 333, 368, 1093, 19, 233, 77, 108, NA, 13, 15, 6…
## $ us_price     <dbl> 9.99, 119.99, 39.99, 29.99, 119.99, 9.99, 24.99, 9.99, 9.…
## $ image_url    <chr> "http://images.brickset.com/sets/images/76062-1.jpg", "ht…
## $ quantity     <dbl> 1, 1, 1, 1, 1, 1, 1, 3, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, …
# Count the missing cells across every column of the data set.
data %>%
  is.na() %>%
  sum()
## [1] 392
# Keep only the first occurrence of each fully identical row.
data <- data %>%
  distinct()

NOMOR 2: Visualisasi Wajib

Buat minimal 5 visualisasi dari kategori berikut:

- 10 Customer dengan jumlah transaksi terbanyak
- 10 Tema LEGO Terpopuler Berdasarkan Penjualan
- Sebaran Jumlah Pieces dan Harga
- Komposisi Penjualan Berdasarkan Usia
- Heatmap Korelasi Antar Variabel Numerik

# Rank customers (identified by first and last name) by their purchase total
# and keep the ten largest.
# NOTE(review): `total_transactions` adds up `quantity` (units bought), not the
# number of purchase rows -- confirm this is the metric the chart should show.
top_customers <- data %>%
  group_by(first_name, last_name) %>%
  summarise(total_transactions = sum(quantity), .groups = "drop") %>%
  arrange(desc(total_transactions)) %>%
  slice_head(n = 10)

# Horizontal bars ordered by total, so the largest customer ends up on top
# after the axes are flipped.
top_customers %>%
  ggplot(aes(
    x = reorder(paste(first_name, last_name), total_transactions),
    y = total_transactions
  )) +
  geom_col(fill = "blue") +
  coord_flip() +
  labs(
    title = "10 Customer dengan transaksi terbanyak",
    x = "Customer",
    y = "Jumlah Transaksi"
  )

# Revenue per row: unit price multiplied by the number of units bought.
data <- mutate(data, sales = us_price * quantity)

# Total revenue per theme (rows with missing sales are ignored), reduced to the
# ten highest-earning themes.
top_themes <- data %>%
  group_by(theme) %>%
  summarise(Total_Sales = sum(sales, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(Total_Sales)) %>%
  slice_head(n = 10)

# Horizontal bars ordered by revenue.
top_themes %>%
  ggplot(aes(x = reorder(theme, Total_Sales), y = Total_Sales)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "a). 10 Tema LEGO Terpopuler Berdasarkan Penjualan",
    x = "Tema",
    y = "Total Penjualan (USD)"
  )

library(ggplot2)
# Scatter plot of piece count against US price, overlaid with a straight-line
# (lm) trend without a confidence band. Rows with a missing value in either
# variable are dropped by ggplot2, which reports them as warnings.
data %>%
  ggplot(aes(x = pieces, y = us_price)) +
  geom_point(colour = "blue", alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, colour = "red", linewidth = 0.7) +
  theme_minimal() +
  labs(
    title = "b).Sebaran Jumlah Pieces dan Harga LEGO",
    x = "Jumlah Pieces",
    y = "Harga (USD)"
  )
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 69 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 69 rows containing missing values or values outside the scale range
## (`geom_point()`).

# `sales` (us_price * quantity) was already added to `data` when the theme
# revenue was computed, so it is not recomputed here.

# Assign every row to an age bracket. The conditions are evaluated in order, so
# each one only needs the upper bound of its bracket; a missing age matches no
# condition and falls through to "Unknown".
# The result is stored as a factor with explicit levels so that the bars are
# drawn in age order instead of depending on how the locale sorts "<18".
data <- data %>%
  mutate(
    age_group = case_when(
      age < 18 ~ "<18",
      age < 30 ~ "18-29",
      age < 45 ~ "30-44",
      age < 60 ~ "45-59",
      age >= 60 ~ "60+",
      .default = "Unknown"
    ),
    age_group = factor(
      age_group,
      levels = c("<18", "18-29", "30-44", "45-59", "60+", "Unknown")
    )
  )

# Total revenue per age bracket (rows with missing sales are ignored).
sales_by_age <- data %>%
  group_by(age_group) %>%
  summarise(Total_Sales = sum(sales, na.rm = TRUE), .groups = "drop")

# One bar per bracket, all in the same colour. No `fill` aesthetic is mapped:
# the constant fill below would override it anyway.
ggplot(sales_by_age, aes(x = age_group, y = Total_Sales)) +
  geom_col(fill = "skyblue") +
  labs(title = "c). Komposisi Penjualan Berdasarkan Kelompok Usia",
       x = "Kelompok Usia",
       y = "Total Penjualan (USD)") +
  theme_minimal()

library(ggplot2)
library(dplyr)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.4.3
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
# Numeric variables to correlate. `sales` (us_price * quantity, added to `data`
# earlier in the script) is included so the heatmap also covers revenue, which
# the written insights refer to.
num_data <- data %>%
  select(age, pieces, us_price, quantity, sales)

# Pairwise correlations using only rows without any missing value, rounded to
# two decimals so the numbers can be printed inside the tiles.
corr_matrix <- round(cor(num_data, use = "complete.obs"), 2)

# Reshape the matrix to long format (Var1, Var2, value), one row per tile.
melted_corr <- melt(corr_matrix)

# Diverging colour scale centred on 0 and fixed to the full [-1, 1] range.
# `limits` is spelled out in full rather than relying on partial matching.
ggplot(melted_corr, aes(Var1, Var2, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white",
                       midpoint = 0, limits = c(-1, 1), space = "Lab",
                       name = "Korelasi") +
  geom_text(aes(label = value), color = "black", size = 4) +
  theme_minimal() +
  labs(title = "d). Heatmap Korelasi Variabel Numerik",
       x = NULL, y = NULL)

NOMOR 3: Insight dan Narasi

- Tuliskan 3–5 insight menarik dari visualisasi yang dibuat.
- Contoh: Apakah tema tertentu mendominasi revenue? Apakah set dengan lebih banyak pieces selalu lebih mahal? Apakah ada preferensi tema LEGO pada kelompok usia tertentu?

  1. Tema LEGO yang Dominan terhadap Revenue: Dari visualisasi 10 tema LEGO terpopuler, terlihat bahwa tema seperti “Star Wars”, “City”, dan “Ninjago” memberikan kontribusi paling besar terhadap total penjualan. Hal ini menunjukkan bahwa tema populer dari franchise besar memang punya daya tarik tinggi di pasar.

  2. Hubungan antara Jumlah Pieces dan Harga: Berdasarkan scatter plot, terlihat ada hubungan positif antara jumlah pieces dan harga LEGO. Umumnya, semakin banyak pieces, semakin mahal harga set LEGO tersebut.

  3. Komposisi Penjualan Berdasarkan Usia: Visualisasi usia pembeli menunjukkan bahwa kelompok usia 18–29 tahun dan 30–44 tahun mendominasi penjualan LEGO. Ini bisa menunjukkan bahwa LEGO tidak hanya diminati anak-anak, tetapi juga orang dewasa muda, baik sebagai hobi, koleksi, maupun hadiah.

  4. Korelasi Variabel Numerik: Dari heatmap korelasi, terlihat bahwa variabel pieces memiliki korelasi yang cukup kuat terhadap us_price dan sales. Ini mengindikasikan bahwa pieces adalah salah satu faktor utama yang berkaitan dengan harga dan total pendapatan dari set LEGO.

  5. Konsumen Loyal dan Transaksi Tinggi: Berdasarkan analisis 10 customer dengan jumlah transaksi terbanyak, terlihat adanya beberapa konsumen yang melakukan pembelian berulang. Ini membuka peluang untuk strategi loyalty program atau penawaran khusus bagi pelanggan aktif.