# 1: Data preparation and initial exploration
# General dataset information: str() prints the object's classes, its
# dimensions, and each column's name, type and first few values, followed by
# the "spec" attribute carried by the tibble.
str(lego_sales)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 620 obs. of 14 variables:
## $ first_name : chr "Kimberly" "Neel" "Neel" "Chelsea" ...
## $ last_name : chr "Beckstead" "Garvin" "Garvin" "Bouchard" ...
## $ age : num 24 35 35 41 41 41 19 19 37 37 ...
## $ phone_number: chr "216-555-2549" "819-555-3189" "819-555-3189" NA ...
## $ set_id : num 24701 25626 24665 24695 25626 ...
## $ number : chr "76062" "70595" "21031" "31048" ...
## $ theme : chr "DC Comics Super Heroes" "Ninjago" "Architecture" "Creator" ...
## $ subtheme : chr "Mighty Micros" "Rise of the Villains" NA NA ...
## $ year : num 2018 2018 2018 2018 2018 ...
## $ name : chr "Robin vs. Bane" "Ultra Stealth Raider" "Burj Khalifa" "Lakeside Lodge" ...
## $ pieces : num 77 1093 333 368 1093 ...
## $ us_price : num 9.99 119.99 39.99 29.99 119.99 ...
## $ image_url : chr "http://images.brickset.com/sets/images/76062-1.jpg" "http://images.brickset.com/sets/images/70595-1.jpg" "http://images.brickset.com/sets/images/21031-1.jpg" "http://images.brickset.com/sets/images/31048-1.jpg" ...
## $ quantity : num 1 1 1 1 1 1 1 3 1 2 ...
## - attr(*, "spec")=List of 3
## ..$ cols :List of 14
## .. ..$ first_name : list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
## .. ..$ last_name : list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
## .. ..$ age : list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
## .. ..$ phone_number: list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
## .. ..$ set_id : list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
## .. ..$ number : list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
## .. ..$ theme : list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
## .. ..$ subtheme : list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
## .. ..$ year : list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
## .. ..$ name : list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
## .. ..$ pieces : list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
## .. ..$ us_price : list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
## .. ..$ image_url : list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
## .. ..$ quantity : list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr [1:2] "collector_guess" "collector"
## ..$ skip : num 1
## ..- attr(*, "class")= chr "col_spec"
# Per-column summary: min/quartiles/mean/max for numeric columns,
# length/class/mode for character columns, and NA counts where present.
summary(lego_sales)
## first_name last_name age phone_number
## Length:620 Length:620 Min. :16.00 Length:620
## Class :character Class :character 1st Qu.:25.00 Class :character
## Mode :character Mode :character Median :33.00 Mode :character
## Mean :34.36
## 3rd Qu.:41.00
## Max. :68.00
##
## set_id number theme subtheme
## Min. :24548 Length:620 Length:620 Length:620
## 1st Qu.:24725 Class :character Class :character Class :character
## Median :24805 Mode :character Mode :character Mode :character
## Mean :25125
## 3rd Qu.:25640
## Max. :26060
##
## year name pieces us_price
## Min. :2018 Length:620 Min. : 13.0 Min. : 3.99
## 1st Qu.:2018 Class :character 1st Qu.: 70.0 1st Qu.: 9.99
## Median :2018 Mode :character Median : 114.0 Median : 19.99
## Mean :2018 Mean : 254.2 Mean : 29.04
## 3rd Qu.:2018 3rd Qu.: 313.0 3rd Qu.: 29.99
## Max. :2018 Max. :4634.0 Max. :349.99
## NA's :69
## image_url quantity
## Length:620 Min. :1.000
## Class :character 1st Qu.:1.000
## Mode :character Median :1.000
## Mean :1.437
## 3rd Qu.:2.000
## Max. :5.000
##
# Simple data cleaning
# Count missing values across every cell of the data frame
sum(is.na(lego_sales))
## [1] 392
# Drop every row that contains at least one missing value.
# NOTE(review): na.omit() removes a row when ANY column is NA. The str() and
# summary() output shows NAs in phone_number, subtheme and pieces, so rows are
# also dropped for a missing phone number or subtheme -- confirm this is
# intended, since it shrinks the data used by every later step.
lego_sales <- na.omit(lego_sales)
# Count fully duplicated rows (evaluated after the NA rows were removed)
sum(duplicated(lego_sales))
## [1] 0
# Remove duplicate rows, if any (nothing is removed when the count above is 0)
lego_sales <- unique(lego_sales)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## [1] "Isi top_customers:"
## # A tibble: 10 × 4
## first_name last_name Total_Transactions Full_Name
## <chr> <chr> <int> <chr>
## 1 Caroline Holstein 5 Caroline Holstein
## 2 Joseph Holub 4 Joseph Holub
## 3 Josie Paley 4 Josie Paley
## 4 Megan Sweetman 4 Megan Sweetman
## 5 Ramses Ackah Yensu 4 Ramses Ackah Yensu
## 6 Aaron Bruner 3 Aaron Bruner
## 7 Brandilyn Robertson 3 Brandilyn Robertson
## 8 Christian Ortiviz Madrid 3 Christian Ortiviz Madrid
## 9 Colleen Galley 3 Colleen Galley
## 10 Connor Padilla 3 Connor Padilla
## [1] "first_name" "last_name" "age" "phone_number" "set_id"
## [6] "number" "theme" "subtheme" "year" "name"
## [11] "pieces" "us_price" "image_url" "quantity"
## `geom_smooth()` using formula = 'y ~ x'
## corrplot 0.95 loaded
# 3. Tuliskan 3–5 insight menarik dari visualisasi yang dibuat.
# Jawab: 1. Tema LEGO terpopuler yang mendominasi penjualan: ternyata beberapa tema LEGO seperti Star Wars, City, atau Friends jadi primadona di pasaran. Tema-tema ini mendominasi daftar penjualan tertinggi, yang menunjukkan bahwa mereka punya daya tarik besar bagi pelanggan. Jadi, LEGO bisa terus mengembangkan produk dalam tema-tema ini karena sudah terbukti laris manis.
# 2. Harga tidak selalu sejalan dengan jumlah pieces: saat melihat hubungan antara jumlah pieces dan harga, kita menemukan tren positif—artinya, semakin banyak pieces, harga cenderung lebih tinggi. Namun, ada juga beberapa set yang meskipun jumlah pieces-nya sedikit, harganya malah mahal. Ini mungkin karena faktor lisensi khusus, edisi terbatas, atau branding lainnya. Jadi, harga tidak hanya ditentukan oleh jumlah pieces saja, tetapi juga oleh nilai tambah lainnya.
# 3. Korelasi antarvariabel penting: analisis heatmap menunjukkan bahwa ada hubungan kuat antara jumlah pieces dan harga, di mana semakin banyak pieces maka harga cenderung lebih tinggi. Tapi, ada juga indikasi bahwa produk dengan harga mahal biasanya terjual lebih sedikit. Ini bisa jadi pertimbangan LEGO untuk menyeimbangkan antara harga produk dan potensi penjualannya.
Note that the `echo = FALSE` parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.