#1: Data Preparation and Initial Exploration
# General dataset information: print each column's name, type and first values
str(lego_sales)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 620 obs. of  14 variables:
##  $ first_name  : chr  "Kimberly" "Neel" "Neel" "Chelsea" ...
##  $ last_name   : chr  "Beckstead" "Garvin" "Garvin" "Bouchard" ...
##  $ age         : num  24 35 35 41 41 41 19 19 37 37 ...
##  $ phone_number: chr  "216-555-2549" "819-555-3189" "819-555-3189" NA ...
##  $ set_id      : num  24701 25626 24665 24695 25626 ...
##  $ number      : chr  "76062" "70595" "21031" "31048" ...
##  $ theme       : chr  "DC Comics Super Heroes" "Ninjago" "Architecture" "Creator" ...
##  $ subtheme    : chr  "Mighty Micros" "Rise of the Villains" NA NA ...
##  $ year        : num  2018 2018 2018 2018 2018 ...
##  $ name        : chr  "Robin vs. Bane" "Ultra Stealth Raider" "Burj Khalifa" "Lakeside Lodge" ...
##  $ pieces      : num  77 1093 333 368 1093 ...
##  $ us_price    : num  9.99 119.99 39.99 29.99 119.99 ...
##  $ image_url   : chr  "http://images.brickset.com/sets/images/76062-1.jpg" "http://images.brickset.com/sets/images/70595-1.jpg" "http://images.brickset.com/sets/images/21031-1.jpg" "http://images.brickset.com/sets/images/31048-1.jpg" ...
##  $ quantity    : num  1 1 1 1 1 1 1 3 1 2 ...
##  - attr(*, "spec")=List of 3
##   ..$ cols   :List of 14
##   .. ..$ first_name  : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
##   .. ..$ last_name   : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
##   .. ..$ age         : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
##   .. ..$ phone_number: list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
##   .. ..$ set_id      : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
##   .. ..$ number      : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
##   .. ..$ theme       : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
##   .. ..$ subtheme    : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
##   .. ..$ year        : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
##   .. ..$ name        : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
##   .. ..$ pieces      : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
##   .. ..$ us_price    : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
##   .. ..$ image_url   : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
##   .. ..$ quantity    : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
##   ..$ default: list()
##   .. ..- attr(*, "class")= chr [1:2] "collector_guess" "collector"
##   ..$ skip   : num 1
##   ..- attr(*, "class")= chr "col_spec"
# Per-column summary: quartiles/mean (and NA count) for numeric columns,
# length/class/mode for character columns
summary(lego_sales)
##   first_name         last_name              age        phone_number      
##  Length:620         Length:620         Min.   :16.00   Length:620        
##  Class :character   Class :character   1st Qu.:25.00   Class :character  
##  Mode  :character   Mode  :character   Median :33.00   Mode  :character  
##                                        Mean   :34.36                     
##                                        3rd Qu.:41.00                     
##                                        Max.   :68.00                     
##                                                                          
##      set_id         number             theme             subtheme        
##  Min.   :24548   Length:620         Length:620         Length:620        
##  1st Qu.:24725   Class :character   Class :character   Class :character  
##  Median :24805   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :25125                                                           
##  3rd Qu.:25640                                                           
##  Max.   :26060                                                           
##                                                                          
##       year          name               pieces          us_price     
##  Min.   :2018   Length:620         Min.   :  13.0   Min.   :  3.99  
##  1st Qu.:2018   Class :character   1st Qu.:  70.0   1st Qu.:  9.99  
##  Median :2018   Mode  :character   Median : 114.0   Median : 19.99  
##  Mean   :2018                      Mean   : 254.2   Mean   : 29.04  
##  3rd Qu.:2018                      3rd Qu.: 313.0   3rd Qu.: 29.99  
##  Max.   :2018                      Max.   :4634.0   Max.   :349.99  
##                                    NA's   :69                       
##   image_url            quantity    
##  Length:620         Min.   :1.000  
##  Class :character   1st Qu.:1.000  
##  Mode  :character   Median :1.000  
##                     Mean   :1.437  
##                     3rd Qu.:2.000  
##                     Max.   :5.000  
## 
# Simple data cleaning
# Count missing values across the whole table (all columns)
sum(is.na(lego_sales))
## [1] 392
# Drop rows with missing values, but only in the columns the analysis needs.
# phone_number and subtheme are optional descriptive fields: the str() output
# above shows NA in both (e.g. the Architecture and Creator rows have no
# subtheme). A blanket na.omit() would discard all such rows and bias the
# theme and customer rankings.
optional_cols <- c("phone_number", "subtheme")
required_cols <- setdiff(names(lego_sales), optional_cols)
lego_sales <- lego_sales[complete.cases(lego_sales[required_cols]), ]
# NOTE: the recorded outputs below were produced with the earlier blanket
# na.omit(); re-knit the document to refresh them.
# Count fully duplicated rows
sum(duplicated(lego_sales))
## [1] 0
# Remove duplicated rows, if any
lego_sales <- unique(lego_sales)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## [1] "Isi top_customers:"
## # A tibble: 10 × 4
##    first_name last_name      Total_Transactions Full_Name               
##    <chr>      <chr>                       <int> <chr>                   
##  1 Caroline   Holstein                        5 Caroline Holstein       
##  2 Joseph     Holub                           4 Joseph Holub            
##  3 Josie      Paley                           4 Josie Paley             
##  4 Megan      Sweetman                        4 Megan Sweetman          
##  5 Ramses     Ackah Yensu                     4 Ramses Ackah Yensu      
##  6 Aaron      Bruner                          3 Aaron Bruner            
##  7 Brandilyn  Robertson                       3 Brandilyn Robertson     
##  8 Christian  Ortiviz Madrid                  3 Christian Ortiviz Madrid
##  9 Colleen    Galley                          3 Colleen Galley          
## 10 Connor     Padilla                         3 Connor Padilla

##  [1] "first_name"   "last_name"    "age"          "phone_number" "set_id"      
##  [6] "number"       "theme"        "subtheme"     "year"         "name"        
## [11] "pieces"       "us_price"     "image_url"    "quantity"
## `geom_smooth()` using formula = 'y ~ x'

## corrplot 0.95 loaded

# 3. Tuliskan 3–5 insight menarik dari visualisasi yang dibuat.
# Jawab: 1. Tema LEGO terpopuler mendominasi penjualan. Ternyata, beberapa tema LEGO seperti Star Wars, City, atau Friends jadi primadona di pasaran. Tema-tema ini mendominasi daftar penjualan tertinggi, yang menunjukkan bahwa mereka punya daya tarik besar bagi pelanggan. Jadi, LEGO bisa terus mengembangkan produk dalam tema-tema ini karena sudah terbukti laris manis.
# 2. Harga tidak selalu sejalan dengan jumlah pieces. Saat melihat hubungan antara jumlah pieces dan harga, kita menemukan tren positif—artinya, semakin banyak pieces, harga cenderung lebih tinggi. Namun, ada juga beberapa set yang meskipun jumlah pieces-nya sedikit, harganya malah mahal. Ini mungkin karena faktor lisensi khusus, edisi terbatas, atau branding lainnya. Jadi, harga tidak hanya ditentukan oleh jumlah pieces saja, tapi juga nilai tambah lainnya.
# 3. Korelasi antar variabel penting. Analisis heatmap menunjukkan bahwa ada hubungan kuat antara jumlah pieces dan harga, di mana semakin banyak pieces maka harga cenderung lebih tinggi. Tapi, ada juga indikasi bahwa produk dengan harga mahal biasanya terjual lebih sedikit. Ini bisa jadi pertimbangan LEGO untuk menyeimbangkan antara harga produk dan potensi penjualannya.

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.