## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'ggthemes' was built under R version 4.4.3
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
The LEGO sales data are stored in `lego_sales`.
# Load the `lego_sales` dataset into the workspace, then print a compact
# column-by-column preview (name, type, first few values).
data("lego_sales")
lego_sales %>%
  glimpse()
## Rows: 620
## Columns: 14
## $ first_name <chr> "Kimberly", "Neel", "Neel", "Chelsea", "Chelsea", "Chelse…
## $ last_name <chr> "Beckstead", "Garvin", "Garvin", "Bouchard", "Bouchard", …
## $ age <dbl> 24, 35, 35, 41, 41, 41, 19, 19, 37, 37, 19, 19, 20, 36, 3…
## $ phone_number <chr> "216-555-2549", "819-555-3189", "819-555-3189", NA, NA, N…
## $ set_id <dbl> 24701, 25626, 24665, 24695, 25626, 24721, 24797, 24701, 2…
## $ number <chr> "76062", "70595", "21031", "31048", "70595", "10831", "75…
## $ theme <chr> "DC Comics Super Heroes", "Ninjago", "Architecture", "Cre…
## $ subtheme <chr> "Mighty Micros", "Rise of the Villains", NA, NA, "Rise of…
## $ year <dbl> 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 201…
## $ name <chr> "Robin vs. Bane", "Ultra Stealth Raider", "Burj Khalifa",…
## $ pieces <dbl> 77, 1093, 333, 368, 1093, 19, 233, 77, 108, NA, 13, 15, 6…
## $ us_price <dbl> 9.99, 119.99, 39.99, 29.99, 119.99, 9.99, 24.99, 9.99, 9.…
## $ image_url <chr> "http://images.brickset.com/sets/images/76062-1.jpg", "ht…
## $ quantity <dbl> 1, 1, 1, 1, 1, 1, 1, 3, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, …
# Display the internal structure of the data: dimensions, class, per-column
# type, and any attributes attached to the object.
lego_sales %>% str()
## spc_tbl_ [620 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ first_name : chr [1:620] "Kimberly" "Neel" "Neel" "Chelsea" ...
## $ last_name : chr [1:620] "Beckstead" "Garvin" "Garvin" "Bouchard" ...
## $ age : num [1:620] 24 35 35 41 41 41 19 19 37 37 ...
## $ phone_number: chr [1:620] "216-555-2549" "819-555-3189" "819-555-3189" NA ...
## $ set_id : num [1:620] 24701 25626 24665 24695 25626 ...
## $ number : chr [1:620] "76062" "70595" "21031" "31048" ...
## $ theme : chr [1:620] "DC Comics Super Heroes" "Ninjago" "Architecture" "Creator" ...
## $ subtheme : chr [1:620] "Mighty Micros" "Rise of the Villains" NA NA ...
## $ year : num [1:620] 2018 2018 2018 2018 2018 ...
## $ name : chr [1:620] "Robin vs. Bane" "Ultra Stealth Raider" "Burj Khalifa" "Lakeside Lodge" ...
## $ pieces : num [1:620] 77 1093 333 368 1093 ...
## $ us_price : num [1:620] 9.99 119.99 39.99 29.99 119.99 ...
## $ image_url : chr [1:620] "http://images.brickset.com/sets/images/76062-1.jpg" "http://images.brickset.com/sets/images/70595-1.jpg" "http://images.brickset.com/sets/images/21031-1.jpg" "http://images.brickset.com/sets/images/31048-1.jpg" ...
## $ quantity : num [1:620] 1 1 1 1 1 1 1 3 1 2 ...
## - attr(*, "spec")=
## .. cols(
## .. first_name = col_character(),
## .. last_name = col_character(),
## .. age = col_double(),
## .. phone_number = col_character(),
## .. set_id = col_double(),
## .. number = col_character(),
## .. theme = col_character(),
## .. subtheme = col_character(),
## .. year = col_double(),
## .. name = col_character(),
## .. pieces = col_double(),
## .. us_price = col_double(),
## .. image_url = col_character(),
## .. quantity = col_double()
## .. )
# Per-column summary: quartiles/mean for numeric columns, length/class/mode
# for character columns.
lego_sales %>% summary()
## first_name last_name age phone_number
## Length:620 Length:620 Min. :16.00 Length:620
## Class :character Class :character 1st Qu.:25.00 Class :character
## Mode :character Mode :character Median :33.00 Mode :character
## Mean :34.36
## 3rd Qu.:41.00
## Max. :68.00
##
## set_id number theme subtheme
## Min. :24548 Length:620 Length:620 Length:620
## 1st Qu.:24725 Class :character Class :character Class :character
## Median :24805 Mode :character Mode :character Mode :character
## Mean :25125
## 3rd Qu.:25640
## Max. :26060
##
## year name pieces us_price
## Min. :2018 Length:620 Min. : 13.0 Min. : 3.99
## 1st Qu.:2018 Class :character 1st Qu.: 70.0 1st Qu.: 9.99
## Median :2018 Mode :character Median : 114.0 Median : 19.99
## Mean :2018 Mean : 254.2 Mean : 29.04
## 3rd Qu.:2018 3rd Qu.: 313.0 3rd Qu.: 29.99
## Max. :2018 Max. :4634.0 Max. :349.99
## NA's :69
## image_url quantity
## Length:620 Min. :1.000
## Class :character 1st Qu.:1.000
## Mode :character Median :1.000
## Mean :1.437
## 3rd Qu.:2.000
## Max. :5.000
##
Cek missing values dan hapus baris duplikat
# Total number of missing cells across all columns of the data frame.
lego_sales %>%
  is.na() %>%
  sum()
## [1] 392
# Keep only one copy of rows that are identical across every column.
# NOTE(review): no transaction id is visible among the columns, so identical
# rows could be genuine repeat purchases -- confirm dropping them is intended.
lego_sales <- distinct(lego_sales)
# Horizontal bar chart of the customers with the most transactions (rows).
# A customer is identified by "first_name last_name".
#
# count() replaces group_by() + summarise(n()). No arrange() is needed:
# slice_max() picks the top rows itself and reorder() decides the bar order.
# slice_max() keeps ties by default, so more than 10 bars can appear when
# several customers share the 10th-largest count.
lego_sales %>%
  mutate(customer = paste(first_name, last_name)) %>%
  count(customer, name = "Jumlah_Transaksi") %>%
  slice_max(Jumlah_Transaksi, n = 10) %>%
  ggplot(aes(x = reorder(customer, Jumlah_Transaksi), y = Jumlah_Transaksi)) +
  geom_col(fill = "purple") +
  coord_flip() +
  labs(
    title = "10 Customer dengan Jumlah Transaksi Terbanyak",
    x = "Customer",
    y = "Jumlah Transaksi"
  ) +
  theme_minimal()
# Horizontal bar chart of the themes with the highest total units sold.
#
# na.rm = TRUE matches the per-age aggregation elsewhere in this analysis, so
# a single missing quantity cannot turn a whole theme total into NA.
# No arrange() is needed: slice_max() picks the top rows itself and reorder()
# decides the bar order. Ties at the 10th place are kept (slice_max() default).
lego_sales %>%
  group_by(theme) %>%
  summarise(Total_Quantity = sum(quantity, na.rm = TRUE)) %>%
  slice_max(Total_Quantity, n = 10) %>%
  ggplot(aes(x = reorder(theme, Total_Quantity), y = Total_Quantity)) +
  geom_col(fill = "skyblue") +
  coord_flip() +
  labs(
    title = "10 Tema LEGO Terpopuler Berdasarkan Penjualan",
    x = "Tema",
    y = "Unit Terjual"
  ) +
  theme_light()
# Scatter plot of set size (pieces) against US price.
# Rows with a missing x or y value are dropped by geom_point(), which emits a
# "Removed ... rows" warning when that happens.
lego_sales %>%
  ggplot(aes(x = pieces, y = us_price)) +
  geom_point(color = "purple", alpha = 0.6) +
  labs(
    title = "Sebaran Jumlah Pieces dan Harga",
    x = "Jumlah Pieces",
    y = "Harga"
  ) +
  theme_minimal()
## Warning: Removed 69 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Bar chart of total units sold for each customer age.
#
# The x axis is factor(age), so bars are ordered by age (factor levels), not by
# row order; sorting the summary beforehand has no effect and is omitted.
lego_sales %>%
  group_by(age) %>%
  summarise(Total_Quantity = sum(quantity, na.rm = TRUE)) %>%
  ggplot(aes(x = factor(age), y = Total_Quantity)) +
  geom_col(fill = "red") +
  labs(
    title = "Komposisi Penjualan Berdasarkan Usia",
    x = "Usia",
    y = "Unit Terjual"
  ) +
  theme_minimal()
# Correlation heatmap of the numeric sales variables.
# Rows with a missing value in any of the three selected columns are removed
# first so that cor() works on complete observations only.
lego_numeric <- drop_na(select(lego_sales, pieces, us_price, quantity))

cor_matrix <- cor(lego_numeric)

# Upper-triangle heatmap with the coefficient printed in each cell, using a
# 200-step red -> white -> blue palette.
corrplot(
  cor_matrix,
  method = "color",
  type = "upper",
  col = colorRampPalette(c("red", "white", "blue"))(200),
  addCoef.col = "black",
  tl.col = "black",
  tl.cex = 0.8,
  tl.srt = 45,
  mar = c(0, 0, 1, 0),
  title = "Heatmap Korelasi Antar Variabel Numerik"
)