a.Goals: 1, Menentukan Produk Unggulan untuk penjualan. 2, Saran Penjualan Berdasarkan Tren yang telah terjadi.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
library(ggplot2)
library(skimr)
## Warning: package 'skimr' was built under R version 4.5.2
library(corrplot) #digunakan untuk visualisasi korelasi
## Warning: package 'corrplot' was built under R version 4.5.2
## corrplot 0.95 loaded
onret <- read_excel("Online Retail.xlsx")
onret
## # A tibble: 541,909 × 8
## InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice
## <chr> <chr> <chr> <dbl> <dttm> <dbl>
## 1 536365 85123A WHITE HANGING HEA… 6 2010-12-01 08:26:00 2.55
## 2 536365 71053 WHITE METAL LANTE… 6 2010-12-01 08:26:00 3.39
## 3 536365 84406B CREAM CUPID HEART… 8 2010-12-01 08:26:00 2.75
## 4 536365 84029G KNITTED UNION FLA… 6 2010-12-01 08:26:00 3.39
## 5 536365 84029E RED WOOLLY HOTTIE… 6 2010-12-01 08:26:00 3.39
## 6 536365 22752 SET 7 BABUSHKA NE… 2 2010-12-01 08:26:00 7.65
## 7 536365 21730 GLASS STAR FROSTE… 6 2010-12-01 08:26:00 4.25
## 8 536366 22633 HAND WARMER UNION… 6 2010-12-01 08:28:00 1.85
## 9 536366 22632 HAND WARMER RED P… 6 2010-12-01 08:28:00 1.85
## 10 536367 84879 ASSORTED COLOUR B… 32 2010-12-01 08:34:00 1.69
## # ℹ 541,899 more rows
## # ℹ 2 more variables: CustomerID <dbl>, Country <chr>
summary(onret)
## InvoiceNo StockCode Description Quantity
## Length:541909 Length:541909 Length:541909 Min. :-80995.000
## Class :character Class :character Class :character 1st Qu.: 1.000
## Mode :character Mode :character Mode :character Median : 3.000
## Mean : 9.552
## 3rd Qu.: 10.000
## Max. : 80995.000
##
## InvoiceDate UnitPrice CustomerID
## Min. :2010-12-01 08:26:00 Min. :-11062.060 Min. :12346
## 1st Qu.:2011-03-28 11:34:00 1st Qu.: 1.250 1st Qu.:13953
## Median :2011-07-19 17:17:00 Median : 2.080 Median :15152
## Mean :2011-07-04 13:34:57 Mean : 4.611 Mean :15288
## 3rd Qu.:2011-10-19 11:27:00 3rd Qu.: 4.130 3rd Qu.:16791
## Max. :2011-12-09 12:50:00 Max. : 38970.000 Max. :18287
## NA's :135080
## Country
## Length:541909
## Class :character
## Mode :character
##
##
##
##
str(onret)
## tibble [541,909 × 8] (S3: tbl_df/tbl/data.frame)
## $ InvoiceNo : chr [1:541909] "536365" "536365" "536365" "536365" ...
## $ StockCode : chr [1:541909] "85123A" "71053" "84406B" "84029G" ...
## $ Description: chr [1:541909] "WHITE HANGING HEART T-LIGHT HOLDER" "WHITE METAL LANTERN" "CREAM CUPID HEARTS COAT HANGER" "KNITTED UNION FLAG HOT WATER BOTTLE" ...
## $ Quantity : num [1:541909] 6 6 8 6 6 2 6 6 6 32 ...
## $ InvoiceDate: POSIXct[1:541909], format: "2010-12-01 08:26:00" "2010-12-01 08:26:00" ...
## $ UnitPrice : num [1:541909] 2.55 3.39 2.75 3.39 3.39 7.65 4.25 1.85 1.85 1.69 ...
## $ CustomerID : num [1:541909] 17850 17850 17850 17850 17850 ...
## $ Country : chr [1:541909] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" ...
colSums(is.na(onret)) #cek missing value
## InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice
## 0 0 1454 0 0 0
## CustomerID Country
## 135080 0
onret_1 <- onret %>% #menghapus baris yang mengandung missing value
filter(if_all(c(Description, CustomerID),~!is.na(.)))
colSums(is.na(onret_1)) #cek apakah masih ada
## InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice
## 0 0 0 0 0 0
## CustomerID Country
## 0 0
onret_2 <- onret_1 %>% #cek duplikasi
filter(duplicated(.))
onret_3 <- onret_2 %>%
group_by(across(everything())) %>%
filter(n()>1)
onret_3
## # A tibble: 678 × 8
## # Groups: InvoiceNo, StockCode, Description, Quantity, InvoiceDate,
## # UnitPrice, CustomerID, Country [290]
## InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice
## <chr> <chr> <chr> <dbl> <dttm> <dbl>
## 1 536412 21448 12 DAISY PEGS IN … 2 2010-12-01 11:49:00 1.65
## 2 536412 21448 12 DAISY PEGS IN … 2 2010-12-01 11:49:00 1.65
## 3 536464 22866 HAND WARMER SCOTT… 1 2010-12-01 12:23:00 2.1
## 4 536464 22866 HAND WARMER SCOTT… 1 2010-12-01 12:23:00 2.1
## 5 536749 21415 CLAM SHELL SMALL 2 2010-12-02 13:49:00 2.1
## 6 536749 22174 PHOTO CUBE 2 2010-12-02 13:49:00 1.65
## 7 536749 21415 CLAM SHELL SMALL 2 2010-12-02 13:49:00 2.1
## 8 536749 22174 PHOTO CUBE 2 2010-12-02 13:49:00 1.65
## 9 536796 21967 PACK OF 12 SKULL … 1 2010-12-02 15:46:00 0.29
## 10 536796 21967 PACK OF 12 SKULL … 1 2010-12-02 15:46:00 0.29
## # ℹ 668 more rows
## # ℹ 2 more variables: CustomerID <dbl>, Country <chr>
onret_4 <- onret_3 %>% #menghapus duplikasi
distinct()
duplicated(onret_4)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [205] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [217] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [229] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [265] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [277] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [289] FALSE FALSE
dacl <- onret_4 %>% #mengubah kolom menjadi biner
mutate(UnitPrice = ifelse(UnitPrice > 3, 0, 1))
dacl
## # A tibble: 290 × 8
## # Groups: InvoiceNo, StockCode, Description, Quantity, InvoiceDate,
## # UnitPrice, CustomerID, Country [290]
## InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice
## <chr> <chr> <chr> <dbl> <dttm> <dbl>
## 1 536412 21448 12 DAISY PEGS IN … 2 2010-12-01 11:49:00 1
## 2 536464 22866 HAND WARMER SCOTT… 1 2010-12-01 12:23:00 1
## 3 536749 21415 CLAM SHELL SMALL 2 2010-12-02 13:49:00 1
## 4 536749 22174 PHOTO CUBE 2 2010-12-02 13:49:00 1
## 5 536796 21967 PACK OF 12 SKULL … 1 2010-12-02 15:46:00 1
## 6 536874 22866 HAND WARMER SCOTT… 1 2010-12-03 11:35:00 1
## 7 537042 21579 LOLITA DESIGN C… 1 2010-12-05 10:45:00 1
## 8 537051 22730 ALARM CLOCK BAKEL… 1 2010-12-05 11:12:00 0
## 9 537144 22086 PAPER CHAIN KIT 5… 1 2010-12-05 13:00:00 1
## 10 537144 21882 SKULLS TAPE 1 2010-12-05 13:00:00 1
## # ℹ 280 more rows
## # ℹ 2 more variables: CustomerID <dbl>, Country <chr>
head(dacl)
## # A tibble: 6 × 8
## # Groups: InvoiceNo, StockCode, Description, Quantity, InvoiceDate,
## # UnitPrice, CustomerID, Country [6]
## InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice
## <chr> <chr> <chr> <dbl> <dttm> <dbl>
## 1 536412 21448 12 DAISY PEGS IN W… 2 2010-12-01 11:49:00 1
## 2 536464 22866 HAND WARMER SCOTTY… 1 2010-12-01 12:23:00 1
## 3 536749 21415 CLAM SHELL SMALL 2 2010-12-02 13:49:00 1
## 4 536749 22174 PHOTO CUBE 2 2010-12-02 13:49:00 1
## 5 536796 21967 PACK OF 12 SKULL T… 1 2010-12-02 15:46:00 1
## 6 536874 22866 HAND WARMER SCOTTY… 1 2010-12-03 11:35:00 1
## # ℹ 2 more variables: CustomerID <dbl>, Country <chr>
str(dacl)
## gropd_df [290 × 8] (S3: grouped_df/tbl_df/tbl/data.frame)
## $ InvoiceNo : chr [1:290] "536412" "536464" "536749" "536749" ...
## $ StockCode : chr [1:290] "21448" "22866" "21415" "22174" ...
## $ Description: chr [1:290] "12 DAISY PEGS IN WOOD BOX" "HAND WARMER SCOTTY DOG DESIGN" "CLAM SHELL SMALL" "PHOTO CUBE" ...
## $ Quantity : num [1:290] 2 1 2 2 1 1 1 1 1 1 ...
## $ InvoiceDate: POSIXct[1:290], format: "2010-12-01 11:49:00" "2010-12-01 12:23:00" ...
## $ UnitPrice : num [1:290] 1 1 1 1 1 1 1 0 1 1 ...
## $ CustomerID : num [1:290] 17920 17968 17976 17976 15574 ...
## $ Country : chr [1:290] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" ...
## - attr(*, "groups")= tibble [290 × 9] (S3: tbl_df/tbl/data.frame)
## ..$ InvoiceNo : chr [1:290] "536412" "536464" "536749" "536749" ...
## ..$ StockCode : chr [1:290] "21448" "22866" "21415" "22174" ...
## ..$ Description: chr [1:290] "12 DAISY PEGS IN WOOD BOX" "HAND WARMER SCOTTY DOG DESIGN" "CLAM SHELL SMALL" "PHOTO CUBE" ...
## ..$ Quantity : num [1:290] 2 1 2 2 1 1 1 1 1 1 ...
## ..$ InvoiceDate: POSIXct[1:290], format: "2010-12-01 11:49:00" "2010-12-01 12:23:00" ...
## ..$ UnitPrice : num [1:290] 1 1 1 1 1 1 1 0 1 1 ...
## ..$ CustomerID : num [1:290] 17920 17968 17976 17976 15574 ...
## ..$ Country : chr [1:290] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" ...
## ..$ .rows : list<int> [1:290]
## .. ..$ : int 1
## .. ..$ : int 2
## .. ..$ : int 3
## .. ..$ : int 4
## .. ..$ : int 5
## .. ..$ : int 6
## .. ..$ : int 7
## .. ..$ : int 8
## .. ..$ : int 10
## .. ..$ : int 9
## .. ..$ : int 11
## .. ..$ : int 12
## .. ..$ : int 13
## .. ..$ : int 15
## .. ..$ : int 14
## .. ..$ : int 17
## .. ..$ : int 16
## .. ..$ : int 18
## .. ..$ : int 19
## .. ..$ : int 20
## .. ..$ : int 22
## .. ..$ : int 21
## .. ..$ : int 23
## .. ..$ : int 25
## .. ..$ : int 24
## .. ..$ : int 26
## .. ..$ : int 30
## .. ..$ : int 31
## .. ..$ : int 33
## .. ..$ : int 32
## .. ..$ : int 35
## .. ..$ : int 34
## .. ..$ : int 36
## .. ..$ : int 37
## .. ..$ : int 38
## .. ..$ : int 39
## .. ..$ : int 40
## .. ..$ : int 41
## .. ..$ : int 43
## .. ..$ : int 42
## .. ..$ : int 45
## .. ..$ : int 44
## .. ..$ : int 46
## .. ..$ : int 47
## .. ..$ : int 49
## .. ..$ : int 48
## .. ..$ : int 51
## .. ..$ : int 53
## .. ..$ : int 52
## .. ..$ : int 54
## .. ..$ : int 55
## .. ..$ : int 56
## .. ..$ : int 57
## .. ..$ : int 58
## .. ..$ : int 59
## .. ..$ : int 60
## .. ..$ : int 61
## .. ..$ : int 62
## .. ..$ : int 63
## .. ..$ : int 64
## .. ..$ : int 67
## .. ..$ : int 66
## .. ..$ : int 65
## .. ..$ : int 71
## .. ..$ : int 68
## .. ..$ : int 69
## .. ..$ : int 70
## .. ..$ : int 72
## .. ..$ : int 73
## .. ..$ : int 74
## .. ..$ : int 75
## .. ..$ : int 76
## .. ..$ : int 77
## .. ..$ : int 78
## .. ..$ : int 79
## .. ..$ : int 80
## .. ..$ : int 81
## .. ..$ : int 82
## .. ..$ : int 84
## .. ..$ : int 83
## .. ..$ : int 85
## .. ..$ : int 86
## .. ..$ : int 87
## .. ..$ : int 89
## .. ..$ : int 88
## .. ..$ : int 90
## .. ..$ : int 91
## .. ..$ : int 92
## .. ..$ : int 93
## .. ..$ : int 94
## .. ..$ : int 95
## .. ..$ : int 97
## .. ..$ : int 96
## .. ..$ : int 98
## .. ..$ : int 99
## .. ..$ : int 100
## .. ..$ : int 101
## .. ..$ : int 103
## .. ..$ : int 102
## .. .. [list output truncated]
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
Yang diperlukan
dacl_1 <- dacl %>% #buat kolom TotalSales
mutate(TotalSales = Quantity*UnitPrice) #mutate untuk menambahkan kolom
skim(dacl_1[, c("Quantity", "UnitPrice", "TotalSales")])
| Name | dacl_1[, c(“Quantity”, “U… |
| Number of rows | 290 |
| Number of columns | 3 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | Quantity, UnitPrice |
Variable type: numeric
| skim_variable | Quantity | UnitPrice | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| TotalSales | -12 | 1 | 0 | 1 | -12 | NA | -12 | -12 | -12 | -12 | -12 | ▁▁▇▁▁ |
| TotalSales | -1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| TotalSales | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| TotalSales | 1 | 1 | 0 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | ▁▁▇▁▁ |
| TotalSales | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| TotalSales | 2 | 1 | 0 | 1 | 2 | 0 | 2 | 2 | 2 | 2 | 2 | ▁▁▇▁▁ |
| TotalSales | 3 | 0 | 0 | 1 | 0 | NA | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| TotalSales | 3 | 1 | 0 | 1 | 3 | NA | 3 | 3 | 3 | 3 | 3 | ▁▁▇▁▁ |
| TotalSales | 4 | 1 | 0 | 1 | 4 | 0 | 4 | 4 | 4 | 4 | 4 | ▁▁▇▁▁ |
| TotalSales | 5 | 1 | 0 | 1 | 5 | NA | 5 | 5 | 5 | 5 | 5 | ▁▁▇▁▁ |
| TotalSales | 6 | 1 | 0 | 1 | 6 | 0 | 6 | 6 | 6 | 6 | 6 | ▁▁▇▁▁ |
| TotalSales | 8 | 1 | 0 | 1 | 8 | 0 | 8 | 8 | 8 | 8 | 8 | ▁▁▇▁▁ |
| TotalSales | 10 | 1 | 0 | 1 | 10 | NA | 10 | 10 | 10 | 10 | 10 | ▁▁▇▁▁ |
| TotalSales | 12 | 1 | 0 | 1 | 12 | 0 | 12 | 12 | 12 | 12 | 12 | ▁▁▇▁▁ |
| TotalSales | 24 | 1 | 0 | 1 | 24 | 0 | 24 | 24 | 24 | 24 | 24 | ▁▁▇▁▁ |
| TotalSales | 25 | 1 | 0 | 1 | 25 | NA | 25 | 25 | 25 | 25 | 25 | ▁▁▇▁▁ |
| TotalSales | 48 | 1 | 0 | 1 | 48 | NA | 48 | 48 | 48 | 48 | 48 | ▁▁▇▁▁ |
Visualisasi Distribusi
ggplot(dacl_1, aes(x = Quantity)) +
geom_histogram(binwidth = 3, fill = "dodgerblue") +
theme_minimal() +
labs(title = "Distribusi Quantity")
ggplot(dacl_1, aes(x = UnitPrice)) +
geom_histogram(binwidth = 0.5, fill = "turquoise") +
theme_light() +
labs(title = "Distribusi UnitPrice")
ggplot(dacl_1, aes(x = TotalSales)) +
geom_histogram(binwidth = 5, fill = "royalblue") +
theme_classic() +
labs(title = "Distribusi TotalSales")
produk_unggulan <- dacl_1 %>%
group_by(StockCode) %>% #pengelompokkan data berdasarkan stockcode
summarise(TotalSales = sum(TotalSales)) %>% #menghitung total penjualan perproduk
arrange(desc(TotalSales)) %>% #Pengurutan
slice_head(n = 5) #mengambil 5 produk teratas
ggplot(produk_unggulan, aes(x = reorder(StockCode, TotalSales), y = TotalSales)) +
#membuat grafik dari data produk_terlaris
geom_bar(stat = "identity", fill = "olivedrab") + #Untuk membuat grafik
coord_flip() + #Untuk memudahkan membaca nama produk
labs(title = "5 Produk Unggulan Berdasarkan StockCode", x = "StockCode", y = "Total Penjualan") + #Pelabelan dan penjudulan
theme_minimal() +
theme(axis.text.y = element_text(size = 10))
produk_unggulan
## # A tibble: 5 × 2
## StockCode TotalSales
## <chr> <dbl>
## 1 M 48
## 2 21497 25
## 3 17136A 24
## 4 20668 24
## 5 22294 24
tren_penjualan <- dacl_1 %>%
group_by(Tanggal = as.Date(InvoiceDate)) %>% #pengelompokkan data
summarise(TotalSales = sum(TotalSales)) #penjumlahkan total penjualan perhari
ggplot(tren_penjualan, aes(x = Tanggal, y = TotalSales)) +
geom_line(color = "gray13") + #menampilkan grafik
theme_minimal() +
labs(title = "Tren Penjualan Harian", x = "Tanggal", y = "Total Penjualan") #penjudulan dan pelabelan sumbu bertujan untuk mempermudah membaca
tren_penjualan
## # A tibble: 121 × 2
## Tanggal TotalSales
## <date> <dbl>
## 1 2010-12-01 3
## 2 2010-12-02 5
## 3 2010-12-03 1
## 4 2010-12-05 6
## 5 2010-12-06 15
## 6 2010-12-07 1
## 7 2010-12-08 3
## 8 2010-12-10 0
## 9 2010-12-12 1
## 10 2010-12-13 0
## # ℹ 111 more rows