library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
## Warning: package 'readxl' was built under R version 4.5.2
data_or <- read_excel("11 online retail.xlsx")
data_or
## # A tibble: 541,909 × 8
## InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice
## <chr> <chr> <chr> <dbl> <dttm> <dbl>
## 1 536365 85123A WHITE HANGING HEA… 6 2010-12-01 08:26:00 2.55
## 2 536365 71053 WHITE METAL LANTE… 6 2010-12-01 08:26:00 3.39
## 3 536365 84406B CREAM CUPID HEART… 8 2010-12-01 08:26:00 2.75
## 4 536365 84029G KNITTED UNION FLA… 6 2010-12-01 08:26:00 3.39
## 5 536365 84029E RED WOOLLY HOTTIE… 6 2010-12-01 08:26:00 3.39
## 6 536365 22752 SET 7 BABUSHKA NE… 2 2010-12-01 08:26:00 7.65
## 7 536365 21730 GLASS STAR FROSTE… 6 2010-12-01 08:26:00 4.25
## 8 536366 22633 HAND WARMER UNION… 6 2010-12-01 08:28:00 1.85
## 9 536366 22632 HAND WARMER RED P… 6 2010-12-01 08:28:00 1.85
## 10 536367 84879 ASSORTED COLOUR B… 32 2010-12-01 08:34:00 1.69
## # ℹ 541,899 more rows
## # ℹ 2 more variables: CustomerID <dbl>, Country <chr>
#DATA CLEANING
summary(data_or)
## InvoiceNo StockCode Description Quantity
## Length:541909 Length:541909 Length:541909 Min. :-80995.000
## Class :character Class :character Class :character 1st Qu.: 1.000
## Mode :character Mode :character Mode :character Median : 3.000
## Mean : 9.552
## 3rd Qu.: 10.000
## Max. : 80995.000
##
## InvoiceDate UnitPrice CustomerID
## Min. :2010-12-01 08:26:00 Min. :-11062.060 Min. :12346
## 1st Qu.:2011-03-28 11:34:00 1st Qu.: 1.250 1st Qu.:13953
## Median :2011-07-19 17:17:00 Median : 2.080 Median :15152
## Mean :2011-07-04 13:34:57 Mean : 4.611 Mean :15288
## 3rd Qu.:2011-10-19 11:27:00 3rd Qu.: 4.130 3rd Qu.:16791
## Max. :2011-12-09 12:50:00 Max. : 38970.000 Max. :18287
## NA's :135080
## Country
## Length:541909
## Class :character
## Mode :character
##
##
##
##
str(data_or)
## tibble [541,909 × 8] (S3: tbl_df/tbl/data.frame)
## $ InvoiceNo : chr [1:541909] "536365" "536365" "536365" "536365" ...
## $ StockCode : chr [1:541909] "85123A" "71053" "84406B" "84029G" ...
## $ Description: chr [1:541909] "WHITE HANGING HEART T-LIGHT HOLDER" "WHITE METAL LANTERN" "CREAM CUPID HEARTS COAT HANGER" "KNITTED UNION FLAG HOT WATER BOTTLE" ...
## $ Quantity : num [1:541909] 6 6 8 6 6 2 6 6 6 32 ...
## $ InvoiceDate: POSIXct[1:541909], format: "2010-12-01 08:26:00" "2010-12-01 08:26:00" ...
## $ UnitPrice : num [1:541909] 2.55 3.39 2.75 3.39 3.39 7.65 4.25 1.85 1.85 1.69 ...
## $ CustomerID : num [1:541909] 17850 17850 17850 17850 17850 ...
## $ Country : chr [1:541909] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" ...
#cek missing value
colSums(is.na(data_or))
## InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice
## 0 0 1454 0 0 0
## CustomerID Country
## 135080 0
#hapus baris yang mengandung missing value
data_or1 <- data_or %>%
filter(if_all(c(Description, CustomerID),~!is.na(.)))
#cek lagi apakah masih ada untuk memastikan
colSums(is.na(data_or1))
## InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice
## 0 0 0 0 0 0
## CustomerID Country
## 0 0
#cek duplikasi
data_or2 <- data_or1 %>%
filter(duplicated(.))
#atau duplicated(nama_data)
data_or3 <- data_or2 %>%
group_by(across(everything())) %>%
filter(n()>1)
data_or3
## # A tibble: 678 × 8
## # Groups: InvoiceNo, StockCode, Description, Quantity, InvoiceDate,
## # UnitPrice, CustomerID, Country [290]
## InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice
## <chr> <chr> <chr> <dbl> <dttm> <dbl>
## 1 536412 21448 12 DAISY PEGS IN … 2 2010-12-01 11:49:00 1.65
## 2 536412 21448 12 DAISY PEGS IN … 2 2010-12-01 11:49:00 1.65
## 3 536464 22866 HAND WARMER SCOTT… 1 2010-12-01 12:23:00 2.1
## 4 536464 22866 HAND WARMER SCOTT… 1 2010-12-01 12:23:00 2.1
## 5 536749 21415 CLAM SHELL SMALL 2 2010-12-02 13:49:00 2.1
## 6 536749 22174 PHOTO CUBE 2 2010-12-02 13:49:00 1.65
## 7 536749 21415 CLAM SHELL SMALL 2 2010-12-02 13:49:00 2.1
## 8 536749 22174 PHOTO CUBE 2 2010-12-02 13:49:00 1.65
## 9 536796 21967 PACK OF 12 SKULL … 1 2010-12-02 15:46:00 0.29
## 10 536796 21967 PACK OF 12 SKULL … 1 2010-12-02 15:46:00 0.29
## # ℹ 668 more rows
## # ℹ 2 more variables: CustomerID <dbl>, Country <chr>
#delete/menghapus duplicate
data_or4 <- data_or3 %>%
distinct()
duplicated(data_or4)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [205] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [217] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [229] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [265] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [277] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [289] FALSE FALSE
#ubah sebuah kolom menjadi biner
data_or_clean <- data_or4 %>%
mutate(UnitPrice = ifelse(UnitPrice > 3, 0, 1))
data_or_clean
## # A tibble: 290 × 8
## # Groups: InvoiceNo, StockCode, Description, Quantity, InvoiceDate,
## # UnitPrice, CustomerID, Country [290]
## InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice
## <chr> <chr> <chr> <dbl> <dttm> <dbl>
## 1 536412 21448 12 DAISY PEGS IN … 2 2010-12-01 11:49:00 1
## 2 536464 22866 HAND WARMER SCOTT… 1 2010-12-01 12:23:00 1
## 3 536749 21415 CLAM SHELL SMALL 2 2010-12-02 13:49:00 1
## 4 536749 22174 PHOTO CUBE 2 2010-12-02 13:49:00 1
## 5 536796 21967 PACK OF 12 SKULL … 1 2010-12-02 15:46:00 1
## 6 536874 22866 HAND WARMER SCOTT… 1 2010-12-03 11:35:00 1
## 7 537042 21579 LOLITA DESIGN C… 1 2010-12-05 10:45:00 1
## 8 537051 22730 ALARM CLOCK BAKEL… 1 2010-12-05 11:12:00 0
## 9 537144 22086 PAPER CHAIN KIT 5… 1 2010-12-05 13:00:00 1
## 10 537144 21882 SKULLS TAPE 1 2010-12-05 13:00:00 1
## # ℹ 280 more rows
## # ℹ 2 more variables: CustomerID <dbl>, Country <chr>
head(data_or_clean)
## # A tibble: 6 × 8
## # Groups: InvoiceNo, StockCode, Description, Quantity, InvoiceDate,
## # UnitPrice, CustomerID, Country [6]
## InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice
## <chr> <chr> <chr> <dbl> <dttm> <dbl>
## 1 536412 21448 12 DAISY PEGS IN W… 2 2010-12-01 11:49:00 1
## 2 536464 22866 HAND WARMER SCOTTY… 1 2010-12-01 12:23:00 1
## 3 536749 21415 CLAM SHELL SMALL 2 2010-12-02 13:49:00 1
## 4 536749 22174 PHOTO CUBE 2 2010-12-02 13:49:00 1
## 5 536796 21967 PACK OF 12 SKULL T… 1 2010-12-02 15:46:00 1
## 6 536874 22866 HAND WARMER SCOTTY… 1 2010-12-03 11:35:00 1
## # ℹ 2 more variables: CustomerID <dbl>, Country <chr>
str(data_or_clean)
## gropd_df [290 × 8] (S3: grouped_df/tbl_df/tbl/data.frame)
## $ InvoiceNo : chr [1:290] "536412" "536464" "536749" "536749" ...
## $ StockCode : chr [1:290] "21448" "22866" "21415" "22174" ...
## $ Description: chr [1:290] "12 DAISY PEGS IN WOOD BOX" "HAND WARMER SCOTTY DOG DESIGN" "CLAM SHELL SMALL" "PHOTO CUBE" ...
## $ Quantity : num [1:290] 2 1 2 2 1 1 1 1 1 1 ...
## $ InvoiceDate: POSIXct[1:290], format: "2010-12-01 11:49:00" "2010-12-01 12:23:00" ...
## $ UnitPrice : num [1:290] 1 1 1 1 1 1 1 0 1 1 ...
## $ CustomerID : num [1:290] 17920 17968 17976 17976 15574 ...
## $ Country : chr [1:290] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" ...
## - attr(*, "groups")= tibble [290 × 9] (S3: tbl_df/tbl/data.frame)
## ..$ InvoiceNo : chr [1:290] "536412" "536464" "536749" "536749" ...
## ..$ StockCode : chr [1:290] "21448" "22866" "21415" "22174" ...
## ..$ Description: chr [1:290] "12 DAISY PEGS IN WOOD BOX" "HAND WARMER SCOTTY DOG DESIGN" "CLAM SHELL SMALL" "PHOTO CUBE" ...
## ..$ Quantity : num [1:290] 2 1 2 2 1 1 1 1 1 1 ...
## ..$ InvoiceDate: POSIXct[1:290], format: "2010-12-01 11:49:00" "2010-12-01 12:23:00" ...
## ..$ UnitPrice : num [1:290] 1 1 1 1 1 1 1 0 1 1 ...
## ..$ CustomerID : num [1:290] 17920 17968 17976 17976 15574 ...
## ..$ Country : chr [1:290] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" ...
## ..$ .rows : list<int> [1:290]
## .. ..$ : int 1
## .. ..$ : int 2
## .. ..$ : int 3
## .. ..$ : int 4
## .. ..$ : int 5
## .. ..$ : int 6
## .. ..$ : int 7
## .. ..$ : int 8
## .. ..$ : int 10
## .. ..$ : int 9
## .. ..$ : int 11
## .. ..$ : int 12
## .. ..$ : int 13
## .. ..$ : int 15
## .. ..$ : int 14
## .. ..$ : int 17
## .. ..$ : int 16
## .. ..$ : int 18
## .. ..$ : int 19
## .. ..$ : int 20
## .. ..$ : int 22
## .. ..$ : int 21
## .. ..$ : int 23
## .. ..$ : int 25
## .. ..$ : int 24
## .. ..$ : int 26
## .. ..$ : int 30
## .. ..$ : int 31
## .. ..$ : int 33
## .. ..$ : int 32
## .. ..$ : int 35
## .. ..$ : int 34
## .. ..$ : int 36
## .. ..$ : int 37
## .. ..$ : int 38
## .. ..$ : int 39
## .. ..$ : int 40
## .. ..$ : int 41
## .. ..$ : int 43
## .. ..$ : int 42
## .. ..$ : int 45
## .. ..$ : int 44
## .. ..$ : int 46
## .. ..$ : int 47
## .. ..$ : int 49
## .. ..$ : int 48
## .. ..$ : int 51
## .. ..$ : int 53
## .. ..$ : int 52
## .. ..$ : int 54
## .. ..$ : int 55
## .. ..$ : int 56
## .. ..$ : int 57
## .. ..$ : int 58
## .. ..$ : int 59
## .. ..$ : int 60
## .. ..$ : int 61
## .. ..$ : int 62
## .. ..$ : int 63
## .. ..$ : int 64
## .. ..$ : int 67
## .. ..$ : int 66
## .. ..$ : int 65
## .. ..$ : int 71
## .. ..$ : int 68
## .. ..$ : int 69
## .. ..$ : int 70
## .. ..$ : int 72
## .. ..$ : int 73
## .. ..$ : int 74
## .. ..$ : int 75
## .. ..$ : int 76
## .. ..$ : int 77
## .. ..$ : int 78
## .. ..$ : int 79
## .. ..$ : int 80
## .. ..$ : int 81
## .. ..$ : int 82
## .. ..$ : int 84
## .. ..$ : int 83
## .. ..$ : int 85
## .. ..$ : int 86
## .. ..$ : int 87
## .. ..$ : int 89
## .. ..$ : int 88
## .. ..$ : int 90
## .. ..$ : int 91
## .. ..$ : int 92
## .. ..$ : int 93
## .. ..$ : int 94
## .. ..$ : int 95
## .. ..$ : int 97
## .. ..$ : int 96
## .. ..$ : int 98
## .. ..$ : int 99
## .. ..$ : int 100
## .. ..$ : int 101
## .. ..$ : int 103
## .. ..$ : int 102
## .. .. [list output truncated]
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
#buat kolom TotalSales
data_or_clean <- data_or_clean %>%
mutate(TotalSales = Quantity*UnitPrice)
#mutate untuk menambahkan kolom (bisa juga untuk modifikasi)
#ringkasan statistik
library(skimr)
## Warning: package 'skimr' was built under R version 4.5.2
skim(data_or_clean[, c("Quantity", "UnitPrice", "TotalSales")])
| Name | data_or_clean[, c(“Quanti… |
| Number of rows | 290 |
| Number of columns | 3 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | Quantity, UnitPrice |
Variable type: numeric
| skim_variable | Quantity | UnitPrice | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| TotalSales | -12 | 1 | 0 | 1 | -12 | NA | -12 | -12 | -12 | -12 | -12 | ▁▁▇▁▁ |
| TotalSales | -1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| TotalSales | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| TotalSales | 1 | 1 | 0 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | ▁▁▇▁▁ |
| TotalSales | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| TotalSales | 2 | 1 | 0 | 1 | 2 | 0 | 2 | 2 | 2 | 2 | 2 | ▁▁▇▁▁ |
| TotalSales | 3 | 0 | 0 | 1 | 0 | NA | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| TotalSales | 3 | 1 | 0 | 1 | 3 | NA | 3 | 3 | 3 | 3 | 3 | ▁▁▇▁▁ |
| TotalSales | 4 | 1 | 0 | 1 | 4 | 0 | 4 | 4 | 4 | 4 | 4 | ▁▁▇▁▁ |
| TotalSales | 5 | 1 | 0 | 1 | 5 | NA | 5 | 5 | 5 | 5 | 5 | ▁▁▇▁▁ |
| TotalSales | 6 | 1 | 0 | 1 | 6 | 0 | 6 | 6 | 6 | 6 | 6 | ▁▁▇▁▁ |
| TotalSales | 8 | 1 | 0 | 1 | 8 | 0 | 8 | 8 | 8 | 8 | 8 | ▁▁▇▁▁ |
| TotalSales | 10 | 1 | 0 | 1 | 10 | NA | 10 | 10 | 10 | 10 | 10 | ▁▁▇▁▁ |
| TotalSales | 12 | 1 | 0 | 1 | 12 | 0 | 12 | 12 | 12 | 12 | 12 | ▁▁▇▁▁ |
| TotalSales | 24 | 1 | 0 | 1 | 24 | 0 | 24 | 24 | 24 | 24 | 24 | ▁▁▇▁▁ |
| TotalSales | 25 | 1 | 0 | 1 | 25 | NA | 25 | 25 | 25 | 25 | 25 | ▁▁▇▁▁ |
| TotalSales | 48 | 1 | 0 | 1 | 48 | NA | 48 | 48 | 48 | 48 | 48 | ▁▁▇▁▁ |
#menjalankan fungsi skim hanya untuk tiga kolom tsb
#sd = standar deviasi
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.5.2
## corrplot 0.95 loaded
#corrplot biasanya digunakan untuk visualisasi matriks korelasi
cor_matrix <- cor(data_or_clean[, c("Quantity", "UnitPrice", "TotalSales")])
#cor() untuk menghitung korelasi pearson (ukuran statistik yang menunjukkan seberapa kuat dan juga menunjukkan arah hubungan linear antara dua variabel numerik) antar kolom numerik yang sudah kita pilih
#data_or_clean [,c(...)] agar hanya memilih kolom tertentu
corrplot(cor_matrix, method = "circle", type = "upper")
#corrplot() dipakai untuk membuat grafik korelasi dari matriks yang sudah dihitung
#method = "circle" untuk menampilkan korelasi dengan menggunakan bentuk lingkaran
#type = "upper" agar gambar hanya menampilkan bagian atas dari matriks (karena bagian bawahnya juga simetris, kalau mau coba bisa pakai "lower" untuk melihat bagian bawahnya saja atau "full" untuk melihat semuanya)
#
library(ggplot2)
#Untuk memanggil package ggplot2
ggplot(data_or_clean, aes(x = Quantity)) +
#Untuk membuat plot dengan sumbu X sebagai Quantity
#+ digunakan untuk menambahkan elemen grafik satu per satu
geom_histogram(binwidth = 3, fill = "skyblue") +
#binwidth itu menyatakan lebar tiap batangnya, dalam hal ini 1 yang artinya 3 unit
#fill untuk warna histogram
theme_minimal() +
#untuk mengatur tema yang digunakan, untuk minimal dapat menghapus latar belakang abu-abu dan menipiskan garis grid agar dapat lebih fokus ke data saja
labs(title = "Distribusi Quantity")
#untuk memberi judul grafik
ggplot(data_or_clean, aes(x = UnitPrice)) +
geom_histogram(binwidth = 0.5, fill = "salmon") +
theme_light() +
labs(title = "Distribusi UnitPrice")
ggplot(data_or_clean, aes(x = TotalSales)) +
geom_histogram(binwidth = 5, fill = "lightgreen") +
#binwidth = 5 karena nilai penjualannya lebih besar, agar seimbang jadi lebar batangnya diperbesar juga
theme_classic() +
labs(title = "Distribusi TotalSales")
- d.) Produk Terlaris
produk_terlaris <- data_or_clean %>%
group_by(StockCode) %>%
#mengelompokkan data berdasarkan kode produk (stockcode)
summarise(TotalSales = sum(TotalSales)) %>%
#menghitung total penjualan setiap produk
arrange(desc(TotalSales)) %>%
#mengurutkan hasil dari yang penjualannya paling tinggi ke paling rendah
slice_head(n = 5)
#mengambil 5 produk teratas
ggplot(produk_terlaris, aes(x = reorder(StockCode, TotalSales), y = TotalSales)) +
#membuat grafik dari data produk_terlaris
#aes(x=reorder(...), y=total sales) artinya untuk sumbu X adalah nama produk, diurutkan berdasarkan totalsales, untuk sumbu Y adalah nilai penjualan
geom_bar(stat = "identity", fill = "maroon") +
#Untuk membuat grafik batang dengan tinggi sesuai nilai TotalSales
coord_flip() +
#Untuk membalikkan sumbu X dan Y agar menjadi grafik horizontal, supaya lebih mudah membaca nama produk
labs(title = "5 Produk Terlaris Berdasarkan StockCode", x = "StockCode", y = "Total Penjualan") +
#untuk menambahkan label dan judul ke grafik
theme_minimal() +
theme(axis.text.y = element_text(size = 10))
produk_terlaris
## # A tibble: 5 × 2
## StockCode TotalSales
## <chr> <dbl>
## 1 M 48
## 2 21497 25
## 3 17136A 24
## 4 20668 24
## 5 22294 24
penjualan_harian <- data_or_clean %>%
group_by(Tanggal = as.Date(InvoiceDate)) %>%
#mengelompokkan data berdasarkan tanggal transaksi
summarise(TotalSales = sum(TotalSales))
#untuk menjumlahkan total penjualan per hari
ggplot(penjualan_harian, aes(x = Tanggal, y = TotalSales)) +
geom_line(color = "darkblue") +
#menampilkan grafik garis berwarna biru tua
theme_minimal() +
labs(title = "Tren Penjualan Harian", x = "Tanggal", y = "Total Penjualan")
#untuk memberi judul dan label sumbu agar grafik mudah dibaca