Central Tendency
Exercises ~ Week 6
1 Pendahuluan
Analisis central tendency merupakan teknik statistik dasar yang digunakan untuk mengidentifikasi nilai tengah atau titik pusat dari suatu distribusi data. Dalam analisis ini, kami menerapkan tiga ukuran pemusatan utama - mean (rata-rata), median (nilai tengah), dan modus (nilai paling sering muncul) - pada dataset transaksi retail untuk memahami karakteristik sentral dari variabel numerik seperti usia pelanggan, total pembelian, jumlah kunjungan, dan skor feedback. Ketiga ukuran ini saling melengkapi dalam memberikan gambaran komprehensif tentang pusat distribusi data dan membantu mengidentifikasi pola serta outlier dalam perilaku konsumen.
2 Import Data
# Import the customer purchase data from the CSV file.
# The file is read relative to the working directory (the same relative
# location the visualisation section uses for `df`) rather than from a
# user-specific absolute path, so the report can be knitted on any machine.
# NOTE: `data` keeps the leading row-index column `X` present in the file.
data <- read.csv("Central Tendency.csv", header = TRUE, sep = ",")
knitr::kable(data, caption = "Customer Purchase Data")

| X | CustomerID | Age | Gender | StoreLocation | ProductCategory | TotalPurchase | NumberOfVisits | FeedbackScore |
|---|---|---|---|---|---|---|---|---|
| 1 | 1 | 32 | M | West | Electronics | 528 | 4 | 1 |
| 2 | 2 | 37 | F | South | Books | 72 | 4 | 5 |
| 3 | 3 | 63 | M | West | Electronics | 327 | 4 | 2 |
| 4 | 4 | 41 | M | North | Sports | 391 | 7 | 1 |
| 5 | 5 | 42 | F | East | Electronics | 514 | 7 | 5 |
| 6 | 6 | 66 | F | East | Sports | 381 | 6 | 3 |
| 7 | 7 | 47 | M | East | Sports | 510 | 5 | 1 |
| 8 | 8 | 21 | F | South | Clothing | 102 | 4 | 2 |
| 9 | 9 | 30 | F | North | Sports | 559 | 2 | 2 |
| 10 | 10 | 33 | M | South | Books | 27 | 5 | 2 |
| 11 | 11 | 58 | F | East | Clothing | 40 | 3 | 5 |
| 12 | 12 | 45 | M | North | Electronics | 217 | 6 | 5 |
| 13 | 13 | 46 | F | South | Home | 118 | 4 | 4 |
| 14 | 14 | 42 | F | North | Sports | 532 | 6 | 3 |
| 15 | 15 | 32 | F | South | Books | 25 | 3 | 3 |
| 16 | 16 | 67 | F | South | Home | 87 | 4 | 1 |
| 17 | 17 | 47 | M | West | Home | 77 | 7 | 3 |
| 18 | 18 | 18 | F | East | Books | 80 | 7 | 3 |
| 19 | 19 | 51 | F | South | Electronics | 209 | 2 | 1 |
| 20 | 20 | 33 | F | South | Electronics | 232 | 7 | 4 |
| 21 | 21 | 24 | M | South | Books | 23 | 3 | 4 |
| 22 | 22 | 37 | M | South | Sports | 444 | 5 | 3 |
| 23 | 23 | 25 | M | South | Home | 127 | 5 | 3 |
| 24 | 24 | 29 | F | West | Clothing | 90 | 5 | 3 |
| 25 | 25 | 31 | M | West | Electronics | 165 | 6 | 1 |
| 26 | 26 | 18 | M | East | Books | 77 | 5 | 5 |
| 27 | 27 | 53 | F | North | Books | 52 | 10 | 4 |
| 28 | 28 | 42 | F | North | Books | 91 | 10 | 3 |
| 29 | 29 | 23 | F | East | Sports | 390 | 4 | 5 |
| 30 | 30 | 59 | M | East | Clothing | 127 | 5 | 1 |
| 31 | 31 | 46 | M | East | Clothing | 81 | 4 | 5 |
| 32 | 32 | 36 | M | West | Sports | 514 | 5 | 1 |
| 33 | 33 | 53 | F | East | Home | 101 | 3 | 1 |
| 34 | 34 | 53 | M | South | Books | 68 | 1 | 3 |
| 35 | 35 | 52 | F | North | Sports | 471 | 2 | 5 |
| 36 | 36 | 50 | M | North | Sports | 621 | 2 | 5 |
| 37 | 37 | 48 | F | East | Electronics | 327 | 6 | 3 |
| 38 | 38 | 39 | F | South | Home | 107 | 5 | 5 |
| 39 | 39 | 35 | F | West | Home | 132 | 7 | 4 |
| 40 | 40 | 34 | F | West | Electronics | 1128 | 5 | 4 |
| 41 | 41 | 30 | F | South | Electronics | 247 | 3 | 4 |
| 42 | 42 | 37 | M | West | Electronics | 382 | 5 | 3 |
| 43 | 43 | 21 | M | East | Home | 117 | 8 | 4 |
| 44 | 44 | 70 | F | West | Home | 81 | 7 | 5 |
| 45 | 45 | 58 | M | West | Books | 97 | 5 | 4 |
| 46 | 46 | 23 | M | North | Clothing | 66 | 4 | 2 |
| 47 | 47 | 34 | M | East | Clothing | 158 | 6 | 4 |
| 48 | 48 | 33 | F | West | Books | 27 | 7 | 3 |
| 49 | 49 | 52 | F | East | Clothing | 80 | 8 | 1 |
| 50 | 50 | 39 | M | West | Electronics | 104 | 3 | 3 |
| 51 | 51 | 44 | M | North | Sports | 554 | 4 | 5 |
| 52 | 52 | 40 | F | North | Home | 33 | 3 | 5 |
| 53 | 53 | 39 | F | East | Sports | 532 | 7 | 2 |
| 54 | 54 | 61 | F | East | Electronics | 374 | 3 | 3 |
| 55 | 55 | 37 | F | West | Clothing | 77 | 7 | 3 |
| 56 | 56 | 63 | M | South | Books | 33 | 6 | 1 |
| 57 | 57 | 18 | M | South | Books | 82 | 8 | 2 |
| 58 | 58 | 49 | M | West | Electronics | 144 | 5 | 4 |
| 59 | 59 | 42 | F | East | Home | 37 | 8 | 3 |
| 60 | 60 | 43 | F | South | Electronics | 270 | 7 | 4 |
| 61 | 61 | 46 | M | East | Books | 61 | 7 | 2 |
| 62 | 62 | 32 | F | East | Sports | 553 | 4 | 4 |
| 63 | 63 | 35 | F | South | Books | 95 | 3 | 4 |
| 64 | 64 | 25 | F | East | Home | 101 | 4 | 2 |
| 65 | 65 | 24 | M | East | Home | 82 | 6 | 1 |
| 66 | 66 | 45 | F | North | Books | 86 | 4 | 1 |
| 67 | 67 | 47 | F | East | Sports | 451 | 4 | 5 |
| 68 | 68 | 41 | M | North | Sports | 417 | 2 | 1 |
| 69 | 69 | 54 | F | South | Clothing | 83 | 5 | 5 |
| 70 | 70 | 70 | M | West | Home | 74 | 3 | 4 |
| 71 | 71 | 33 | F | West | Home | 76 | 7 | 1 |
| 72 | 72 | 18 | M | North | Electronics | 384 | 2 | 1 |
| 73 | 73 | 55 | F | East | Sports | 417 | 6 | 3 |
| 74 | 74 | 29 | M | East | Clothing | 51 | 7 | 5 |
| 75 | 75 | 30 | M | South | Sports | 574 | 2 | 5 |
| 76 | 76 | 55 | F | West | Books | 82 | 4 | 1 |
| 77 | 77 | 36 | M | West | Clothing | 65 | 6 | 1 |
| 78 | 78 | 22 | F | South | Home | 139 | 6 | 3 |
| 79 | 79 | 43 | F | North | Sports | 355 | 4 | 1 |
| 80 | 80 | 38 | M | South | Sports | 548 | 8 | 1 |
| 81 | 81 | 40 | F | East | Clothing | 34 | 4 | 3 |
| 82 | 82 | 46 | M | East | Clothing | 151 | 4 | 3 |
| 83 | 83 | 34 | F | East | Books | 68 | 4 | 2 |
| 84 | 84 | 50 | M | South | Clothing | 39 | 5 | 1 |
| 85 | 85 | 37 | M | North | Electronics | 98 | 5 | 5 |
| 86 | 86 | 45 | F | South | Books | 37 | 2 | 4 |
| 87 | 87 | 56 | F | West | Electronics | 654 | 6 | 5 |
| 88 | 88 | 47 | M | East | Home | 80 | 3 | 2 |
| 89 | 89 | 35 | F | South | Clothing | 89 | 6 | 5 |
| 90 | 90 | 57 | M | East | Home | 41 | 5 | 1 |
| 91 | 91 | 55 | M | West | Clothing | 59 | 6 | 3 |
| 92 | 92 | 48 | F | East | Clothing | 105 | 5 | 1 |
| 93 | 93 | 44 | F | North | Sports | 601 | 4 | 1 |
| 94 | 94 | 31 | M | South | Clothing | 44 | 3 | 5 |
| 95 | 95 | 60 | F | South | Sports | 471 | 5 | 2 |
| 96 | 96 | 31 | M | South | Sports | 540 | 11 | 1 |
| 97 | 97 | 70 | F | West | Electronics | 186 | 4 | 2 |
| 98 | 98 | 63 | F | West | Electronics | 256 | 4 | 3 |
| 99 | 99 | 36 | M | South | Home | 27 | 2 | 1 |
| 100 | 100 | 25 | M | West | Electronics | 92 | 5 | 5 |
| 101 | 101 | 29 | F | South | Home | 71 | 5 | 2 |
| 102 | 102 | 44 | F | North | Electronics | 173 | 6 | 2 |
| 103 | 103 | 36 | F | North | Clothing | 91 | 9 | 1 |
| 104 | 104 | 35 | F | East | Electronics | 400 | 5 | 3 |
| 105 | 105 | 26 | F | West | Sports | 427 | 2 | 1 |
| 106 | 106 | 39 | F | North | Sports | 400 | 4 | 4 |
| 107 | 107 | 28 | M | East | Books | 25 | 7 | 1 |
| 108 | 108 | 18 | M | East | Clothing | 32 | 7 | 4 |
| 109 | 109 | 34 | M | South | Books | 85 | 7 | 2 |
| 110 | 110 | 54 | F | West | Sports | 468 | 8 | 3 |
| 111 | 111 | 31 | F | East | Electronics | 87 | 6 | 3 |
| 112 | 112 | 49 | F | South | Sports | 491 | 5 | 5 |
| 113 | 113 | 18 | M | North | Books | 66 | 3 | 1 |
| 114 | 114 | 39 | F | West | Clothing | 33 | 6 | 3 |
| 115 | 115 | 48 | M | North | Home | 107 | 2 | 1 |
| 116 | 116 | 45 | F | West | Clothing | 476 | 5 | 4 |
| 117 | 117 | 42 | F | West | Electronics | 226 | 8 | 3 |
| 118 | 118 | 30 | M | South | Clothing | 62 | 7 | 1 |
| 119 | 119 | 27 | F | North | Sports | 542 | 6 | 3 |
| 120 | 120 | 25 | M | East | Sports | 339 | 3 | 1 |
| 121 | 121 | 42 | F | South | Clothing | 37 | 5 | 3 |
| 122 | 122 | 26 | F | West | Clothing | 19 | 10 | 4 |
| 123 | 123 | 33 | F | West | Books | 34 | 5 | 1 |
| 124 | 124 | 36 | F | West | Home | 110 | 4 | 1 |
| 125 | 125 | 68 | M | South | Clothing | 107 | 2 | 2 |
| 126 | 126 | 30 | M | East | Sports | 529 | 2 | 5 |
| 127 | 127 | 44 | M | North | Electronics | 156 | 7 | 2 |
| 128 | 128 | 41 | F | West | Home | 75 | 4 | 5 |
| 129 | 129 | 26 | M | North | Sports | 458 | 8 | 3 |
| 130 | 130 | 39 | F | South | Clothing | 78 | 5 | 2 |
| 131 | 131 | 62 | M | East | Electronics | 517 | 5 | 4 |
| 132 | 132 | 47 | M | North | Books | 30 | 8 | 5 |
| 133 | 133 | 41 | F | North | Books | 78 | 3 | 1 |
| 134 | 134 | 34 | M | East | Sports | 448 | 7 | 5 |
| 135 | 135 | 18 | F | East | Electronics | 373 | 4 | 1 |
| 136 | 136 | 57 | M | South | Clothing | 52 | 5 | 1 |
| 137 | 137 | 18 | F | West | Sports | 609 | 4 | 2 |
| 138 | 138 | 51 | M | North | Electronics | 250 | 2 | 2 |
| 139 | 139 | 69 | M | West | Electronics | 282 | 6 | 1 |
| 140 | 140 | 18 | F | East | Clothing | 66 | 3 | 3 |
| 141 | 141 | 51 | M | East | Clothing | 116 | 6 | 1 |
| 142 | 142 | 36 | F | East | Books | 30 | 5 | 2 |
| 143 | 143 | 18 | M | West | Sports | 525 | 3 | 3 |
| 144 | 144 | 18 | F | North | Clothing | 105 | 11 | 1 |
| 145 | 145 | 18 | M | East | Clothing | 78 | 5 | 1 |
| 146 | 146 | 32 | M | East | Home | 43 | 2 | 2 |
| 147 | 147 | 18 | F | North | Electronics | 136 | 8 | 4 |
| 148 | 148 | 50 | M | South | Sports | 567 | 7 | 5 |
| 149 | 149 | 70 | M | North | Clothing | 33 | 5 | 1 |
| 150 | 150 | 21 | F | South | Clothing | 160 | 4 | 5 |
| 151 | 151 | 52 | F | West | Books | 30 | 7 | 2 |
| 152 | 152 | 52 | F | South | Books | 65 | 9 | 1 |
| 153 | 153 | 45 | M | North | Books | 30 | 7 | 4 |
| 154 | 154 | 25 | M | East | Home | 113 | 3 | 4 |
| 155 | 155 | 38 | M | North | Home | 141 | 9 | 3 |
| 156 | 156 | 36 | M | South | Books | 89 | 5 | 2 |
| 157 | 157 | 48 | M | South | Books | 33 | 1 | 4 |
| 158 | 158 | 34 | M | North | Clothing | 79 | 3 | 3 |
| 159 | 159 | 55 | M | South | Home | 29 | 1 | 3 |
| 160 | 160 | 34 | F | North | Clothing | 119 | 7 | 2 |
| 161 | 161 | 56 | F | West | Clothing | 135 | 6 | 1 |
| 162 | 162 | 24 | M | North | Books | 32 | 8 | 4 |
| 163 | 163 | 21 | M | West | Home | 164 | 4 | 5 |
| 164 | 164 | 70 | F | South | Sports | 426 | 9 | 5 |
| 165 | 165 | 34 | M | South | Sports | 514 | 7 | 5 |
| 166 | 166 | 44 | M | West | Clothing | 58 | 7 | 5 |
| 167 | 167 | 50 | F | South | Clothing | 81 | 5 | 2 |
| 168 | 168 | 33 | M | West | Electronics | 576 | 9 | 2 |
| 169 | 169 | 48 | M | South | Sports | 424 | 5 | 5 |
| 170 | 170 | 46 | M | West | Home | 60 | 8 | 3 |
| 171 | 171 | 37 | M | East | Sports | 480 | 4 | 1 |
| 172 | 172 | 41 | M | North | Home | 97 | 5 | 2 |
| 173 | 173 | 39 | M | West | Electronics | 225 | 8 | 2 |
| 174 | 174 | 70 | F | North | Clothing | 11 | 6 | 5 |
| 175 | 175 | 29 | M | North | Books | 32 | 1 | 5 |
| 176 | 176 | 24 | M | West | Sports | 597 | 3 | 1 |
| 177 | 177 | 41 | F | North | Home | 127 | 5 | 1 |
| 178 | 178 | 45 | F | East | Home | 38 | 4 | 2 |
| 179 | 179 | 47 | M | West | Home | 129 | 11 | 2 |
| 180 | 180 | 33 | F | South | Books | 76 | 7 | 2 |
| 181 | 181 | 24 | M | North | Sports | 546 | 4 | 3 |
| 182 | 182 | 59 | M | North | Sports | 338 | 6 | 3 |
| 183 | 183 | 35 | M | East | Clothing | 83 | 8 | 5 |
| 184 | 184 | 27 | F | East | Clothing | 300 | 6 | 1 |
| 185 | 185 | 36 | M | North | Clothing | 66 | 6 | 3 |
| 186 | 186 | 37 | F | North | Clothing | 53 | 3 | 2 |
| 187 | 187 | 57 | F | West | Clothing | 98 | 5 | 5 |
| 188 | 188 | 41 | M | South | Sports | 472 | 4 | 1 |
| 189 | 189 | 51 | M | West | Electronics | 367 | 5 | 3 |
| 190 | 190 | 33 | M | West | Electronics | 385 | 3 | 2 |
| 191 | 191 | 43 | M | East | Electronics | 245 | 4 | 4 |
| 192 | 192 | 35 | F | West | Electronics | 136 | 6 | 1 |
| 193 | 193 | 41 | M | North | Sports | 368 | 5 | 1 |
| 194 | 194 | 27 | F | West | Electronics | 182 | 3 | 3 |
| 195 | 195 | 20 | F | North | Clothing | 126 | 4 | 2 |
| 196 | 196 | 70 | M | East | Sports | 304 | 6 | 1 |
| 197 | 197 | 49 | M | North | Books | 29 | 5 | 2 |
| 198 | 198 | 21 | F | West | Books | 32 | 3 | 3 |
| 199 | 199 | 31 | M | South | Sports | 409 | 2 | 2 |
| 200 | 200 | 22 | F | South | Books | 83 | 9 | 4 |
3 Mean (Rata-rata)
3.1 Definisi
Mean (rata-rata aritmetika) adalah nilai tunggal yang berfungsi mewakili keseluruhan kumpulan data dengan cara menyeimbangkan total nilai yang ada. Rata-rata diperoleh dengan membagi jumlah semua nilai data dengan jumlah total observasi.
Rata-rata dihitung dengan rumus:
\[\bar{X} = \frac{\sum_{i=1}^{n} X_i}{n}\]
Di mana:
- \(\bar{X}\): rata-rata
- \(X_i\): setiap nilai data
- \(n\): jumlah pengamatan
3.2 Aturan & Karakteristik:
- Dipengaruhi oleh semua nilai dalam dataset
- Sensitif terhadap outlier (nilai ekstrem)
- Cocok untuk data yang berdistribusi normal
- Dapat berupa bilangan desimal meskipun data berupa bilangan bulat
3.3 Langkah Perhitungan:
- Jumlahkan semua nilai data
- Hitung banyaknya data (n)
- Bagi total jumlah dengan n
3.4 Contoh:
Data: 10, 20, 30, 40, 50
Rumus mean:
\[\bar{X} = \frac{\sum_{i=1}^{n} X_i}{n}\]
Perhitungan:
\[\bar{X} = \frac{10 + 20 + 30 + 40 + 50}{5} = 30\]
Nilai mean datanya adalah: 30
4 Median (Nilai Tengah)
4.1 Definisi:
Median adalah nilai yang membagi suatu set data menjadi dua bagian yang sama besar. Artinya, setengah dari data memiliki nilai yang lebih kecil atau sama dengan median, dan setengah lainnya memiliki nilai yang lebih besar atau sama dengan median. Median cocok untuk data ordinal, interval dan rasio.
4.2 Aturan & Karakteristik:
- Tidak dipengaruhi oleh outlier (robust)
- Cocok untuk data yang tidak berdistribusi normal
- Lebih representatif ketika ada nilai ekstrem
4.3 Langkah Perhitungan:
Untuk data ganjil:
- Urutkan data dari terkecil ke terbesar
- Median = nilai tepat di tengah
Untuk data genap:
- Urutkan data dari terkecil ke terbesar
- Median = rata-rata dari dua nilai tengah
4.4 Contoh:
Data Ganjil: 5, 7, 8, 12, 15, 18, 20
\(n = 7 \Rightarrow Median = X_{(4)} = 12\)
Karena terdapat 7 titik data (angka ganjil), median terletak pada posisi (n + 1) / 2 = ke-4 ketika data diurutkan secara menaik. Oleh karena itu, nilai ke-4, yaitu 12, menjadi median - nilai pusat yang membagi kumpulan data menjadi dua bagian yang sama besar:
- Setengah bagian bawah: 5, 7, 8
- Setengah bagian atas: 15, 18, 20
Data Genap: 5, 7, 8, 12, 15, 18
\(n = 6 \Rightarrow Median = \frac{X_{(3)} + X_{(4)}}{2} = \frac{8 + 12}{2} = 10\)
Karena terdapat 6 titik data (angka genap), median adalah rata-rata dari nilai ke-3 dan ke-4 ketika data diurutkan secara menaik. Oleh karena itu, nilai ke-3 yaitu 8, dan nilai ke-4 yaitu 12, sehingga median adalah (8 + 12) / 2 = 10:
- Setengah bagian bawah: 5, 7, 8
- Setengah bagian atas: 12, 15, 18
5 Modus (Nilai Paling Sering Muncul)
5.1 Definisi:
Modus adalah nilai yang paling sering muncul dalam suatu dataset. Dengan kata lain, modus adalah nilai dengan frekuensi tertinggi.
5.2 Aturan & Karakteristik:
- Dapat digunakan untuk data kategorikal dan numerik
- Satu dataset bisa memiliki:
- Unimodal: satu modus
- Bimodal: dua modus
- Multimodal: lebih dari dua modus
- Tidak ada modus: semua nilai frekuensi sama
- Tidak dipengaruhi oleh nilai ekstrem
5.3 Langkah Perhitungan:
- Hitung frekuensi kemunculan setiap nilai
- Cari nilai dengan frekuensi tertinggi
- Jika ada beberapa nilai dengan frekuensi sama tertinggi, semuanya adalah modus
5.4 Contoh:
Data: 5, 7, 7, 8, 8, 8, 10
Frekuensi:
- 5 = 1 (muncul 1 kali)
- 7 = 2 (muncul 2 kali)
- 8 = 3 (muncul 3 kali)
- 10 = 1 (muncul 1 kali)
Modus = 8 (muncul 3 kali)
Data: 5, 5, 7, 7, 8
Frekuensi:
- 5 = 2 (muncul 2 kali)
- 7 = 2 (muncul 2 kali)
- 8 = 1 (muncul 1 kali)
Bimodal = 5, 7 (muncul 2 kali)
Data: 1, 1, 4, 6, 6, 3, 9, 9, 5, 7, 7
Frekuensi:
- 1 = 2 (muncul 2 kali)
- 3 = 1 (muncul 1 kali)
- 4 = 1 (muncul 1 kali)
- 5 = 1 (muncul 1 kali)
- 6 = 2 (muncul 2 kali)
- 7 = 2 (muncul 2 kali)
- 9 = 2 (muncul 2 kali)
Multimodal = 1, 6, 7, 9 (muncul 2 kali)
Data: 2, 2, 2, 3, 3, 3
Frekuensi:
- 2 = 3 (muncul 3 kali)
- 3 = 3 (muncul 3 kali)
Tanpa Modus = Semua nilai muncul dengan frekuensi yang sama (3 kali)
6 Pemilihan Berdasarkan Jenis Data
6.1 Gunakan Mean ketika:
- Data berdistribusi normal
- Tidak ada outlier yang signifikan
- Membutuhkan semua nilai diperhitungkan
6.2 Gunakan Median ketika:
- Ada outlier (nilai ekstrem)
- Data skewed (miring)
- Data ordinal (peringkat)
6.3 Gunakan Modus ketika:
- Data kategorikal (jenis kelamin, kategori produk)
- Mengetahui nilai paling populer
- Data nominal (nama, label)
6.4 Tips
- Selalu hitung ketiganya untuk memahami karakteristik data
- Visualisasikan dengan histogram untuk melihat distribusi
- Pertimbangkan konteks bisnis saat memilih mana yang digunakan
- Waspada outlier - bisa mempengaruhi mean secara signifikan
7 Visualisasi
- Persiapan Data dan Library
knitr::opts_chunk$set(fig.width=10, fig.height=7)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forcats)
library(scales)
# Read the data
df <- read.csv("Central Tendency.csv")

# Drop the leading column only when it really is the row-index column "X"
# shown in the imported table. An unconditional df[, -1] would silently
# discard a real variable if the file were saved without that column.
if (identical(names(df)[1], "X")) {
  df <- df[, -1]
}

# Treat the categorical variables (and the 1-5 feedback score) as factors
factor_cols <- c("Gender", "StoreLocation", "ProductCategory", "FeedbackScore")
df[factor_cols] <- lapply(df[factor_cols], as.factor)

# Create age groups for better visualization.
# cut() intervals are right-closed, so 17 as the lowest break puts age 18 in
# the first group; ages outside (17, 70] become NA.
df$AgeGroup <- cut(
  df$Age,
  breaks = c(17, 25, 35, 45, 55, 70),
  labels = c("18-25", "26-35", "36-45", "46-55", "56-70")
)
- Gender Distribution (Dengan sumbu Y yang lengkap)
# Customers per gender, plus each gender's share of all customers (percent).
gender_summary <- mutate(
  count(df, Gender),
  Percentage = n / sum(n) * 100
)

# Bar chart of the counts, tallest bar first, labelled "count (share%)".
gender_plot <- ggplot(
  gender_summary,
  aes(x = reorder(Gender, -n), y = n, fill = Gender)
) +
  geom_col(alpha = 0.8) +
  geom_text(
    aes(label = paste0(n, " (", round(Percentage, 1), "%)")),
    vjust = -0.5, size = 4, fontface = "bold"
  ) +
  scale_fill_manual(values = c("M" = "#1f77b4", "F" = "#ff7f0e")) +
  # 15% headroom above the tallest bar so the labels are not clipped
  scale_y_continuous(
    limits = c(0, max(gender_summary$n) * 1.15),
    expand = expansion(mult = c(0, 0.1))
  ) +
  labs(
    title = "DISTRIBUSI JENIS KELAMIN PELANGGAN",
    subtitle = "Persebaran pelanggan berdasarkan gender",
    x = "Jenis Kelamin",
    y = "Jumlah Pelanggan"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    plot.subtitle = element_text(hjust = 0.5, size = 10),
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 11),
    legend.position = "none"
  )
print(gender_plot)
- Store Location Distribution (Sumbu Y lengkap)
# Customers per store location, largest first, with percentage share.
location_summary <- mutate(
  arrange(count(df, StoreLocation), desc(n)),
  Percentage = n / sum(n) * 100
)

# Bar chart of the counts, tallest bar first, labelled "count (share%)".
location_plot <- ggplot(
  location_summary,
  aes(x = reorder(StoreLocation, -n), y = n, fill = StoreLocation)
) +
  geom_col(alpha = 0.8) +
  geom_text(
    aes(label = paste0(n, " (", round(Percentage, 1), "%)")),
    vjust = -0.5, size = 4, fontface = "bold"
  ) +
  scale_fill_brewer(palette = "Set2") +
  # 15% headroom above the tallest bar so the labels are not clipped
  scale_y_continuous(
    limits = c(0, max(location_summary$n) * 1.15),
    expand = expansion(mult = c(0, 0.1))
  ) +
  labs(
    title = "DISTRIBUSI LOKASI TOKO",
    subtitle = "Persebaran pelanggan berdasarkan lokasi toko",
    x = "Lokasi Toko",
    y = "Jumlah Pelanggan"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    plot.subtitle = element_text(hjust = 0.5, size = 10),
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 11),
    legend.position = "none"
  )
print(location_plot)
- Product Category Distribution (Sumbu Y lengkap)
# Purchases per product category, largest first, with percentage share.
category_summary <- mutate(
  arrange(count(df, ProductCategory), desc(n)),
  Percentage = n / sum(n) * 100
)

# Bar chart of the counts, tallest bar first, labelled "count (share%)".
category_plot <- ggplot(
  category_summary,
  aes(x = reorder(ProductCategory, -n), y = n, fill = ProductCategory)
) +
  geom_col(alpha = 0.8) +
  geom_text(
    aes(label = paste0(n, " (", round(Percentage, 1), "%)")),
    vjust = -0.5, size = 4, fontface = "bold"
  ) +
  scale_fill_brewer(palette = "Set3") +
  # 20% headroom above the tallest bar so the labels are not clipped
  scale_y_continuous(
    limits = c(0, max(category_summary$n) * 1.2),
    expand = expansion(mult = c(0, 0.15))
  ) +
  labs(
    title = "DISTRIBUSI KATEGORI PRODUK",
    subtitle = "Kategori produk yang paling banyak dibeli",
    x = "Kategori Produk",
    y = "Jumlah Pembelian"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    plot.subtitle = element_text(hjust = 0.5, size = 10),
    axis.text = element_text(size = 10),
    # slanted category names so neighbouring labels do not overlap
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title = element_text(size = 11),
    legend.position = "none"
  )
print(category_plot)
- Feedback Score Distribution (Sumbu Y lengkap)
# Customers per feedback score (1-5) with each score's percentage share.
feedback_summary <- df %>%
  count(FeedbackScore) %>%
  mutate(
    # fix the level order to 1..5 so bars and colours follow the score scale
    FeedbackScore = factor(FeedbackScore, levels = 1:5),
    Percentage = n / sum(n) * 100
  )

# Bar chart of the counts per score, labelled "count (share%)".
feedback_plot <- ggplot(
  feedback_summary,
  aes(x = FeedbackScore, y = n, fill = FeedbackScore)
) +
  geom_col(alpha = 0.8) +
  geom_text(
    aes(label = paste0(n, " (", round(Percentage, 1), "%)")),
    vjust = -0.5, size = 4, fontface = "bold"
  ) +
  # RdYlGn runs red -> green. With direction = 1 the lowest score
  # (1 = Sangat Tidak Puas) is red and the highest (5 = Sangat Puas) is
  # green. The previous direction = -1 inverted that meaning.
  scale_fill_brewer(palette = "RdYlGn", direction = 1) +
  # 15% headroom above the tallest bar so the labels are not clipped
  scale_y_continuous(
    limits = c(0, max(feedback_summary$n) * 1.15),
    expand = expansion(mult = c(0, 0.1))
  ) +
  labs(
    title = "DISTRIBUSI SKOR KEPUASAN PELANGGAN",
    subtitle = "Tingkat kepuasan pelanggan (1 = Sangat Tidak Puas, 5 = Sangat Puas)",
    x = "Skor Feedback",
    y = "Jumlah Pelanggan"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    plot.subtitle = element_text(hjust = 0.5, size = 10),
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 11),
    legend.position = "none"
  )
print(feedback_plot)
- Average Total Purchase by Category (Sumbu Y lengkap)
# Mean TotalPurchase per product category, highest average first.
avg_purchase <- arrange(
  summarise(
    group_by(df, ProductCategory),
    AvgPurchase = mean(TotalPurchase)
  ),
  desc(AvgPurchase)
)

# Bar chart of the category averages, labelled with the rounded dollar value.
avg_purchase_plot <- ggplot(
  avg_purchase,
  aes(
    x = reorder(ProductCategory, -AvgPurchase),
    y = AvgPurchase,
    fill = ProductCategory
  )
) +
  geom_col(alpha = 0.8) +
  geom_text(
    aes(label = paste0("$", round(AvgPurchase, 1))),
    vjust = -0.5, size = 4, fontface = "bold"
  ) +
  scale_fill_brewer(palette = "Set1") +
  # 15% headroom above the tallest bar; axis ticks formatted as dollars
  scale_y_continuous(
    limits = c(0, max(avg_purchase$AvgPurchase) * 1.15),
    expand = expansion(mult = c(0, 0.1)),
    labels = dollar_format(prefix = "$")
  ) +
  labs(
    title = "RATA-RATA TOTAL PEMBELIAN PER KATEGORI",
    subtitle = "Kategori produk dengan nilai transaksi tertinggi",
    x = "Kategori Produk",
    y = "Rata-rata Total Pembelian"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    plot.subtitle = element_text(hjust = 0.5, size = 10),
    axis.text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title = element_text(size = 11),
    legend.position = "none"
  )
print(avg_purchase_plot)
- Product Category by Gender (Stacked dengan sumbu Y lengkap)
# Purchases per (category, gender) pair. Within each category, `Total` is the
# category's overall count and `Percentage` is the gender's share of it.
category_gender <- df %>%
  count(ProductCategory, Gender) %>%
  group_by(ProductCategory) %>%
  mutate(
    Total = sum(n),
    Percentage = n / sum(n) * 100
  ) %>%
  ungroup() %>%
  arrange(desc(Total))

# Stacked bars (one per category, split by gender), tallest category first.
category_gender_plot <- ggplot(
  category_gender,
  aes(x = reorder(ProductCategory, -Total), y = n, fill = Gender)
) +
  geom_col(position = "stack", alpha = 0.8) +
  # "count (share%)" centred inside each stacked segment
  geom_text(
    aes(label = paste0(n, "\n(", round(Percentage, 0), "%)")),
    position = position_stack(vjust = 0.5),
    size = 3.2, color = "white", fontface = "bold"
  ) +
  scale_fill_manual(values = c("M" = "#1f77b4", "F" = "#ff7f0e")) +
  # `Total` already holds each category's stacked height, so its maximum is
  # the tallest bar; leave 10% headroom above it
  scale_y_continuous(
    limits = c(0, max(category_gender$Total) * 1.1),
    expand = expansion(mult = c(0, 0.05))
  ) +
  labs(
    title = "DISTRIBUSI KATEGORI PRODUK BERDASARKAN GENDER",
    subtitle = "Preferensi pembelian berdasarkan jenis kelamin",
    x = "Kategori Produk",
    y = "Jumlah Pembelian",
    fill = "Jenis Kelamin"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    plot.subtitle = element_text(hjust = 0.5, size = 10),
    axis.text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title = element_text(size = 11)
  )
print(category_gender_plot)
- Store Performance Analysis (Dual axis dengan sumbu Y lengkap)
# Per-store summary: customer count and mean purchase, visits and feedback.
# FeedbackScore is a factor, so it goes through as.character() before
# as.numeric() to recover the score itself rather than the level index.
store_performance <- df %>%
  group_by(StoreLocation) %>%
  summarise(
    TotalCustomers = n(),
    AvgPurchase = mean(TotalPurchase),
    AvgVisits = mean(NumberOfVisits),
    AvgFeedback = mean(as.numeric(as.character(FeedbackScore)))
  ) %>%
  arrange(desc(TotalCustomers))

# Scale factor for the dual axis: dividing AvgPurchase by it maps the purchase
# series onto the customer-count axis; the secondary axis multiplies it back.
scale_factor <- max(store_performance$AvgPurchase) /
  max(store_performance$TotalCustomers)

# Bars show customers per store (left axis); the line and points show the
# average purchase (right axis). The shared x mapping is declared once.
performance_plot <- ggplot(
  store_performance,
  aes(x = reorder(StoreLocation, -TotalCustomers))
) +
  geom_col(
    aes(y = TotalCustomers, fill = "Total Pelanggan"),
    alpha = 0.7, width = 0.6
  ) +
  # `linewidth` replaces `size`, which is deprecated for lines since
  # ggplot2 3.4.0
  geom_line(
    aes(
      y = AvgPurchase / scale_factor,
      group = 1,
      color = "Rata-rata Pembelian"
    ),
    linewidth = 1.5
  ) +
  geom_point(
    aes(y = AvgPurchase / scale_factor, color = "Rata-rata Pembelian"),
    size = 3.5
  ) +
  # Customer-count label, placed above the bar
  geom_text(
    aes(y = TotalCustomers, label = TotalCustomers),
    vjust = -0.8, size = 4, fontface = "bold", color = "darkblue"
  ) +
  # Average-purchase label, placed below the point
  geom_text(
    aes(
      y = AvgPurchase / scale_factor,
      label = paste0("$", round(AvgPurchase, 0))
    ),
    vjust = 1.5, size = 3.5, color = "darkred", fontface = "bold"
  ) +
  scale_fill_manual(values = c("Total Pelanggan" = "steelblue")) +
  scale_color_manual(values = c("Rata-rata Pembelian" = "red")) +
  scale_y_continuous(
    name = "Total Pelanggan",
    limits = c(0, max(store_performance$TotalCustomers) * 1.2),
    sec.axis = sec_axis(
      ~ . * scale_factor,
      name = "Rata-rata Pembelian ($)",
      labels = scales::dollar_format(prefix = "$")
    )
  ) +
  labs(
    title = "PERFORMA TOKO BERDASARKAN LOKASI",
    subtitle = "Perbandingan jumlah pelanggan dan nilai transaksi",
    x = "Lokasi Toko",
    fill = "Metric 1",
    color = "Metric 2"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
    plot.subtitle = element_text(hjust = 0.5, size = 12, margin = margin(b = 15)),
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 12, face = "bold"),
    axis.title.y.left = element_text(color = "darkblue", margin = margin(r = 10)),
    axis.title.y.right = element_text(color = "darkred", margin = margin(l = 10)),
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.margin = margin(t = 10),
    legend.text = element_text(size = 11),
    legend.title = element_text(face = "bold"),
    panel.grid.major = element_line(color = "grey90"),
    panel.grid.minor = element_blank()
  ) +
  # Fix the legend order: fill legend first, colour legend second
  guides(
    fill = guide_legend(order = 1, title.position = "top"),
    color = guide_legend(order = 2, title.position = "top")
  )
- Export dengan Setting yang Optimal
# Write all seven charts into a single PDF (14 x 10 inch device).
# Each print() call draws one plot on the open device. The device is closed
# by the dev.off() call that follows this block.
pdf("Customer_Analysis_Complete_Plots.pdf", width = 14, height = 10)
for (plot_obj in list(
  gender_plot,
  location_plot,
  category_plot,
  feedback_plot,
  avg_purchase_plot,
  category_gender_plot,
  performance_plot
)) {
  print(plot_obj)
}
dev.off()
## png
## 2
## semua plot telah disimpan dalam file: Customer_Analysis_Complete_Plots.pdf
## Total plot yang dihasilkan: 7 individual plots + 1 grid layout
7.1 Analisis Statistik Deskriptif
# Ringkasan Statistik Numerik dengan Tabel yang Lebih Baik
library(knitr)
library(kableExtra) # Pastikan package ini diinstall
## Warning: package 'kableExtra' was built under R version 4.5.2
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Descriptive statistics (mean, median, mode, SD, min, max) for the four
# numeric variables, one row per variable. The helpers live inside local()
# so they do not leak into the global environment.
summary_stats <- local({
  numeric_cols <- list(
    data$Age,
    data$TotalPurchase,
    data$NumberOfVisits,
    data$FeedbackScore
  )

  # Apply one statistic to every column and collect the results in a vector.
  per_column <- function(stat) unlist(lapply(numeric_cols, stat))

  # Most frequent value. which.max() keeps only the first of tied values, so
  # a multimodal column still reports a single mode.
  most_frequent <- function(x) as.numeric(names(which.max(table(x))))

  data.frame(
    Variabel = c("Usia (Tahun)", "Total Pembelian ($)", "Jumlah Kunjungan", "Skor Feedback (1-5)"),
    Mean = round(per_column(mean), 2),
    Median = per_column(median),
    Modus = per_column(most_frequent),
    SD = round(per_column(sd), 2),
    Min = per_column(min),
    Max = per_column(max)
  )
})
# Tampilkan tabel dengan kable (lebih rapi)
cat("=== STATISTIK DESKRIPTIF UTAMA ===\n\n")
## === STATISTIK DESKRIPTIF UTAMA ===
# Base table: first column left-aligned, the six statistic columns centred.
summary_table <- kable(
  summary_stats,
  caption = "Ringkasan Statistik Deskriptif Variabel Numerik",
  col.names = c("Variabel", "Rata-rata", "Median", "Modus", "Std. Dev", "Minimum", "Maximum"),
  align = c("l", rep("c", 6))
)

# Styling: dark header row, bold first column, alternating row backgrounds.
summary_table %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    position = "center",
    font_size = 14
  ) %>%
  column_spec(1, bold = TRUE, width = "3cm") %>%
  column_spec(2:7, width = "2cm") %>%
  row_spec(0, bold = TRUE, color = "white", background = "#2C3E50") %>%
  row_spec(c(1, 3), background = "#ECF0F1") %>%
  row_spec(c(2, 4), background = "#F8F9F9")

| Variabel | Rata-rata | Median | Modus | Std. Dev | Minimum | Maximum |
|---|---|---|---|---|---|---|
| Usia (Tahun) | 39.99 | 39.0 | 18 | 13.38 | 18 | 70 |
| Total Pembelian ($) | 211.79 | 108.5 | 33 | 196.06 | 11 | 1128 |
| Jumlah Kunjungan | 5.16 | 5.0 | 5 | 2.10 | 1 | 11 |
| Skor Feedback (1-5) | 2.80 | 3.0 | 1 | 1.46 | 1 | 5 |
8 Interpretasi Statistik
Usia: Rata-rata 39.99 tahun dengan median 39 tahun; usia yang paling sering muncul adalah 18 tahun (modus)
Total Pembelian: Rata-rata $211.79, jauh di atas median $108.5, menunjukkan ada pembelian besar yang menarik mean ke atas
Jumlah Kunjungan: Rata-rata 5.16 kali (median dan modus 5), cukup konsisten
Skor Feedback: Rata-rata 2.80 (cenderung rendah), perlu perbaikan layanan
8.1 Visualisasi 1: Distribusi Usia
# Histogram of customer age with mean (red) and median (blue) reference lines
library(ggplot2)

# Computed once and reused for both the reference lines and the subtitle, so
# the text always matches what is drawn (it used to be hard-coded).
age_mean <- mean(data$Age)
age_median <- median(data$Age)

p1 <- ggplot(data, aes(x = Age)) +
  geom_histogram(binwidth = 5, fill = "skyblue", color = "black", alpha = 0.7) +
  # A constant xintercept draws one line. Mapping it inside aes() repeats the
  # line for every row. `linewidth` replaces `size`, which is deprecated for
  # lines since ggplot2 3.4.0.
  geom_vline(
    xintercept = age_mean,
    color = "red", linetype = "dashed", linewidth = 1
  ) +
  geom_vline(
    xintercept = age_median,
    color = "blue", linetype = "dashed", linewidth = 1
  ) +
  labs(
    title = "DISTRIBUSI USIA PELANGGAN",
    subtitle = sprintf(
      "Garis merah: Rata-rata (%.2f tahun) | Garis biru: Median (%s tahun)",
      age_mean, format(age_median)
    ),
    x = "Usia (Tahun)", y = "Jumlah Pelanggan"
  ) +
  theme_minimal() +
  annotate(
    "text",
    x = 50, y = 25,
    label = "Puncak di usia 18-25 tahun\n(Generasi Z/Milenial Muda)",
    color = "darkblue", size = 3.5
  )
print(p1)
8.2 Interpretasi Visual 1
Distribusi: Multimodal dengan beberapa puncak
Segmentasi Usia Dominan:
18-25 tahun: Generasi muda (puncak tertinggi)
40-45 tahun: Middle-age professionals
55-65 tahun: Baby boomers
Implikasi Bisnis: Diperlukan strategi marketing yang berbeda untuk tiap generasi
8.3 Visualisasi 2: Distribusi Total Pembelian
# Histogram of total purchase with mean (red) and median (blue) reference lines

# Computed once and reused for both the reference lines and the subtitle, so
# the text always matches what is drawn (it used to be hard-coded).
purchase_mean <- mean(data$TotalPurchase)
purchase_median <- median(data$TotalPurchase)

p2 <- ggplot(data, aes(x = TotalPurchase)) +
  geom_histogram(bins = 30, fill = "lightgreen", color = "black", alpha = 0.7) +
  # A constant xintercept draws one line. Mapping it inside aes() repeats the
  # line for every row. `linewidth` replaces `size`, which is deprecated for
  # lines since ggplot2 3.4.0.
  geom_vline(
    xintercept = purchase_mean,
    color = "red", linetype = "dashed", linewidth = 1
  ) +
  geom_vline(
    xintercept = purchase_median,
    color = "blue", linetype = "dashed", linewidth = 1
  ) +
  labs(
    title = "DISTRIBUSI TOTAL PEMBELIAN",
    subtitle = sprintf(
      "Garis merah: Rata-rata ($%.0f) | Garis biru: Median ($%s)",
      purchase_mean, format(purchase_median)
    ),
    x = "Total Pembelian ($)", y = "Frekuensi"
  ) +
  theme_minimal() +
  annotate(
    "text",
    x = 800, y = 15,
    label = "Mayoritas pembelian: $0-400\nBeberapa pembelian sangat besar (>$800)",
    color = "darkgreen", size = 3.5
  )
print(p2)
8.4 Interpretasi Visual 2
Pola Spending:
75% pelanggan: Belanja di bawah $450
Big Spenders: Beberapa pelanggan belanja >$600 (outlier positif)
Segmentasi Customer:
Small Spenders: <$100 (banyak pelanggan baru?)
Medium Spenders: $100-400 (pelanggan reguler)
Big Spenders: >$400 (pelanggan VIP)
8.5 Visual 2: Segmentasi Spending Pelanggan
# Spending segments in display order (lowest to highest spend)
segment_levels <- c(
  "Small (<$100)", "Medium ($100-300)", "Large ($300-500)", "VIP (>$500)"
)

# Classify each customer by TotalPurchase, then count customers per segment.
# case_when() takes the first matching branch, so the bounds are
# [0, 100), [100, 300), [300, 500) and 500 or more.
spending_segments <- data %>%
  mutate(SpendingSegment = case_when(
    TotalPurchase < 100 ~ "Small (<$100)",
    TotalPurchase < 300 ~ "Medium ($100-300)",
    TotalPurchase < 500 ~ "Large ($300-500)",
    TRUE ~ "VIP (>$500)"
  )) %>%
  count(SpendingSegment) %>%
  mutate(
    Percentage = n / sum(n) * 100,
    Label = paste0(n, " (", round(Percentage, 1), "%)"),
    Order = factor(SpendingSegment, levels = segment_levels)
  )

# Upper y limit with 25% headroom so the bar labels fit
y_max <- max(spending_segments$n) * 1.25

p_spending <- ggplot(spending_segments, aes(x = Order, y = n, fill = Order)) +
  geom_col(alpha = 0.9, width = 0.7) +
  geom_text(
    aes(label = Label),
    vjust = -0.8, size = 4.5, fontface = "bold", color = "black"
  ) +
  # Colours are named by segment so each keeps its colour even when another
  # segment has no customers
  scale_fill_manual(
    values = setNames(
      c("#E74C3C", "#F39C12", "#2ECC71", "#3498DB"),
      segment_levels
    )
  ) +
  labs(
    title = "SEGMENTASI TOTAL PEMBELIAN",
    subtitle = "Klasifikasi Pelanggan Berdasarkan Spending Power",
    x = "Segment Spending", y = "Jumlah Pelanggan",
    # paste0() avoids the stray spaces paste() inserted after each "$"
    caption = paste0(
      "Rata-rata: $", round(mean(data$TotalPurchase), 0),
      " | Median: $", round(median(data$TotalPurchase), 0)
    )
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5, margin = margin(b = 10)),
    plot.subtitle = element_text(hjust = 0.5, size = 12, margin = margin(b = 15)),
    axis.text = element_text(size = 11),
    plot.margin = margin(20, 20, 20, 20),
    panel.grid.minor = element_blank()
  ) +
  ylim(0, y_max) # y limit derived from the data
print(p_spending)
INSIGHT:
Small Spenders: 28% - Focus on upsell
Medium Spenders: 35% - Main revenue stream
Large Spenders: 22% - Loyal customers
VIP Spenders: 15% - High value, perlu special treatment
library(ggplot2)
library(dplyr)
# --- Age distribution from the CSV dataset ---
data_sym <- data.frame(value = data$Age)

# --- Compute mean, median, mode ---
mean_val <- mean(data_sym$value, na.rm = TRUE)
median_val <- median(data_sym$value, na.rm = TRUE)
# Mode = most frequent value after rounding to whole years; which.max()
# returns the first (smallest) value when several are tied.
mode_val <- as.numeric(names(which.max(table(round(data_sym$value, 0)))))

# Peak density of the 5-year-bin histogram, taken from a throw-away build of
# the histogram layer alone. Computed once here rather than inline inside the
# plot call; it sets the visible y range below.
hist_peak <- max(
  ggplot_build(
    ggplot(data_sym, aes(x = value)) +
      geom_histogram(aes(y = after_stat(density)), binwidth = 5)
  )$data[[1]]$density
)

# --- Visualization (histogram + density curve + central-tendency lines) ---
ggplot(data_sym, aes(x = value)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 5,
                 fill = "#4ECDC4",
                 color = "white",
                 alpha = 0.8) +
  geom_density(color = "#2C3E50", linewidth = 1.5, alpha = 0.7) +
  # The colour is mapped to a constant label so each line gets a legend entry.
  geom_vline(aes(xintercept = mean_val, color = "Mean"), linewidth = 2) +
  geom_vline(aes(xintercept = median_val, color = "Median"),
             linewidth = 2, linetype = "dashed") +
  geom_vline(aes(xintercept = mode_val, color = "Mode"),
             linewidth = 2, linetype = "dotdash") +
  scale_color_manual(
    name = "UKURAN PEMUSATAN:",
    values = c("Mean" = "#E74C3C",
               "Median" = "#F39C12",
               "Mode" = "#3498DB")
  ) +
  labs(
    title = "DISTRIBUSI USIA PELANGGAN",
    subtitle = "Analisis Central Tendency - Mean, Median, dan Mode",
    x = "USIA (TAHUN)",
    y = "KEPADATAN DISTRIBUSI",
    caption = paste(
      " STATISTIK:\n",
      "• Mean (Rata-rata):", round(mean_val, 1), "tahun\n",
      "• Median (Nilai Tengah):", median_val, "tahun\n",
      "• Mode (Nilai Paling Sering):", mode_val, "tahun"
    )
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5, size = 18, color = "#2C3E50"),
    plot.subtitle = element_text(hjust = 0.5, size = 14, color = "#7F8C8D",
                                 margin = margin(b = 15)),
    plot.caption = element_text(size = 11, color = "#34495E", lineheight = 1.4, hjust = 0),
    axis.title = element_text(face = "bold", size = 12),
    axis.text = element_text(size = 11),
    legend.position = "top",
    legend.title = element_text(face = "bold", size = 12),
    legend.text = element_text(size = 11),
    legend.box = "horizontal",
    legend.key.size = unit(1, "cm"),
    plot.margin = margin(20, 30, 30, 20),
    panel.grid.major = element_line(color = "#ECF0F1"),
    panel.grid.minor = element_blank()
  ) +
  # Leave 20% headroom above the tallest histogram bar.
  coord_cartesian(ylim = c(0, hist_peak * 1.2))

library(ggplot2)
library(dplyr)
# --- Right-skewed data: Total Purchase from the CSV dataset ---
data_skew <- data.frame(value = data$TotalPurchase)

# --- Compute mean, median, mode ---
mean_val <- mean(data_skew$value, na.rm = TRUE)
median_val <- median(data_skew$value, na.rm = TRUE)
# Mode = most frequent value after rounding to whole dollars; which.max()
# returns the first (smallest) value when several are tied.
mode_val <- as.numeric(names(which.max(table(round(data_skew$value, 0)))))

# --- Visualization (histogram + density curve + central-tendency lines) ---
ggplot(data_skew, aes(x = value)) +
  geom_histogram(aes(y = after_stat(density)),
                 bins = 30,
                 fill = "#E74C3C",
                 color = "white",
                 alpha = 0.8) +
  geom_density(color = "#2C3E50", linewidth = 1.5, alpha = 0.7) +
  # The colour is mapped to a constant label so each line gets a legend entry.
  geom_vline(aes(xintercept = mean_val, color = "Mean"), linewidth = 2) +
  geom_vline(aes(xintercept = median_val, color = "Median"),
             linewidth = 2, linetype = "dashed") +
  geom_vline(aes(xintercept = mode_val, color = "Mode"),
             linewidth = 2, linetype = "dotdash") +
  scale_color_manual(
    name = "UKURAN PEMUSATAN:",
    values = c("Mean" = "#F39C12",
               "Median" = "#3498DB",
               "Mode" = "#9B59B6")
  ) +
  labs(
    title = "DISTRIBUSI TOTAL PEMBELIAN - RIGHT SKEWED",
    subtitle = "Mean tertarik ke kanan karena pengaruh nilai ekstrem (pembelian besar)",
    x = "TOTAL PEMBELIAN ($)",
    y = "KEPADATAN DISTRIBUSI",
    # paste0() keeps the amount directly after "$"; paste() would insert a
    # space there and a trailing space before every line break.
    caption = paste0(
      " PERBANDINGAN UKURAN PEMUSATAN:\n",
      " • Mean (Rata-rata): $", round(mean_val, 0), "\n",
      " • Median (Nilai Tengah): $", median_val, "\n",
      " • Mode (Nilai Paling Sering): $", mode_val, "\n",
      " • Selisih Mean-Median: $", round(mean_val - median_val, 0),
      " (indikasi skewness ke kanan)"
    )
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5, size = 18, color = "#2C3E50"),
    plot.subtitle = element_text(hjust = 0.5, size = 14, color = "#7F8C8D",
                                 margin = margin(b = 15)),
    plot.caption = element_text(size = 11, color = "#34495E", lineheight = 1.4, hjust = 0),
    axis.title = element_text(face = "bold", size = 12),
    axis.text = element_text(size = 11),
    legend.position = "top",
    legend.title = element_text(face = "bold", size = 12),
    legend.text = element_text(size = 11),
    plot.margin = margin(20, 30, 30, 20),
    panel.grid.major = element_line(color = "#ECF0F1"),
    panel.grid.minor = element_blank()
  ) +
  # Annotation explaining the skewness, placed just right of the mean line.
  annotate("text", x = mean_val + 50, y = 0.002,
           label = "Mean > Median\nDistribusi Miring Kanan",
           color = "#F39C12", fontface = "bold", size = 4) +
  # Show the x axis from 0 to 5% past the largest purchase. This zooms with
  # coord_cartesian() instead of scale limits: scale_x_continuous(limits = ...)
  # discarded the histogram bars reaching past the limits and produced
  # "Removed 2 rows containing missing values or values outside the scale
  # range (`geom_bar()`)".
  coord_cartesian(xlim = c(0, max(data_skew$value, na.rm = TRUE) * 1.05))