1. Đọc dữ liệu

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(DT)
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
# Load the supermarket transactions; the first CSV row holds the column names.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved constant.
# NOTE(review): the path is absolute and machine-specific; consider a
# project-relative path so the report can be knitted elsewhere.
d <- read.csv(
  file = 'C:/Users/Admin/Downloads/Supermarket Transactions.csv',
  header = TRUE
)

# Preview the first rows as an interactive table with horizontal scrolling.
datatable(head(d), options = list(scrollX = TRUE))

2. Biến định tính


2.1. Biến Gender

# Frequency table of Gender and the corresponding percentage shares.
tanso.gender <- table(d$Gender)
phantram.gender <- 100 * prop.table(tanso.gender)

# Pie chart: one slice per gender sized by customer count, labelled with the
# gender's percentage share rounded to one decimal place.
d %>%
  count(Gender) %>%
  mutate(
    percentage = n / sum(n) * 100,
    share_label = paste0(round(percentage, 1), "%")
  ) %>%
  ggplot(aes(x = '', y = n, fill = Gender)) +
  geom_col(color = 'black') +
  # Polar coordinates on y turn the single stacked column into a pie.
  coord_polar('y') +
  # Labels sit at x = 1.3 and are centred within each stacked segment.
  geom_text(
    aes(x = 1.3, label = share_label),
    position = position_stack(vjust = .5)
  ) +
  theme_void() +
  labs(
    title = 'Hình 1: Biểu đồ tròn thể hiện số lượng khách theo giới tính',
    x = ' ',
    y = ' '
  )

Kết quả cho thấy khách hàng nữ chiếm 51% và khách hàng nam chiếm 49%. Cửa hàng nhận được sự ủng hộ nhiều hơn từ khách hàng nữ, nhưng sự khác biệt giữa hai giới tính là không lớn, chỉ khoảng 2 điểm phần trăm.


2.2. Biến Marital Status

# Frequency table of MaritalStatus and the corresponding percentage shares.
tanso.maritalstatus <- table(d$MaritalStatus)
phantram.maritalstatus <- 100 * prop.table(tanso.maritalstatus)

# Bar chart of customer counts per marital status; each bar carries its count
# as a text label placed just above the bar top.
d %>%
  ggplot(aes(x = MaritalStatus)) +
  geom_bar(fill = "steelblue") +
  geom_text(
    aes(label = after_stat(count)),
    stat = "count",
    vjust = -0.5
  ) +
  theme_minimal() +
  labs(
    title = "Hình 2: Tần số phân bố theo tình trạng hôn nhân",
    x = "Tình trạng hôn nhân",
    y = "Số lượng"
  )

Kết quả cho thấy tình trạng hôn nhân không chênh lệch nhiều: nhóm độc thân chiếm tỷ lệ nhỉnh hơn với 7193 người (51,2%), trong khi nhóm đã kết hôn là 6866 người (48,8%). Việc nhóm độc thân chiếm tỷ lệ cao hơn một chút có thể gợi ý rằng đối tượng khách hàng chưa lập gia đình đang chiếm ưu thế nhẹ trong tệp khách hàng hiện tại.


2.3. Biến Homeowner

# Frequency table of Homeowner and the corresponding percentage shares.
tanso.homeowner <- table(d$Homeowner)
phantram.homeowner <- 100 * prop.table(tanso.homeowner)

# Bar chart of customer counts per home-ownership status; each bar carries its
# count as a text label placed just above the bar top.
d %>%
  ggplot(aes(x = Homeowner)) +
  geom_bar(fill = "lightgreen") +
  geom_text(
    aes(label = after_stat(count)),
    stat = "count",
    vjust = -0.5
  ) +
  theme_minimal() +
  labs(
    title = "Hình 3: Tần số phân bố theo tình trạng sở hữu nhà riêng",
    x = "Tình trạng sở hữu nhà riêng",
    y = "Số lượng"
  )

Kết quả cho thấy rằng có 60,1% khách hàng sở hữu nhà riêng, trong khi 39,9% còn lại không sở hữu nhà.

LS0tDQp0aXRsZTogIlRI4buwQyBIw4BOSCINCmF1dGhvcjogIk5ndXnhu4VuIFBo4bqhbSBUaMO6eSBBbiINCmRhdGU6ICIyMDI1LTA1LTE5Ig0Kb3V0cHV0Og0KICBodG1sX2RvY3VtZW50Og0KICAgIHRvYzogdHJ1ZQ0KICAgIG51bWJlciBzZWN0aW9uOiB0cnVlDQogICAgdG9jX2Zsb2F0OiB0cnVlDQogICAgY29kZV9mb2xkaW5nOiBoaWRlDQogICAgY29kZV9kb3dubG9hZDogdHJ1ZQ0KLS0tDQoNCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQ0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQ0KYGBgDQoNCjxkaXYgc3R5bGU9InRleHQtYWxpZ246IGp1c3RpZnk7Ij4gDQoNCiMgKioxLiDEkOG7jWMgZOG7ryBsaeG7h3UqKg0KDQpgYGB7ciB3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KERUKQ0KbGlicmFyeShzY2FsZXMpDQoNCmQgPC0gcmVhZC5jc3YoZmlsZSA9ICdDOi9Vc2Vycy9BZG1pbi9Eb3dubG9hZHMvU3VwZXJtYXJrZXQgVHJhbnNhY3Rpb25zLmNzdicsIGhlYWRlciA9IFQpDQoNCmRhdGF0YWJsZShoZWFkKGQpLCBvcHRpb25zID0gbGlzdChzY3JvbGxYID0gVFJVRSkpDQpgYGANCg0KIyAqKjIuIEJp4bq/biDEkeG7i25oIHTDrW5oKioNCl9fXw0KIyMgKioyLjEuIEJp4bq/biBHZW5kZXIqKg0KDQpgYGB7cn0NCnRhbnNvLmdlbmRlciA8LSB0YWJsZShkJEdlbmRlcikNCnBoYW50cmFtLmdlbmRlciA8LSBwcm9wLnRhYmxlKHRhbnNvLmdlbmRlcikgKiAxMDANCiANCmQgJT4lIGdyb3VwX2J5KEdlbmRlcikgJT4lIHN1bW1hcmlzZShuID0gbigpKSAlPiUNCiAgbXV0YXRlKHBlcmNlbnRhZ2UgPSBuIC8gc3VtKG4pICogMTAwKSAlPiUNCiAgZ2dwbG90KGFlcyh4ID0gJycsIHkgPSBuLGZpbGwgPSBHZW5kZXIpKSArDQogICAgZ2VvbV9jb2woY29sb3IgPSAnYmxhY2snKSArDQogICAgY29vcmRfcG9sYXIoJ3knKSArDQogICAgZ2VvbV90ZXh0KGFlcyh4ID0gMS4zLCBsYWJlbCA9IHBhc3RlMChyb3VuZChwZXJjZW50YWdlLCAxKSwgIiUiKSkscG9zaXRpb24gPSBwb3NpdGlvbl9zdGFjayh2anVzdCA9IC41KSkgKw0KICAgIHRoZW1lX3ZvaWQoKSArDQogbGFicyh0aXRsZSA9ICdIw6xuaCAxOiBCaeG7g3UgxJHhu5MgdHLDsm4gdGjhu4MgaGnhu4duIHPhu5EgbMaw4bujbmcga2jDoWNoIHRoZW8gZ2nhu5tpIHTDrW5oJywgeCA9ICcgJywgeSA9ICcgJykNCmBgYA0KDQpL4bq/dCBxdeG6oyBjaG8gdGjhuqV5IGPDsyBraMOhY2ggaMOgbmcgbuG7ryBjaGnhur9tIDUxJSB2w6Aga2jDoWNoIGjDoG5nIG5hbSBjaGnhur9tIDQ5JS4gQ+G7rWEgaMOgbmcgbmjhuq1uIMSRxrDhu6NjIHPhu7Eg4bunbmcgaOG7mSBuaGnhu4F1IGjGoW4gdOG7qyBraMOhY2ggaMOgbmcgbuG7rywgbmjGsG5nIHPhu7Ega2jDoWMgYmnhu4d0IGdp4buvYSBoYWkgZ2nhu5tpIHTDrW5oIGzDoCBraMO0bmcgbOG7m24sIGNo4buJIGtob+G6o25nIDIlLg0KDQpfX18NCiMjICoqMi4yLiBCaeG6v24gTWFyaXRh
bCBTdGF0dXMqKg0KDQpgYGB7cn0NCnRhbnNvLm1hcml0YWxzdGF0dXMgPC0gdGFibGUoZCRNYXJpdGFsU3RhdHVzKQ0KDQpwaGFudHJhbS5tYXJpdGFsc3RhdHVzIDwtIHByb3AudGFibGUodGFuc28ubWFyaXRhbHN0YXR1cykgKiAxMDANCmBgYA0KDQpgYGB7cn0NCmdncGxvdChkLCBhZXMoeCA9IE1hcml0YWxTdGF0dXMpKSArDQogIGdlb21fYmFyKGZpbGwgPSAic3RlZWxibHVlIikgKw0KICBnZW9tX3RleHQoc3RhdCA9ICJjb3VudCIsIGFlcyhsYWJlbCA9IGFmdGVyX3N0YXQoY291bnQpKSwgdmp1c3QgPSAtMC41KSArDQogIGxhYnMoDQogICAgdGl0bGUgPSAiSMOsbmggMjogVOG6p24gc+G7kSBwaMOibiBi4buRIHRoZW8gdMOsbmggdHLhuqFuZyBow7RuIG5ow6JuIiwNCiAgICB4ID0gIlTDrG5oIHRy4bqhbmcgaMO0biBuaMOibiIsDQogICAgeSA9ICJT4buRIGzGsOG7o25nIg0KICApICsNCiAgdGhlbWVfbWluaW1hbCgpDQpgYGANCg0KS+G6v3QgcXXhuqMgY2hvIHRo4bqleSB0w6xuaCB0cuG6oW5nIGvhur90IGjDtG4ga2jDtG5nIHF1w6EgY2jDqm5oIGzhu4djaCBuaGnhu4F1IGtoaSBuaMOzbSDEkeG7mWMgdGjDom4gY2hp4bq/bSB04bu3IGzhu4cgbmjhu4luaCBoxqFuIHbhu5tpIDcxOTMgbmfGsOG7nWkgKDUxLjIlKSwgdHJvbmcga2hpIG5ow7NtIMSRw6Mga+G6v3QgaMO0biBsw6AgNjg2NiBuZ8aw4budaSAoNDguOCUpLiBWaeG7h2MgbmjDs20gxJHhu5ljIHRow6JuIGNoaeG6v20gdOG7tyBs4buHIGNhbyBoxqFuIG3hu5l0IGNow7p0IGPDsyB0aOG7gyBn4bujaSDDvSBy4bqxbmcgxJHhu5FpIHTGsOG7o25nIGtow6FjaCBow6BuZyBjaMawYSBs4bqtcCBnaWEgxJHDrG5oIMSRYW5nIGNoaeG6v20gxrB1IHRo4bq/IG5o4bq5IHRyb25nIHThu4dwIGtow6FjaCBow6BuZyBoaeG7h24gdOG6oWkuDQoNCl9fXw0KIyMgKioyLjMuIEJp4bq/biBIb21lb3duZXIqKg0KDQpgYGB7cn0NCnRhbnNvLmhvbWVvd25lciA8LSB0YWJsZShkJEhvbWVvd25lcikNCg0KcGhhbnRyYW0uaG9tZW93bmVyIDwtIHByb3AudGFibGUodGFuc28uaG9tZW93bmVyKSAqIDEwMA0KYGBgDQoNCmBgYHtyfQ0KZ2dwbG90KGQsIGFlcyh4ID0gSG9tZW93bmVyKSkgKw0KICBnZW9tX2JhcihmaWxsID0gImxpZ2h0Z3JlZW4iKSArDQogIGdlb21fdGV4dChzdGF0ID0gImNvdW50IiwgYWVzKGxhYmVsID0gYWZ0ZXJfc3RhdChjb3VudCkpLCB2anVzdCA9IC0wLjUpICsNCiAgbGFicygNCiAgICB0aXRsZSA9ICJIw6xuaCAzOiBU4bqnbiBz4buRIHBow6JuIGLhu5EgdGhlbyB0w6xuaCB0cuG6oW5nIHPhu58gaOG7r3UgbmjDoCByacOqbmciLA0KICAgIHggPSAiVMOsbmggdHLhuqFuZyBz4bufIGjhu691IG5ow6AgcmnDqm5nIiwNCiAgICB5ID0gIlPhu5EgbMaw4bujbmciDQogICkgKw0KICB0aGVtZV9taW5pbWFsKCkNCmBgYA0KDQpL4bq/dCBxdeG6oyBjaG8gdGjhuqV5IHLhurFuZyBjw7MgNjAsMSUga2jDoWNoIGjDoG5nIHPhu58gaOG7r3UgbmjDoCByacOq
bmcsIHRyb25nIGtoaSAzOSw5JSBjw7JuIGzhuqFpIGtow7RuZyBz4bufIGjhu691IG5ow6AuDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0K