library(csv)
# Read the supermarket transactions CSV into `d`, using the first row as the
# column names.
# NOTE(review): the path is absolute and machine-specific; point it at the
# local copy of the file before running elsewhere.
d <- read.csv(
  "D:/UFM/2025- Kì 2/Phân tích dữ liệu định tính - Trần Mạnh Tường/Supermarket Transactions.csv",
  # Spelled out as TRUE: `T` is an ordinary variable that can be reassigned.
  header = TRUE
)
# Show the column names that were read.
names(d)
## [1] "X" "PurchaseDate" "CustomerID"
## [4] "Gender" "MaritalStatus" "Homeowner"
## [7] "Children" "AnnualIncome" "City"
## [10] "StateorProvince" "Country" "ProductFamily"
## [13] "ProductDepartment" "ProductCategory" "UnitsSold"
## [16] "Revenue"
# Columns kept for the analysis (person, location and product attributes).
tbdt <- c(
  "Gender", "MaritalStatus", "Homeowner",
  "City", "StateorProvince", "Country",
  "ProductFamily", "ProductDepartment", "ProductCategory"
)
# Keep only those columns, in the order listed above.
dc <- d[tbdt]
# Preview the first rows of the reduced data frame.
head(dc)
Gender | MaritalStatus | Homeowner | City | StateorProvince | Country | ProductFamily | ProductDepartment | ProductCategory |
---|---|---|---|---|---|---|---|---|
F | S | Y | Los Angeles | CA | USA | Food | Snack Foods | Snack Foods |
M | M | Y | Los Angeles | CA | USA | Food | Produce | Vegetables |
F | M | N | Bremerton | WA | USA | Food | Snack Foods | Snack Foods |
M | M | Y | Portland | OR | USA | Food | Snacks | Candy |
F | S | Y | Beverly Hills | CA | USA | Drink | Beverages | Carbonated Beverages |
F | M | Y | Beverly Hills | CA | USA | Food | Deli | Side Dishes |
# Number of rows for each gender.
table(d$Gender)
##
## F M
## 7170 6889
# Share of all rows for each gender. nrow(d) is already a single number, so no
# sum() is needed around it.
table(d$Gender) / nrow(d)
##
## F M
## 0.5099936 0.4900064
Vậy trong bộ dữ liệu này có khoảng 51.00% nữ và 49.00% nam.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Bar chart of the number of rows per gender. Each bar is labelled with its
# count; vjust = 2 shifts the label down from the top of the bar.
d %>%
  count(Gender) %>%
  ggplot(aes(x = Gender, y = n)) +
  geom_col(fill = "lightblue") +
  geom_text(aes(label = n), vjust = 2, color = "black") +
  labs(
    x = "Giới tính",
    y = "tần suất",
    caption = "Biểu đồ tần suất giới tính"
  )
# Bar chart of the number of rows per gender, with each bar labelled by that
# gender's share of all rows.
# The share is computed inside the piped data and formatted as text, so every
# label stays on the same row as its bar. Passing a <table> object to aes()
# made ggplot2 report "Don't know how to automatically pick scale for object
# of type <table>" and relied on the table order matching the bar order.
d %>%
  count(Gender) %>%
  mutate(share = sprintf("%.2f%%", 100 * n / sum(n))) %>%
  ggplot(aes(x = Gender, y = n)) +
  geom_col(fill = "pink") +
  geom_text(aes(label = share), vjust = 2, color = "black") +
  labs(
    x = "Giới tính",
    y = "tần suất",
    caption = "Biểu đồ tần suất giới tính"
  )