# Load the CSV into `data2` (absolute, machine-specific path), then preview
# the first 10 rows.
data2 <- read.csv(
  file = "/Users/lengoctuongvy/Downloads/TLHK2:2025/T2_PHÂN TÍCH DỮ LIỆU ĐỊNH TÍNH/Book2.csv"
)
head(data2, n = 10)
X | PurchaseDate | CustomerID | Gender | MaritalStatus | Homeowner | Children | AnnualIncome | City | StateorProvince | Country | ProductFamily | ProductDepartment | ProductCategory | UnitsSold | Revenue |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 12/18/2007 | 7223 | F | S | Y | 2 | $30K - $50K | Los Angeles | CA | USA | Food | Snack Foods | Snack Foods | 5 | 27.38 |
2 | 12/20/2007 | 7841 | M | M | Y | 5 | $70K - $90K | Los Angeles | CA | USA | Food | Produce | Vegetables | 5 | 14.90 |
3 | 12/21/2007 | 8374 | F | M | N | 2 | $50K - $70K | Bremerton | WA | USA | Food | Snack Foods | Snack Foods | 3 | 5.52 |
4 | 12/21/2007 | 9619 | M | M | Y | 3 | $30K - $50K | Portland | OR | USA | Food | Snacks | Candy | 4 | 4.44 |
5 | 12/22/2007 | 1900 | F | S | Y | 3 | $130K - $150K | Beverly Hills | CA | USA | Drink | Beverages | Carbonated Beverages | 4 | 14.00 |
6 | 12/22/2007 | 6696 | F | M | Y | 3 | $10K - $30K | Beverly Hills | CA | USA | Food | Deli | Side Dishes | 3 | 4.37 |
7 | 12/23/2007 | 9673 | M | S | Y | 2 | $30K - $50K | Salem | OR | USA | Food | Frozen Foods | Breakfast Foods | 4 | 13.78 |
8 | 12/25/2007 | 354 | F | M | Y | 2 | $150K + | Yakima | WA | USA | Food | Canned Foods | Canned Soup | 6 | 7.34 |
9 | 12/25/2007 | 1293 | M | M | Y | 3 | $10K - $30K | Bellingham | WA | USA | Non-Consumable | Household | Cleaning Supplies | 1 | 2.41 |
10 | 12/25/2007 | 7938 | M | S | N | 1 | $50K - $70K | San Diego | CA | USA | Non-Consumable | Health and Hygiene | Pain Relievers | 2 | 8.96 |
# List the column names of the loaded data frame
colnames(data2)
## [1] "X" "PurchaseDate" "CustomerID"
## [4] "Gender" "MaritalStatus" "Homeowner"
## [7] "Children" "AnnualIncome" "City"
## [10] "StateorProvince" "Country" "ProductFamily"
## [13] "ProductDepartment" "ProductCategory" "UnitsSold"
## [16] "Revenue"
# Names of the columns to carry into `dt`
# (presumably the qualitative/categorical variables -- confirm).
tbdt <- c(
  "Gender", "MaritalStatus", "Homeowner", "AnnualIncome", "City",
  "StateorProvince", "Country", "ProductFamily", "ProductDepartment",
  "ProductCategory"
)
# Keep only those columns and preview the first rows
dt <- data2[tbdt]
head(dt)
Gender | MaritalStatus | Homeowner | AnnualIncome | City | StateorProvince | Country | ProductFamily | ProductDepartment | ProductCategory |
---|---|---|---|---|---|---|---|---|---|
F | S | Y | $30K - $50K | Los Angeles | CA | USA | Food | Snack Foods | Snack Foods |
M | M | Y | $70K - $90K | Los Angeles | CA | USA | Food | Produce | Vegetables |
F | M | N | $50K - $70K | Bremerton | WA | USA | Food | Snack Foods | Snack Foods |
M | M | Y | $30K - $50K | Portland | OR | USA | Food | Snacks | Candy |
F | S | Y | $130K - $150K | Beverly Hills | CA | USA | Drink | Beverages | Carbonated Beverages |
F | M | Y | $10K - $30K | Beverly Hills | CA | USA | Food | Deli | Side Dishes |
# Proportion of each Gender level. prop.table() divides the counts by their
# own total, so the shares always sum to 1 (table() drops NA by default, so
# dividing by nrow(data2) would not if Gender had missing values).
gt <- prop.table(table(data2$Gender))
gt
##
## F M
## 0.5099936 0.4900064
Vậy trong bộ dữ liệu này có 50.9993598% nữ và 49.0006402% nam.
# Frequency table of the Gender column
gender_freq <- table(data2$Gender)
# Bar chart: barplot() returns the x positions of the bar midpoints, which
# are reused to place the count labels.
gender_bar_mid <- barplot(
  gender_freq,
  col = c("purple", "green"),
  main = "Biểu đồ cột: Gender",
  xlab = "Gender",
  ylab = "Tần số",
  ylim = c(0, max(gender_freq) + 1)
)
# pos = 1 draws each count below the top of its bar
text(x = gender_bar_mid, y = gender_freq, labels = gender_freq, pos = 1)
# Pie chart with percentage labels (optional)
gender_prop <- prop.table(gender_freq)
# Percentages rounded to one decimal, used only for the slice labels
gender_pct <- round(gender_prop * 100, 1)
labels_percent <- paste0(names(gender_prop), ": ", gender_pct, "%")
pie(
  gender_freq,
  labels = labels_percent,
  col = c("purple", "green"),
  main = "Pie chart with percentages"
)
Ghi chú về tùy chọn của chunk: `eval = TRUE` là thực hiện câu lệnh, `eval = FALSE` là không thực hiện câu lệnh; `echo = TRUE` là hiển thị câu lệnh trong kết quả.
# Proportion of each Country level. prop.table() divides the counts by their
# own total, so the shares always sum to 1 (table() drops NA by default, so
# dividing by nrow(data2) would not if Country had missing values).
# NOTE(review): `thanhpho` means "city", but this holds Country shares; the
# name is kept because code outside this view may reference it.
thanhpho <- prop.table(table(data2$Country))
thanhpho
##
## Canada Mexico USA
## 0.05754321 0.26232307 0.68013372
Vậy trong bộ dữ liệu này có 26.232307% quan sát thuộc Mexico, 5.754321% thuộc Canada và 68.013372% thuộc USA.
# Frequency table of the Country column
f2 <- table(data2$Country)
# Bar chart: barplot() returns the x positions of the bar midpoints, which
# are reused to place the count labels.
country_bar_mid <- barplot(
  f2,
  col = c("purple", "green", "orange"),
  main = "Biểu đồ cột: Country",
  xlab = "Country",
  ylab = "Tần số",
  ylim = c(0, max(f2) + 1)
)
# pos = 1 draws each count below the top of its bar
text(x = country_bar_mid, y = f2, labels = f2, pos = 1)
# Pie chart of Country with percentage labels
country_prop <- prop.table(f2)
# Percentages rounded to one decimal, used only for the slice labels
country_pct <- round(country_prop * 100, 1)
labels_percent <- paste0(names(country_prop), ": ", country_pct, "%")
pie(
  f2,
  labels = labels_percent,
  col = c("purple", "green", "orange"),
  main = "Pie chart with percentages"
)