# Load the household survey data; the path is relative to the working directory.
data2 <- read.csv("khao_sat_ho_gia_dinh.csv")
# Dimensions of the data set: number of rows, then number of columns.
dim(data2)
## [1] 100 8
# Column names available for the analysis below.
names(data2)
## [1] "ho_id" "khu_vuc" "gioi_tinh" "tuoi" "hoc_van" "thu_nhap"
## [7] "chi_tieu" "tiet_kiem"
# Column types: khu_vuc, gioi_tinh and hoc_van are read as character,
# the remaining columns as integer.
str(data2)
## 'data.frame': 100 obs. of 8 variables:
## $ ho_id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ khu_vuc : chr "Nong_thon" "Thanh_thi" "Nong_thon" "Thanh_thi" ...
## $ gioi_tinh: chr "Nu" "Nam" "Nu" "Nam" ...
## $ tuoi : int 31 32 33 34 35 36 37 38 39 40 ...
## $ hoc_van : chr "THCS" "THCS" "THPT" "Dai_hoc" ...
## $ thu_nhap : int 7150000 7300000 7450000 7600000 7750000 7900000 8050000 8200000 8350000 8500000 ...
## $ chi_tieu : int 5100000 5200000 5300000 5400000 5500000 5600000 5700000 5800000 5900000 6000000 ...
## $ tiet_kiem: int 2050000 2100000 2150000 2200000 2250000 2300000 2350000 2400000 2450000 2500000 ...
# Per-column summary: quartiles and mean for the integer columns; the
# character columns only report length, class and mode.
summary(data2)
## ho_id khu_vuc gioi_tinh tuoi
## Min. : 1.00 Length:100 Length:100 Min. :30
## 1st Qu.: 25.75 Class :character Class :character 1st Qu.:36
## Median : 50.50 Mode :character Mode :character Median :42
## Mean : 50.50 Mean :42
## 3rd Qu.: 75.25 3rd Qu.:48
## Max. :100.00 Max. :54
## hoc_van thu_nhap chi_tieu tiet_kiem
## Length:100 Min. : 7150000 Min. : 5100000 Min. :2050000
## Class :character 1st Qu.:10862500 1st Qu.: 7575000 1st Qu.:3287500
## Mode :character Median :14575000 Median :10050000 Median :4525000
## Mean :14575000 Mean :10050000 Mean :4525000
## 3rd Qu.:18287500 3rd Qu.:12525000 3rd Qu.:5762500
## Max. :22000000 Max. :15000000 Max. :7000000
# Overall means of income, spending and savings across all households.
thu_nhap_tb <- mean(data2[["thu_nhap"]])
chi_tieu_tb <- mean(data2[["chi_tieu"]])
tiet_kiem_tb <- mean(data2[["tiet_kiem"]])

# Render the three means as a two-column table: indicator label and value.
bang_trung_binh <- data.frame(
  Chi_tieu = c("Thu nhập trung bình", "Chi tiêu trung bình", "Tiết kiệm trung bình"),
  Gia_tri = c(thu_nhap_tb, chi_tieu_tb, tiet_kiem_tb)
)
kable(bang_trung_binh)
| Chi_tieu | Gia_tri |
|---|---|
| Thu nhập trung bình | 14575000 |
| Chi tiêu trung bình | 10050000 |
| Tiết kiệm trung bình | 4525000 |
# Descriptive statistics of thu_nhap, chi_tieu and tiet_kiem, with one
# column per khu_vuc group.
table1(~ thu_nhap + chi_tieu + tiet_kiem | khu_vuc, data = data2)
| | Nong_thon (N=50) | Thanh_thi (N=50) | Overall (N=100) |
|---|---|---|---|
| thu_nhap | |||
| Mean (SD) | 14500000 (4370000) | 14700000 (4370000) | 14600000 (4350000) |
| Median [Min, Max] | 14500000 [7150000, 21900000] | 14700000 [7300000, 22000000] | 14600000 [7150000, 22000000] |
| chi_tieu | |||
| Mean (SD) | 10000000 (2920000) | 10100000 (2920000) | 10100000 (2900000) |
| Median [Min, Max] | 10000000 [5100000, 14900000] | 10100000 [5200000, 15000000] | 10100000 [5100000, 15000000] |
| tiet_kiem | |||
| Mean (SD) | 4500000 (1460000) | 4550000 (1460000) | 4530000 (1450000) |
| Median [Min, Max] | 4500000 [2050000, 6950000] | 4550000 [2100000, 7000000] | 4530000 [2050000, 7000000] |
# Descriptive statistics of thu_nhap, chi_tieu and tiet_kiem, with one
# column per hoc_van group.
table1(~ thu_nhap + chi_tieu + tiet_kiem | hoc_van, data = data2)
| | Dai_hoc (N=25) | THCS (N=50) | THPT (N=25) | Overall (N=100) |
|---|---|---|---|---|
| thu_nhap | ||||
| Mean (SD) | 14800000 (4420000) | 14500000 (4370000) | 14500000 (4410000) | 14600000 (4350000) |
| Median [Min, Max] | 14800000 [7600000, 22000000] | 14400000 [7150000, 21700000] | 14700000 [7450000, 21900000] | 14600000 [7150000, 22000000] |
| chi_tieu | ||||
| Mean (SD) | 10200000 (2940000) | 10000000 (2920000) | 10000000 (2940000) | 10100000 (2900000) |
| Median [Min, Max] | 10200000 [5400000, 15000000] | 9950000 [5100000, 14800000] | 10100000 [5300000, 14900000] | 10100000 [5100000, 15000000] |
| tiet_kiem | ||||
| Mean (SD) | 4600000 (1470000) | 4500000 (1460000) | 4500000 (1470000) | 4530000 (1450000) |
| Median [Min, Max] | 4600000 [2200000, 7000000] | 4480000 [2050000, 6900000] | 4550000 [2150000, 6950000] | 4530000 [2050000, 7000000] |
# Mean income, spending and savings per khu_vuc group. Each result column
# is named after its source column with a "_tb" suffix.
tong_hop_khu_vuc <- data2 %>%
  group_by(khu_vuc) %>%
  summarise(
    across(c(thu_nhap, chi_tieu, tiet_kiem), mean, .names = "{.col}_tb"),
    .groups = "drop"
  )

# Same three means, grouped by hoc_van instead.
tong_hop_hoc_van <- data2 %>%
  group_by(hoc_van) %>%
  summarise(
    across(c(thu_nhap, chi_tieu, tiet_kiem), mean, .names = "{.col}_tb"),
    .groups = "drop"
  )
# Render the per-khu_vuc means as a table.
tong_hop_khu_vuc %>%
  kable()
| khu_vuc | thu_nhap_tb | chi_tieu_tb | tiet_kiem_tb |
|---|---|---|---|
| Nong_thon | 14500000 | 10000000 | 4500000 |
| Thanh_thi | 14650000 | 10100000 | 4550000 |
# Render the per-hoc_van means as a table.
tong_hop_hoc_van %>%
  kable()
| hoc_van | thu_nhap_tb | chi_tieu_tb | tiet_kiem_tb |
|---|---|---|---|
| Dai_hoc | 14800000 | 10200000 | 4600000 |
| THCS | 14497000 | 9998000 | 4499000 |
| THPT | 14506000 | 10004000 | 4502000 |
# Histogram of thu_nhap with bins 1,000,000 wide and bin edges aligned to 0.
ggplot(data = data2, mapping = aes(x = thu_nhap)) +
  geom_histogram(binwidth = 1e6, boundary = 0) +
  ggtitle("Phân bố thu nhập của hộ gia đình") +
  xlab("Thu nhập") +
  ylab("Tần số") +
  theme_minimal()
# Box plot of chi_tieu, one box per khu_vuc group.
ggplot(data = data2, mapping = aes(x = khu_vuc, y = chi_tieu)) +
  geom_boxplot() +
  theme_minimal() +
  labs(title = "Chi tiêu theo khu vực", x = "Khu vực", y = "Chi tiêu")
# Scatter plot of chi_tieu (y) against thu_nhap (x), one point per row.
ggplot(data2, aes(thu_nhap, chi_tieu)) +
  geom_point() +
  ggtitle("Mối quan hệ giữa thu nhập và chi tiêu") +
  labs(x = "Thu nhập", y = "Chi tiêu") +
  theme_minimal()
# Correlation (cor() default method) between thu_nhap and chi_tieu.
with(data2, cor(thu_nhap, chi_tieu))
## [1] 1