# Load the household survey from a CSV file (path is relative to the current
# working directory) and preview the first rows as a quick sanity check.
data <- read.csv(file = "khao_sat_ho_gia_dinh.csv")
head(x = data)
## ho_id khu_vuc gioi_tinh tuoi hoc_van thu_nhap chi_tieu tiet_kiem
## 1 1 Nong_thon Nu 31 THCS 7150000 5100000 2050000
## 2 2 Thanh_thi Nam 32 THCS 7300000 5200000 2100000
## 3 3 Nong_thon Nu 33 THPT 7450000 5300000 2150000
## 4 4 Thanh_thi Nam 34 Dai_hoc 7600000 5400000 2200000
## 5 5 Nong_thon Nu 35 THCS 7750000 5500000 2250000
## 6 6 Thanh_thi Nam 36 THPT 7900000 5600000 2300000
# Number of rows and columns in the survey table.
dim(data)
## [1] 100 8
# Column names.
names(data)
## [1] "ho_id" "khu_vuc" "gioi_tinh" "tuoi" "hoc_van" "thu_nhap"
## [7] "chi_tieu" "tiet_kiem"
# Per-column summary: quartiles and mean for numeric columns, length/class/mode
# for character columns.
summary(data)
## ho_id khu_vuc gioi_tinh tuoi
## Min. : 1.00 Length:100 Length:100 Min. :30
## 1st Qu.: 25.75 Class :character Class :character 1st Qu.:36
## Median : 50.50 Mode :character Mode :character Median :42
## Mean : 50.50 Mean :42
## 3rd Qu.: 75.25 3rd Qu.:48
## Max. :100.00 Max. :54
## hoc_van thu_nhap chi_tieu tiet_kiem
## Length:100 Min. : 7150000 Min. : 5100000 Min. :2050000
## Class :character 1st Qu.:10862500 1st Qu.: 7575000 1st Qu.:3287500
## Mode :character Median :14575000 Median :10050000 Median :4525000
## Mean :14575000 Mean :10050000 Mean :4525000
## 3rd Qu.:18287500 3rd Qu.:12525000 3rd Qu.:5762500
## Max. :22000000 Max. :15000000 Max. :7000000
# Overall averages of income (thu_nhap), spending (chi_tieu) and savings
# (tiet_kiem). Missing values are dropped before averaging.
mean_thu_nhap <- mean(data[["thu_nhap"]], na.rm = TRUE)
mean_chi_tieu <- mean(data[["chi_tieu"]], na.rm = TRUE)
mean_tiet_kiem <- mean(data[["tiet_kiem"]], na.rm = TRUE)
# Print the three averages, one per line.
# The numbers go through format(..., scientific = FALSE) because cat() on a
# bare number may switch to scientific notation (e.g. 10000000 prints as
# 1e+07), which is unreadable for money amounts. For the current values the
# printed text is unchanged.
cat("Trung bình Thu nhập:", format(mean_thu_nhap, scientific = FALSE), "\n")
## Trung bình Thu nhập: 14575000
cat("Trung bình Chi tiêu:", format(mean_chi_tieu, scientific = FALSE), "\n")
## Trung bình Chi tiêu: 10050000
cat("Trung bình Tiết kiệm:", format(mean_tiet_kiem, scientific = FALSE), "\n")
## Trung bình Tiết kiệm: 4525000
# Compact structure listing: each column's type and its first few values.
str(data)
## 'data.frame': 100 obs. of 8 variables:
## $ ho_id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ khu_vuc : chr "Nong_thon" "Thanh_thi" "Nong_thon" "Thanh_thi" ...
## $ gioi_tinh: chr "Nu" "Nam" "Nu" "Nam" ...
## $ tuoi : int 31 32 33 34 35 36 37 38 39 40 ...
## $ hoc_van : chr "THCS" "THCS" "THPT" "Dai_hoc" ...
## $ thu_nhap : int 7150000 7300000 7450000 7600000 7750000 7900000 8050000 8200000 8350000 8500000 ...
## $ chi_tieu : int 5100000 5200000 5300000 5400000 5500000 5600000 5700000 5800000 5900000 6000000 ...
## $ tiet_kiem: int 2050000 2100000 2150000 2200000 2250000 2300000 2350000 2400000 2450000 2500000 ...
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
# Descriptive statistics of income, spending and savings, split by area
# (khu_vuc); the rendered table also carries an "Overall" column.
# NOTE(review): the rendered overall mean income reads 14600000 while mean()
# above gives 14575000 -- presumably table1's default rounding; confirm and
# adjust the rounding if exact figures are needed.
table1(~ thu_nhap + chi_tieu + tiet_kiem | khu_vuc, data = data)
| | Nong_thon (N=50) | Thanh_thi (N=50) | Overall (N=100) |
|---|---|---|---|
| thu_nhap | | | |
| Mean (SD) | 14500000 (4370000) | 14700000 (4370000) | 14600000 (4350000) |
| Median [Min, Max] | 14500000 [7150000, 21900000] | 14700000 [7300000, 22000000] | 14600000 [7150000, 22000000] |
| chi_tieu | | | |
| Mean (SD) | 10000000 (2920000) | 10100000 (2920000) | 10100000 (2900000) |
| Median [Min, Max] | 10000000 [5100000, 14900000] | 10100000 [5200000, 15000000] | 10100000 [5100000, 15000000] |
| tiet_kiem | | | |
| Mean (SD) | 4500000 (1460000) | 4550000 (1460000) | 4530000 (1450000) |
| Median [Min, Max] | 4500000 [2050000, 6950000] | 4550000 [2100000, 7000000] | 4530000 [2050000, 7000000] |
# Same descriptive statistics, split by education level (hoc_van).
# NOTE(review): hoc_van is a character column and the rendered groups come out
# as Dai_hoc, THCS, THPT (alphabetical). Presumably an ordered factor
# (THCS < THPT < Dai_hoc) is wanted for presentation -- confirm.
table1(~ thu_nhap + chi_tieu + tiet_kiem | hoc_van, data = data)
| | Dai_hoc (N=25) | THCS (N=50) | THPT (N=25) | Overall (N=100) |
|---|---|---|---|---|
| thu_nhap | | | | |
| Mean (SD) | 14800000 (4420000) | 14500000 (4370000) | 14500000 (4410000) | 14600000 (4350000) |
| Median [Min, Max] | 14800000 [7600000, 22000000] | 14400000 [7150000, 21700000] | 14700000 [7450000, 21900000] | 14600000 [7150000, 22000000] |
| chi_tieu | | | | |
| Mean (SD) | 10200000 (2940000) | 10000000 (2920000) | 10000000 (2940000) | 10100000 (2900000) |
| Median [Min, Max] | 10200000 [5400000, 15000000] | 9950000 [5100000, 14800000] | 10100000 [5300000, 14900000] | 10100000 [5100000, 15000000] |
| tiet_kiem | | | | |
| Mean (SD) | 4600000 (1470000) | 4500000 (1460000) | 4500000 (1470000) | 4530000 (1450000) |
| Median [Min, Max] | 4600000 [2200000, 7000000] | 4480000 [2050000, 6900000] | 4550000 [2150000, 6950000] | 4530000 [2050000, 7000000] |
library(ggplot2)
# Histogram of household income: 30 bins, steel-blue bars with white outlines.
ggplot(data = data, mapping = aes(x = thu_nhap)) +
  geom_histogram(bins = 30, colour = "white", fill = "steelblue") +
  labs(
    title = "Phân bố Thu nhập của Hộ gia đình",
    x = "Thu nhập",
    y = "Số lượng"
  ) +
  theme_minimal()
# Box plots of spending for each area, with the fill colour keyed to the area.
ggplot(
  data = data,
  mapping = aes(x = khu_vuc, y = chi_tieu, fill = khu_vuc)
) +
  geom_boxplot() +
  labs(
    title = "So sánh Chi tiêu giữa các Khu vực",
    x = "Khu vực",
    y = "Chi tiêu"
  ) +
  theme_light()
# Scatter plot of spending against income, one semi-transparent dark-red point
# per household, with a linear-model trend line drawn in blue on top.
ggplot(data = data, mapping = aes(x = thu_nhap, y = chi_tieu)) +
  geom_point(alpha = 0.6, colour = "darkred") +
  geom_smooth(method = "lm", colour = "blue") + # add the trend line
  labs(
    title = "Mối quan hệ giữa Thu nhập và Chi tiêu",
    x = "Thu nhập",
    y = "Chi tiêu"
  ) +
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'