# Load the survey CSV into a data frame. The lines starting with `##` below
# are the console output recorded when each inspection call was run.
data2 <- read.csv("khao_sat_ho_gia_dinh.csv")
# Dimensions: number of rows, then number of columns.
dim(data2)
## [1] 100   8
# Column names.
names(data2)
## [1] "ho_id"     "khu_vuc"   "gioi_tinh" "tuoi"      "hoc_van"   "thu_nhap" 
## [7] "chi_tieu"  "tiet_kiem"
# Column types and leading values. khu_vuc, gioi_tinh and hoc_van are read as
# character (not factor); the remaining columns are integer.
str(data2)
## 'data.frame':    100 obs. of  8 variables:
##  $ ho_id    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ khu_vuc  : chr  "Nong_thon" "Thanh_thi" "Nong_thon" "Thanh_thi" ...
##  $ gioi_tinh: chr  "Nu" "Nam" "Nu" "Nam" ...
##  $ tuoi     : int  31 32 33 34 35 36 37 38 39 40 ...
##  $ hoc_van  : chr  "THCS" "THCS" "THPT" "Dai_hoc" ...
##  $ thu_nhap : int  7150000 7300000 7450000 7600000 7750000 7900000 8050000 8200000 8350000 8500000 ...
##  $ chi_tieu : int  5100000 5200000 5300000 5400000 5500000 5600000 5700000 5800000 5900000 6000000 ...
##  $ tiet_kiem: int  2050000 2100000 2150000 2200000 2250000 2300000 2350000 2400000 2450000 2500000 ...
# Per-column summary: min/quartiles/mean/max for the numeric columns and
# length/class/mode for the character columns. No NA counts are reported.
summary(data2)
##      ho_id          khu_vuc           gioi_tinh              tuoi   
##  Min.   :  1.00   Length:100         Length:100         Min.   :30  
##  1st Qu.: 25.75   Class :character   Class :character   1st Qu.:36  
##  Median : 50.50   Mode  :character   Mode  :character   Median :42  
##  Mean   : 50.50                                         Mean   :42  
##  3rd Qu.: 75.25                                         3rd Qu.:48  
##  Max.   :100.00                                         Max.   :54  
##    hoc_van             thu_nhap           chi_tieu          tiet_kiem      
##  Length:100         Min.   : 7150000   Min.   : 5100000   Min.   :2050000  
##  Class :character   1st Qu.:10862500   1st Qu.: 7575000   1st Qu.:3287500  
##  Mode  :character   Median :14575000   Median :10050000   Median :4525000  
##                     Mean   :14575000   Mean   :10050000   Mean   :4525000  
##                     3rd Qu.:18287500   3rd Qu.:12525000   3rd Qu.:5762500  
##                     Max.   :22000000   Max.   :15000000   Max.   :7000000
# Whole-sample means of the income, spending and savings columns.
thu_nhap_tb  <- mean(data2[["thu_nhap"]])
chi_tieu_tb  <- mean(data2[["chi_tieu"]])
tiet_kiem_tb <- mean(data2[["tiet_kiem"]])

# Show the three means as a two-column table: a text label and its value.
kable(data.frame(
  Chi_tieu = c(
    "Thu nhập trung bình",
    "Chi tiêu trung bình",
    "Tiết kiệm trung bình"
  ),
  Gia_tri = c(thu_nhap_tb, chi_tieu_tb, tiet_kiem_tb)
))
Chi_tieu Gia_tri
Thu nhập trung bình 14575000
Chi tiêu trung bình 10050000
Tiết kiệm trung bình 4525000
# Descriptive statistics (mean/SD, median/min/max) of thu_nhap, chi_tieu and
# tiet_kiem, with one column per khu_vuc level plus an overall column.
table1(~ thu_nhap + chi_tieu + tiet_kiem | khu_vuc, data = data2)
Nong_thon
(N=50)
Thanh_thi
(N=50)
Overall
(N=100)
thu_nhap
Mean (SD) 14500000 (4370000) 14700000 (4370000) 14600000 (4350000)
Median [Min, Max] 14500000 [7150000, 21900000] 14700000 [7300000, 22000000] 14600000 [7150000, 22000000]
chi_tieu
Mean (SD) 10000000 (2920000) 10100000 (2920000) 10100000 (2900000)
Median [Min, Max] 10000000 [5100000, 14900000] 10100000 [5200000, 15000000] 10100000 [5100000, 15000000]
tiet_kiem
Mean (SD) 4500000 (1460000) 4550000 (1460000) 4530000 (1450000)
Median [Min, Max] 4500000 [2050000, 6950000] 4550000 [2100000, 7000000] 4530000 [2050000, 7000000]
# Descriptive statistics (mean/SD, median/min/max) of thu_nhap, chi_tieu and
# tiet_kiem, with one column per hoc_van level plus an overall column.
table1(~ thu_nhap + chi_tieu + tiet_kiem | hoc_van, data = data2)
Dai_hoc
(N=25)
THCS
(N=50)
THPT
(N=25)
Overall
(N=100)
thu_nhap
Mean (SD) 14800000 (4420000) 14500000 (4370000) 14500000 (4410000) 14600000 (4350000)
Median [Min, Max] 14800000 [7600000, 22000000] 14400000 [7150000, 21700000] 14700000 [7450000, 21900000] 14600000 [7150000, 22000000]
chi_tieu
Mean (SD) 10200000 (2940000) 10000000 (2920000) 10000000 (2940000) 10100000 (2900000)
Median [Min, Max] 10200000 [5400000, 15000000] 9950000 [5100000, 14800000] 10100000 [5300000, 14900000] 10100000 [5100000, 15000000]
tiet_kiem
Mean (SD) 4600000 (1470000) 4500000 (1460000) 4500000 (1470000) 4530000 (1450000)
Median [Min, Max] 4600000 [2200000, 7000000] 4480000 [2050000, 6900000] 4550000 [2150000, 6950000] 4530000 [2050000, 7000000]
# Mean income, spending and savings for each khu_vuc group. across() applies
# mean() to the three columns and names the results "<column>_tb".
tong_hop_khu_vuc <- data2 %>%
  group_by(khu_vuc) %>%
  summarise(
    across(c(thu_nhap, chi_tieu, tiet_kiem), mean, .names = "{.col}_tb"),
    .groups = "drop"
  )

# The same three group means, this time for each hoc_van group.
tong_hop_hoc_van <- data2 %>%
  group_by(hoc_van) %>%
  summarise(
    across(c(thu_nhap, chi_tieu, tiet_kiem), mean, .names = "{.col}_tb"),
    .groups = "drop"
  )

# Render the per-khu_vuc summary as a table.
kable(tong_hop_khu_vuc)
khu_vuc thu_nhap_tb chi_tieu_tb tiet_kiem_tb
Nong_thon 14500000 10000000 4500000
Thanh_thi 14650000 10100000 4550000
# Render the per-hoc_van summary as a table.
tong_hop_hoc_van %>%
  kable()
hoc_van thu_nhap_tb chi_tieu_tb tiet_kiem_tb
Dai_hoc 14800000 10200000 4600000
THCS 14497000 9998000 4499000
THPT 14506000 10004000 4502000
# Histogram of thu_nhap using bins 1,000,000 wide whose edges are aligned to
# multiples of the bin width (boundary = 0).
ggplot(data2, aes(x = thu_nhap)) +
  geom_histogram(binwidth = 1000000, boundary = 0) +
  # Values are in the tens of millions, where default axis labels typically
  # switch to scientific notation (e.g. 1.0e+07). Print them in full with
  # thousands separators so the axis is readable.
  scale_x_continuous(
    labels = function(x) {
      format(x, big.mark = ",", scientific = FALSE, trim = TRUE)
    }
  ) +
  labs(
    title = "Phân bố thu nhập của hộ gia đình",
    x = "Thu nhập",
    y = "Tần số"
  ) +
  theme_minimal()

# Box plot of chi_tieu, one box per khu_vuc level.
ggplot(data2, aes(x = khu_vuc, y = chi_tieu)) +
  geom_boxplot() +
  # Values are in the millions, where default axis labels typically switch
  # to scientific notation (e.g. 1.0e+07). Print them in full with thousands
  # separators so the axis is readable.
  scale_y_continuous(
    labels = function(x) {
      format(x, big.mark = ",", scientific = FALSE, trim = TRUE)
    }
  ) +
  labs(
    title = "Chi tiêu theo khu vực",
    x = "Khu vực",
    y = "Chi tiêu"
  ) +
  theme_minimal()

# Scatter plot of chi_tieu against thu_nhap, one point per row of data2.
ggplot(data2, aes(x = thu_nhap, y = chi_tieu)) +
  geom_point() +
  # Both variables are in the millions, where default axis labels typically
  # switch to scientific notation (e.g. 1.0e+07). Print them in full with
  # thousands separators on both axes.
  scale_x_continuous(
    labels = function(x) {
      format(x, big.mark = ",", scientific = FALSE, trim = TRUE)
    }
  ) +
  scale_y_continuous(
    labels = function(x) {
      format(x, big.mark = ",", scientific = FALSE, trim = TRUE)
    }
  ) +
  labs(
    title = "Mối quan hệ giữa thu nhập và chi tiêu",
    x = "Thu nhập",
    y = "Chi tiêu"
  ) +
  theme_minimal()

# Correlation between income and spending (cor() default method). The recorded
# result of exactly 1 means chi_tieu is a perfectly linear, increasing
# function of thu_nhap in this data set.
with(data2, cor(thu_nhap, chi_tieu))
## [1] 1