# Read khao_sat_ho_gia_dinh.csv into a data frame.
data <- read.csv(file = "khao_sat_ho_gia_dinh.csv")

# Preview the first six rows.
head(x = data, n = 6L)
##   ho_id   khu_vuc gioi_tinh tuoi hoc_van thu_nhap chi_tieu tiet_kiem
## 1     1 Nong_thon        Nu   31    THCS  7150000  5100000   2050000
## 2     2 Thanh_thi       Nam   32    THCS  7300000  5200000   2100000
## 3     3 Nong_thon        Nu   33    THPT  7450000  5300000   2150000
## 4     4 Thanh_thi       Nam   34 Dai_hoc  7600000  5400000   2200000
## 5     5 Nong_thon        Nu   35    THCS  7750000  5500000   2250000
## 6     6 Thanh_thi       Nam   36    THPT  7900000  5600000   2300000
# Number of rows and columns.
dim(data)
## [1] 100   8
# Column names of the data frame.
colnames(data)
## [1] "ho_id"     "khu_vuc"   "gioi_tinh" "tuoi"      "hoc_van"   "thu_nhap" 
## [7] "chi_tieu"  "tiet_kiem"
# Per-column summary.
summary(object = data)
##      ho_id          khu_vuc           gioi_tinh              tuoi   
##  Min.   :  1.00   Length:100         Length:100         Min.   :30  
##  1st Qu.: 25.75   Class :character   Class :character   1st Qu.:36  
##  Median : 50.50   Mode  :character   Mode  :character   Median :42  
##  Mean   : 50.50                                         Mean   :42  
##  3rd Qu.: 75.25                                         3rd Qu.:48  
##  Max.   :100.00                                         Max.   :54  
##    hoc_van             thu_nhap           chi_tieu          tiet_kiem      
##  Length:100         Min.   : 7150000   Min.   : 5100000   Min.   :2050000  
##  Class :character   1st Qu.:10862500   1st Qu.: 7575000   1st Qu.:3287500  
##  Mode  :character   Median :14575000   Median :10050000   Median :4525000  
##                     Mean   :14575000   Mean   :10050000   Mean   :4525000  
##                     3rd Qu.:18287500   3rd Qu.:12525000   3rd Qu.:5762500  
##                     Max.   :22000000   Max.   :15000000   Max.   :7000000
# Overall means of thu_nhap, chi_tieu and tiet_kiem; NA values are ignored
# (na.rm = TRUE) so a single missing entry does not turn a mean into NA.
mean_thu_nhap <- mean(data$thu_nhap, na.rm = TRUE)
mean_chi_tieu <- mean(data$chi_tieu, na.rm = TRUE)
mean_tiet_kiem <- mean(data$tiet_kiem, na.rm = TRUE)

# cat() switches to scientific notation whenever that form is shorter (a mean
# of exactly 10000000 would be printed as 1e+07), so each value is formatted
# in fixed notation before it is printed.
cat("Trung bình Thu nhập:", format(mean_thu_nhap, scientific = FALSE), "\n")
## Trung bình Thu nhập: 14575000
cat("Trung bình Chi tiêu:", format(mean_chi_tieu, scientific = FALSE), "\n")
## Trung bình Chi tiêu: 10050000
cat("Trung bình Tiết kiệm:", format(mean_tiet_kiem, scientific = FALSE), "\n")
## Trung bình Tiết kiệm: 4525000
# Compact display of each column's type and first values.
str(object = data)
## 'data.frame':    100 obs. of  8 variables:
##  $ ho_id    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ khu_vuc  : chr  "Nong_thon" "Thanh_thi" "Nong_thon" "Thanh_thi" ...
##  $ gioi_tinh: chr  "Nu" "Nam" "Nu" "Nam" ...
##  $ tuoi     : int  31 32 33 34 35 36 37 38 39 40 ...
##  $ hoc_van  : chr  "THCS" "THCS" "THPT" "Dai_hoc" ...
##  $ thu_nhap : int  7150000 7300000 7450000 7600000 7750000 7900000 8050000 8200000 8350000 8500000 ...
##  $ chi_tieu : int  5100000 5200000 5300000 5400000 5500000 5600000 5700000 5800000 5900000 6000000 ...
##  $ tiet_kiem: int  2050000 2100000 2150000 2200000 2250000 2300000 2350000 2400000 2450000 2500000 ...
library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
# Descriptive table of thu_nhap, chi_tieu and tiet_kiem, stratified by khu_vuc.
table1(
  ~ thu_nhap + chi_tieu + tiet_kiem | khu_vuc,
  data = data
)
Nong_thon
(N=50)
Thanh_thi
(N=50)
Overall
(N=100)
thu_nhap
Mean (SD) 14500000 (4370000) 14700000 (4370000) 14600000 (4350000)
Median [Min, Max] 14500000 [7150000, 21900000] 14700000 [7300000, 22000000] 14600000 [7150000, 22000000]
chi_tieu
Mean (SD) 10000000 (2920000) 10100000 (2920000) 10100000 (2900000)
Median [Min, Max] 10000000 [5100000, 14900000] 10100000 [5200000, 15000000] 10100000 [5100000, 15000000]
tiet_kiem
Mean (SD) 4500000 (1460000) 4550000 (1460000) 4530000 (1450000)
Median [Min, Max] 4500000 [2050000, 6950000] 4550000 [2100000, 7000000] 4530000 [2050000, 7000000]
# Descriptive table of thu_nhap, chi_tieu and tiet_kiem, stratified by hoc_van.
table1(
  ~ thu_nhap + chi_tieu + tiet_kiem | hoc_van,
  data = data
)
Dai_hoc
(N=25)
THCS
(N=50)
THPT
(N=25)
Overall
(N=100)
thu_nhap
Mean (SD) 14800000 (4420000) 14500000 (4370000) 14500000 (4410000) 14600000 (4350000)
Median [Min, Max] 14800000 [7600000, 22000000] 14400000 [7150000, 21700000] 14700000 [7450000, 21900000] 14600000 [7150000, 22000000]
chi_tieu
Mean (SD) 10200000 (2940000) 10000000 (2920000) 10000000 (2940000) 10100000 (2900000)
Median [Min, Max] 10200000 [5400000, 15000000] 9950000 [5100000, 14800000] 10100000 [5300000, 14900000] 10100000 [5100000, 15000000]
tiet_kiem
Mean (SD) 4600000 (1470000) 4500000 (1460000) 4500000 (1470000) 4530000 (1450000)
Median [Min, Max] 4600000 [2200000, 7000000] 4480000 [2050000, 6900000] 4550000 [2150000, 6950000] 4530000 [2050000, 7000000]
library(ggplot2)

# Histogram of thu_nhap with 30 bins.
ggplot(data = data, mapping = aes(x = thu_nhap)) +
  geom_histogram(bins = 30, fill = "steelblue", color = "white") +
  labs(
    title = "Phân bố Thu nhập của Hộ gia đình",
    x = "Thu nhập",
    y = "Số lượng"
  ) +
  theme_minimal()

# Box plot of chi_tieu for each khu_vuc, with boxes filled by khu_vuc.
ggplot(
  data = data,
  mapping = aes(x = khu_vuc, y = chi_tieu, fill = khu_vuc)
) +
  geom_boxplot() +
  labs(
    title = "So sánh Chi tiêu giữa các Khu vực",
    x = "Khu vực",
    y = "Chi tiêu"
  ) +
  theme_light()

# Scatter plot of chi_tieu against thu_nhap with a fitted linear trend.
ggplot(data = data, mapping = aes(x = thu_nhap, y = chi_tieu)) +
  geom_point(alpha = 0.6, color = "darkred") +
  # Add a trend line (linear model fit).
  geom_smooth(method = "lm", color = "blue") +
  labs(
    title = "Mối quan hệ giữa Thu nhập và Chi tiêu",
    x = "Thu nhập",
    y = "Chi tiêu"
  ) +
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'