Đọc số liệu vào

Kết quả đăng trong Bài báo: "Determinant of household expenditure on education in Red River Delta region, TNU Journal of Science and Technology, 226(04): 53 - 61, 2021, Ngo Thi Ngoan*, Nguyen Thi Tuyet Mai, Dam Thi Thu Trang", link tại: http://jst.tnu.edu.vn/jst/ft/view/81

Dữ liệu download tại: https://drive.google.com/drive/folders/1EYAG-uYIFbuP6exA6PE2sywmMdxavWSS?usp=sharing

# Working directory of the original analysis. Every relative path used in this
# script (the sourced helper files and the .RData files) is resolved against it.
# NOTE(review): this is a machine-specific absolute path -- point it at the
# local copy of the data folder before running.
setwd("D:/Public paper/Education expenditure Vietnam/Data_DBSH_TNU")

# library() stops with an error when a package is not installed, whereas
# require() only returns FALSE and lets the script fail later on.
library(reshape2)
library(tidyverse)

library(gridExtra)

# Project helper functions (their definitions are not part of this file).
source("FunctionEduc.R")
#---

# Load the two data sets used in the rest of the script.
load("GIAODUC18.RData")
load("CCCHIGD.RData")

# Keep rows with a non-negative THUBQ; rows where THUBQ is missing are dropped.
GIAODUC18 <- GIAODUC18[!is.na(GIAODUC18$THUBQ) & GIAODUC18$THUBQ >= 0, ]

# Helper script for the descriptive tables (definitions not shown here).
source("DescriptionTableFunction.R")

# Derived household variables:
#   songuoidihoc       - SONAMDANGHOC + SONUDANGHOC
#   NGHENGHIEP_CH      - "LAMCONGANLUONG" if that flag is "Yes", otherwise
#                        "NONGLAMTHUYSAN" if that flag is "Yes", otherwise
#                        "KINHDOANHDICHVU"
#   songuoidihocFactor - songuoidihoc as a factor
GIAODUC18 <- GIAODUC18 %>%
  mutate(
    songuoidihoc = SONAMDANGHOC + SONUDANGHOC,
    NGHENGHIEP_CH = ifelse(
      LAMCONGANLUONG == "Yes",
      "LAMCONGANLUONG",
      ifelse(NONGLAMTHUYSAN == "Yes", "NONGLAMTHUYSAN", "KINHDOANHDICHVU")
    ),
    songuoidihocFactor = as.factor(songuoidihoc)
  )

# Collapse HONNHAN_CH to two categories by level position: the second level
# becomes "Married", every other level becomes "Other".
# NOTE(review): assumes the factor has exactly six levels in that order -- confirm.
levels(GIAODUC18$HONNHAN_CH) <- c("Other", "Married", rep("Other", 4))

# Add the HOCTHEM variable.
# One row per ID: "Yes" when the HOCTHEM values recorded under that ID in
# CCCHIGD sum to more than zero, "No" otherwise.
CCCHIGD_HH <- CCCHIGD %>%
  group_by(ID) %>%
  summarise(HOCTHEM = sum(HOCTHEM)) %>%
  mutate(HOCTHEM = ifelse(HOCTHEM > 0, "Yes", "No"))

# Remove outliers: keep rows with CHIGD below 60000 (rows with a missing CHIGD
# are dropped as well).
GIAODUC18 <- GIAODUC18[!is.na(GIAODUC18$CHIGD) & GIAODUC18$CHIGD < 60000, ]

# Attach the flag to the household data. No `by` is given, so the join uses
# every column name the two tables have in common.
# NOTE(review): presumably that is ID only -- confirm against the data.
GIAODUC18 <- GIAODUC18 %>% left_join(CCCHIGD_HH)

# Rows still missing a HOCTHEM value after the join are set to "No".
GIAODUC18$HOCTHEM[is.na(GIAODUC18$HOCTHEM)] <- "No"

# Categorical variables passed to fun.DescriptionData().
factor.name <- c(
  "NOISONG", "DANTOC_CH", "HONNHAN_CH", "GIOITINH_CH",
  "NGHENGHIEP_CH", "songuoidihocFactor",
  "BANGCAP_CH", "HOCTHEM", "TROCAP"
)
# Continuous variables passed to fun.DescriptionData().
conti.name <- c("TUOI_CH", "TSNGUOI", "songuoidihoc")

# Descriptive table for the whole sample. The script previously ran this exact
# call three times in a row and overwrote Des1 each time; one call gives the
# same result (assuming fun.DescriptionData() has no side effects).
Des1 <- fun.DescriptionData(GIAODUC18, factor.name, conti.name)

# Descriptive table for one province: the same variable lists as the pooled
# table, computed on the rows of GIAODUC18 whose PROVINCE equals `province`.
# `!!` forces the function argument so that it cannot be confused with a
# column of the data.
fun.DescribeProvince <- function(province) {
  fun.DescriptionData(
    GIAODUC18 %>% filter(PROVINCE == !!province),
    factor.name,
    conti.name
  )
}

DesHanoi     <- fun.DescribeProvince("Hanoi")
DesHaiPhong  <- fun.DescribeProvince("HaiPhong")
DesVinhPhuc  <- fun.DescribeProvince("VinhPhuc")
DesBacNinh   <- fun.DescribeProvince("BacNinh")
DesQuangNinh <- fun.DescribeProvince("QuangNinh")
DesHaiDuong  <- fun.DescribeProvince("HaiDuong")
DesHungYen   <- fun.DescribeProvince("HungYen")
DesThaiBinh  <- fun.DescribeProvince("ThaiBinh")
DesHaNam     <- fun.DescribeProvince("HaNam")
DesNamDinh   <- fun.DescribeProvince("NamDinh")
DesNinhBinh  <- fun.DescribeProvince("NinhBinh")

# Row names of the Hanoi table, used as the common row layout for all provinces.
ref.rows <- row.names(DesHanoi)

# Rows present for Hai Phong but absent from the Hanoi table
# (none in the recorded run, see the output line below).
setdiff(row.names(DesHaiPhong), ref.rows)
## character(0)

# Re-index every other province by the Hanoi row names so that all tables share
# the same row order. A row that does not exist for a province comes back as NA
# (printed as <NA> in the combined table).
DesHaiPhong  <- DesHaiPhong[ref.rows, ]

DesVinhPhuc  <- DesVinhPhuc[ref.rows, ]
DesBacNinh   <- DesBacNinh[ref.rows, ]
DesQuangNinh <- DesQuangNinh[ref.rows, ]
DesHaiDuong  <- DesHaiDuong[ref.rows, ]
DesHungYen   <- DesHungYen[ref.rows, ]
DesThaiBinh  <- DesThaiBinh[ref.rows, ]
DesHaNam     <- DesHaNam[ref.rows, ]
DesNamDinh   <- DesNamDinh[ref.rows, ]
DesNinhBinh  <- DesNinhBinh[ref.rows, ]

# Combine the aligned per-province summaries into one wide table: one column
# per province, one row per statistic.
#
# DesHanoi is passed as a bare column instead of as a data frame. When a
# one-column data frame is given to data.frame(), its own column name replaces
# the argument name, which is why the printed table labelled the first column
# "percen.factor" rather than "Hanoi". Because a bare column carries no row
# names, they are supplied explicitly from DesHanoi.
DesALL <- data.frame(
  Hanoi = DesHanoi[, 1],
  VinhPhuc = DesVinhPhuc,
  BacNinh = DesBacNinh,
  QuangNinh = DesQuangNinh,
  HaiDuong = DesHaiDuong,
  HungYen = DesHungYen,
  ThaiBinh = DesThaiBinh,
  Hanam = DesHaNam,
  Haiphong = DesHaiPhong,
  NamDinh = DesNamDinh,
  NinhBinh = DesNinhBinh,
  row.names = row.names(DesHanoi)
)

Bảng 2. Thông tin chung về đặc điểm hộ và chế độ học tập

# Print the combined per-province descriptive table.
DesALL
##                                  percen.factor    VinhPhuc     BacNinh
## So luong quan sat                          306         134         137
## TUOI_CH                            56.2 (13.1) 50.2 (12.7) 51.3 (11.3)
## TSNGUOI                              3.8 (1.6)   4.1 (1.6)   4.2 (1.7)
## songuoidihoc                             1 (1)     1.2 (1)     1.2 (1)
## NOISONG_URBAN                             43.8        26.1        27.7
## NOISONG_RURAL                             56.2        73.9        72.3
## DANTOC_CH_Kinh                            99.7        94.8         100
## DANTOC_CH_Minority                         0.3         5.2        <NA>
## HONNHAN_CH_Other                            18        15.7        13.9
## HONNHAN_CH_Married                          82        84.3        86.1
## GIOITINH_CH_Male                          69.3        74.6        80.3
## GIOITINH_CH_Female                        30.7        25.4        19.7
## NGHENGHIEP_CH_KINHDOANHDICHVU             50.7        25.4        34.3
## NGHENGHIEP_CH_LAMCONGANLUONG              32.7        42.5        42.3
## NGHENGHIEP_CH_NONGLAMTHUYSAN              16.7        32.1        23.4
## songuoidihocFactor_0                      43.8        32.8        33.6
## songuoidihocFactor_1                      22.9        22.4        23.4
## songuoidihocFactor_2                      26.5        34.3        32.8
## songuoidihocFactor_3                       5.9        10.4        10.2
## songuoidihocFactor_4                         1           0           0
## BANGCAP_CH_No qualification                7.2         7.5        10.2
## BANGCAP_CH_Primary school                   17        19.4        24.1
## BANGCAP_CH_Secondary-high school          57.2        62.7        58.4
## BANGCAP_CH_University                     18.6        10.4         7.3
## HOCTHEM_No                                63.1        45.5        69.3
## HOCTHEM_Yes                               36.9        54.5        30.7
## TROCAP_Yes                                21.2          44        25.5
## TROCAP_No                                 78.8          56        74.5
##                                  QuangNinh    HaiDuong     HungYen    ThaiBinh
## So luong quan sat                      147         180         145         188
## TUOI_CH                          53 (13.1) 55.1 (13.7) 56.6 (14.3) 57.2 (14.4)
## TSNGUOI                          3.6 (1.4)   3.6 (1.6)   3.3 (1.6)     3 (1.5)
## songuoidihoc                     0.9 (0.9)   0.8 (0.9)   0.8 (0.9)   0.7 (0.9)
## NOISONG_URBAN                         59.2        21.7        11.7         9.6
## NOISONG_RURAL                         40.8        78.3        88.3        90.4
## DANTOC_CH_Kinh                        90.5         100         100         100
## DANTOC_CH_Minority                     9.5        <NA>        <NA>        <NA>
## HONNHAN_CH_Other                      16.3        16.7        22.8        19.1
## HONNHAN_CH_Married                    83.7        83.3        77.2        80.9
## GIOITINH_CH_Male                        83        81.1        78.6        81.9
## GIOITINH_CH_Female                      17        18.9        21.4        18.1
## NGHENGHIEP_CH_KINHDOANHDICHVU         36.7        26.1        26.2        25.5
## NGHENGHIEP_CH_LAMCONGANLUONG          40.1        40.6        42.1        39.9
## NGHENGHIEP_CH_NONGLAMTHUYSAN          23.1        33.3        31.7        34.6
## songuoidihocFactor_0                  45.6        51.1        53.1        56.4
## songuoidihocFactor_1                  27.2        20.6        18.6        22.3
## songuoidihocFactor_2                  23.8        23.9        26.2        17.6
## songuoidihocFactor_3                   3.4         4.4         1.4         3.7
## songuoidihocFactor_4                     0           0         0.7           0
## BANGCAP_CH_No qualification           12.9         5.6          11         4.3
## BANGCAP_CH_Primary school             18.4          15          11        10.6
## BANGCAP_CH_Secondary-high school      57.8          75        71.7        81.4
## BANGCAP_CH_University                 10.9         4.4         6.2         3.7
## HOCTHEM_No                            84.4          65        68.3        71.8
## HOCTHEM_Yes                           15.6          35        31.7        28.2
## TROCAP_Yes                            27.2        23.3        24.1        20.7
## TROCAP_No                             72.8        76.7        75.9        79.3
##                                        Hanam    Haiphong     NamDinh
## So luong quan sat                        123         184         193
## TUOI_CH                          56.4 (14.4) 53.1 (14.4) 57.4 (13.1)
## TSNGUOI                            3.1 (1.3)   3.5 (1.5)   3.1 (1.5)
## songuoidihoc                       0.8 (0.9)     1 (0.9)     0.7 (1)
## NOISONG_URBAN                           12.2        46.2        18.7
## NOISONG_RURAL                           87.8        53.8        81.3
## DANTOC_CH_Kinh                           100        99.5         100
## DANTOC_CH_Minority                      <NA>         0.5        <NA>
## HONNHAN_CH_Other                          22        21.2        23.8
## HONNHAN_CH_Married                        78        78.8        76.2
## GIOITINH_CH_Male                        79.7        72.8        77.2
## GIOITINH_CH_Female                      20.3        27.2        22.8
## NGHENGHIEP_CH_KINHDOANHDICHVU           30.1        33.7        24.4
## NGHENGHIEP_CH_LAMCONGANLUONG              35        46.7        37.3
## NGHENGHIEP_CH_NONGLAMTHUYSAN              35        19.6        38.3
## songuoidihocFactor_0                      48        41.3          58
## songuoidihocFactor_1                    26.8        24.5          15
## songuoidihocFactor_2                    21.1        32.1        21.2
## songuoidihocFactor_3                     4.1         2.2         5.7
## songuoidihocFactor_4                       0           0           0
## BANGCAP_CH_No qualification             12.2           6        11.4
## BANGCAP_CH_Primary school                 13        19.6        16.6
## BANGCAP_CH_Secondary-high school        72.4          62        67.4
## BANGCAP_CH_University                    2.4        12.5         4.7
## HOCTHEM_No                              58.5        53.3        69.9
## HOCTHEM_Yes                             41.5        46.7        30.1
## TROCAP_Yes                              22.8        31.5        17.6
## TROCAP_No                               77.2        68.5        82.4
##                                     NinhBinh
## So luong quan sat                        129
## TUOI_CH                          51.1 (15.3)
## TSNGUOI                            3.4 (1.4)
## songuoidihoc                           1 (1)
## NOISONG_URBAN                           20.9
## NOISONG_RURAL                           79.1
## DANTOC_CH_Kinh                          98.4
## DANTOC_CH_Minority                       1.6
## HONNHAN_CH_Other                        11.6
## HONNHAN_CH_Married                      88.4
## GIOITINH_CH_Male                        80.6
## GIOITINH_CH_Female                      19.4
## NGHENGHIEP_CH_KINHDOANHDICHVU           19.4
## NGHENGHIEP_CH_LAMCONGANLUONG            50.4
## NGHENGHIEP_CH_NONGLAMTHUYSAN            30.2
## songuoidihocFactor_0                    39.5
## songuoidihocFactor_1                    27.9
## songuoidihocFactor_2                    25.6
## songuoidihocFactor_3                       7
## songuoidihocFactor_4                       0
## BANGCAP_CH_No qualification                7
## BANGCAP_CH_Primary school               17.1
## BANGCAP_CH_Secondary-high school        70.5
## BANGCAP_CH_University                    5.4
## HOCTHEM_No                              56.6
## HOCTHEM_Yes                             43.4
## TROCAP_Yes                              34.1
## TROCAP_No                               65.9

Hình 1. Biểu đồ hộp về tỉ lệ chi tiêu giáo dục (%) và chi tiêu (nghìn đồng) của các hộ gia đình ở đồng bằng sông Hồng

# Box plots by province, stacked in one figure: TONGCHIGD on top (y label
# "Phantram") and CHIGD below (y label "nghin dong"). The dark-red diamond in
# each box marks the group mean.
#
# stat_summary() uses `fun` and `show.legend`; the former `fun.y` and
# `show_guide` arguments are deprecated in ggplot2.
# NOTE(review): ylim() sets scale limits, so observations outside the range are
# removed before the box statistics and the plotted means are computed.
p1 <- ggplot(GIAODUC18, aes(x = PROVINCE, y = TONGCHIGD)) +
  geom_boxplot() +
  ylim(c(0, 20)) +
  ylab("Phantram") +
  stat_summary(
    fun = mean, colour = "darkred", geom = "point",
    shape = 18, size = 3, show.legend = FALSE
  )

# Mean CHIGD per province. Not used by the plots in this section.
means <- aggregate(CHIGD ~ PROVINCE, GIAODUC18, mean)

p2 <- ggplot(GIAODUC18, aes(x = PROVINCE, y = CHIGD)) +
  geom_boxplot() +
  ylim(c(0, 36000)) +
  ylab("nghin dong") +
  stat_summary(
    fun = mean, colour = "darkred", geom = "point",
    shape = 18, size = 3, show.legend = FALSE
  )

grid.arrange(p1, p2, ncol = 1)

# Box plots of CHIALL by province, one box per m2xc6 category within each
# province. Outlier points are hidden and the y axis is limited to 0-35000.
# Note that this overwrites the p2 object created for the previous figure.
p2 <- ggplot(CCCHIGD, aes(PROVINCE, CHIALL, fill = m2xc6)) +
  geom_boxplot(outlier.shape = NA) +
  scale_y_continuous(limits = c(0, 35000)) +
  labs(title = "Chi phi 1 nam hoc theo cap hoc va theo tinh")

p2

Sử dụng hình với màu đen và trắng

library(patternplot)
# Pattern names for a patterned (black-and-white) version of the figure.
# Not used by the plot below.
pattern.type <- c('hdashes', 'blank', 'crosshatch','hdashes', 'blank', 'crosshatch')

# Box plots of CHIALL by province, filled by m2xc6.
# `outlier.shape = NA` is a geom parameter, not an aesthetic. Inside aes() it
# was ignored with an "unknown aesthetics" warning and the outliers were still
# drawn, so it is now passed to geom_boxplot() directly.
ggplot(CCCHIGD, aes(x = PROVINCE, y = CHIALL)) +
  geom_boxplot(aes(fill = m2xc6), outlier.shape = NA) +
  ylim(0, 35000) +
  scale_fill_brewer(palette = "OrRd")

# Possible province subset, left disabled in the original:
# CCCHIGD$PROVINCE %in% c("QuangNinh", "VinhPhuc", "BacNinh", "HaiDuong")

# Summary of CHIALL for every PROVINCE x m2xc6 combination, produced by the
# project helper summarySE(). The plot below reads its CHIALL and ci columns.
tgc <- summarySE(CCCHIGD, measurevar="CHIALL", groupvars=c("PROVINCE","m2xc6"))
# tgc

# The error bars overlap, so dodge them horizontally (dodge width 0.5).
pd <- position_dodge(width = 0.5)

# Black-and-white version: m2xc6 is mapped to point shape instead of colour.
p2N <- ggplot(tgc, aes(x = PROVINCE, y = CHIALL, shape = m2xc6))
p2N <- p2N +
  geom_errorbar(
    aes(ymin = CHIALL - ci, ymax = CHIALL + ci),
    width = 0.3,
    position = pd
  )
p2N <- p2N +
  geom_line(position = pd, size = 2) +
  geom_point(position = pd, size = 3)

p2N

Tiếp tục xử lý chi tiêu theo từng khoản mục

# Disabled check from the original: CCCHIGD %>% filter(TRAITUYEN > 0)

# Long format of the individual spending items: one row per ID, PROVINCE and
# item, with the item name in `variable` and the amount in `value`.
Muc2X.Chitieu2 <- CCCHIGD %>%
  select(
    ID, PROVINCE,
    HOCPHI, DONGGOP, QUANAO, SGK, DUNGCU, HOCTHEM, CHIGDKHAC
  ) %>%
  melt(id.vars = c("ID", "PROVINCE"))

# Box plot of each item by province; outlier points hidden, y axis limited to
# 0-2000.
ggplot(Muc2X.Chitieu2, aes(variable, value, fill = PROVINCE)) +
  geom_boxplot(outlier.shape = NA) +
  scale_y_continuous(limits = c(0, 2000))

# Compute the percentage share of each spending item.
#
# Steps: (1) express every item as a percentage of CHIALL per row,
# (2) compute the remainder PCHIKHAC, (3) turn PROVINCE and m2xc6 into factors,
# (4) average the shares within each m2xc6 x PROVINCE cell, ignoring NA/NaN.
# NOTE(review): PCHIKHAC is computed but is not among the summarised columns,
# so it does not reach the reshaped table -- confirm this is intended.
Muc2X.Pct <- CCCHIGD %>%
  mutate(
    PHOCPHI  = HOCPHI * 100 / CHIALL,
    PDONGGOP = DONGGOP * 100 / CHIALL,
    PQUANAO  = QUANAO * 100 / CHIALL,
    PSGK     = SGK * 100 / CHIALL,
    PDUNGCU  = DUNGCU * 100 / CHIALL,
    PHOCTHEM = HOCTHEM * 100 / CHIALL
  ) %>%
  select(
    ID, PROVINCE, m2xc6,
    PHOCPHI, PDONGGOP, PQUANAO, PSGK, PDUNGCU, PHOCTHEM
  ) %>%
  mutate(
    PCHIKHAC = 100 - PHOCPHI - PDONGGOP - PQUANAO - PSGK - PDUNGCU - PHOCTHEM
  ) %>%
  mutate(
    PROVINCE = as.factor(PROVINCE),
    m2xc6 = as.factor(m2xc6)
  ) %>%
  dplyr::group_by(m2xc6, PROVINCE) %>%
  dplyr::summarise(
    PHOCPHI  = mean(PHOCPHI, na.rm = TRUE),
    PDONGGOP = mean(PDONGGOP, na.rm = TRUE),
    PQUANAO  = mean(PQUANAO, na.rm = TRUE),
    PSGK     = mean(PSGK, na.rm = TRUE),
    PDUNGCU  = mean(PDUNGCU, na.rm = TRUE),
    PHOCTHEM = mean(PHOCTHEM, na.rm = TRUE)
  )

# Long format for plotting: one row per m2xc6, PROVINCE and item share.
Muc2X.Pct <- melt(Muc2X.Pct, id.vars = c("m2xc6", "PROVINCE"))

Hình với màu đen và trắng

# Stacked bars of the mean item shares: one bar per m2xc6 category, one panel
# per province, fill colour by spending item. The x-axis labels are rotated by
# 90 degrees.
# (A brewer fill scale was tried in the original and left disabled.)
p3 <- ggplot(Muc2X.Pct, aes(x = m2xc6, y = value, fill = variable)) +
  geom_bar(stat = "identity") +
  facet_grid(. ~ PROVINCE) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(
    x = "Levels",
    y = "Percentage",
    title = "Ty le phan tram chi tieu cho tung khoan muc va cap hoc"
  )

p3

# Redraw the figure without using colour.
#
# Points of the mean item shares on a black-and-white theme: the spending item
# is mapped to point shape and there is one panel per province.
# NOTE(review): PROVINCE is used both as the x variable and as the facet
# variable, so each panel has a single occupied x position. The alternatives
# left disabled in the original used x = m2xc6 and lines grouped by `variable`
# -- confirm which layout is intended.
p3N <- ggplot(
  Muc2X.Pct,
  aes(x = PROVINCE, y = value, group = m2xc6, shape = variable)
) +
  theme_bw() +
  geom_point(size = 1) +
  facet_grid(. ~ PROVINCE)

p3N

Kết quả hồi quy

# AER provides tobit(). library() fails immediately if the package is missing,
# whereas require() would only return FALSE.
library(AER)

# Prepare the regressors: HOCTHEM as a factor, and PROVINCE as a factor with an
# explicit level order. Hanoi is the first level, i.e. the reference category
# of the province dummies in the coefficient table.
GIAODUC18$HOCTHEM <- as.factor(GIAODUC18$HOCTHEM)
GIAODUC18$PROVINCE <- factor(
  GIAODUC18$PROVINCE,
  levels = c(
    "Hanoi", "QuangNinh", "VinhPhuc", "BacNinh", "HaiDuong",
    "HaiPhong", "HungYen", "ThaiBinh", "HaNam", "NamDinh",
    "NinhBinh"
  )
)

# Run the Tobit model: CHIGD left-censored at 0, Gaussian errors.
to6 <- tobit(
  CHIGD ~ THUBQ + BANGCAP_CH + GIOITINH_CH + TROCAP + songuoidihoc +
    HOCTHEM + NOISONG + PROVINCE,
  data = GIAODUC18, left = 0, dist = "gaussian"
)
# summary(to6)

# Intercept-only model; only needed for the pseudo R2 line below, which is
# currently disabled.
to60 <- update(to6, . ~ 1)
# 1 - as.vector(logLik(to6) / logLik(to60)) # Pseudo R2

# Format the coefficient table with the project helper gen.model1() and print it.
Coef6 <- gen.model1(to6)
Coef6
##                                  return.Coef        V2
## (Intercept)                     -16800.12*** (1839.22)
## THUBQ                                0.28***     (0.1)
## BANGCAP_CHPrimary school            1320.77  (1415.12)
## BANGCAP_CHSecondary-high school    3041.38** (1277.59)
## BANGCAP_CHUniversity              5285.94*** (1595.82)
## GIOITINH_CHFemale                 -1920.18**  (763.81)
## TROCAPNo                          6632.19***  (739.53)
## songuoidihoc                     12853.68***   (463.4)
## HOCTHEMYes                        3832.87***  (765.42)
## NOISONGRURAL                     -4433.44***  (741.33)
## PROVINCEQuangNinh                 -2758.79** (1316.23)
## PROVINCEVinhPhuc                  -2648.55** (1288.13)
## PROVINCEBacNinh                      -104.9  (1265.32)
## PROVINCEHaiDuong                  -2604.71** (1253.63)
## PROVINCEHaiPhong                     686.48  (1156.66)
## PROVINCEHungYen                     -953.59   (1344.4)
## PROVINCEThaiBinh                 -3815.28***  (1294.3)
## PROVINCEHaNam                      -2465.54* (1422.78)
## PROVINCENamDinh                    -1869.25  (1254.92)
## PROVINCENinhBinh                 -3610.89*** (1357.19)
## Log(scale)                           9.28***    (0.02)