rm(list = ls()) # Xoa ALL
setwd("D:/CoDaGD/FinalCODE/solieu")
load("CCCHIGDV3.RData")
require(compositions)
library(xtable)
require(tidyverse)
require(compositions)
require(reshape2)
library(robCompositions)
table(CCCHIGDV3$m2xc6)##
## High school Nursery School Primary School Secondary school
## 1002 1469 2778 1797
## University Vocational school
## 13 557
#------------Nghiên cứu cho ba cấp học của GIÁO DỤC PHỔ THÔNG: Tiểu học, THCS và THPD
CCCHIGDV3 <- CCCHIGDV3 %>%
filter(m2xc6 %in% c("Primary School","Secondary school", "High school" ))
load("GIAODUC20V3.RData")
#names(GIAODUC20V3)
#names(CCCHIGDV3)
CCCHIGDV3 <- left_join(CCCHIGDV3,
GIAODUC20V3 %>% select(c(ID, THUBQ, GIOITINH_CH, TUOI_CH, HONNHAN_CH, DANTOC_CH, TSNGUOI, NGHENGHIEP_CH, BANGCAP_CH, SONAMDANGHOC, SONUDANGHOC, songuoidihoc, songuoidihocFactor, CHIGD, TONGCHIGD)))
table(CCCHIGDV3$m2xc8)##
## C<f4>ng l?p D<e2>n l?p T? th?c Kh<e1>c NR
## 5507 44 22 4 0
levels(CCCHIGDV3$m2xc8) <- c("Cong lap", "Danlap-tuthuc", "Danlap-tuthuc","Cong lap","Cong lap")
levels(CCCHIGDV3$GIOITINH)## [1] "Male" "Female"
CCCHIGDV3 <- CCCHIGDV3[!is.na(CCCHIGDV3$THUBQ), ]#Cấp Tiểu học
CCCHIGDV3_prg_all1 <- CCCHIGDV3 %>% filter(m2xc6 == "Primary School") %>% mutate(
s1 = ( SGK + HOCPHI+QUANAO )/CHIALL,
s2 = ( DONGGOP+HOCTHEM+CHIGDKHAC)/CHIALL,
s3 = ( DUNGCU + TRAITUYEN)/CHIALL) #%>% select("s1", "s2", "s3")
length(CCCHIGDV3_prg_all1$s1[CCCHIGDV3_prg_all1$s1 == 0])## [1] 112
length(CCCHIGDV3_prg_all1$s2[CCCHIGDV3_prg_all1$s2 == 0])## [1] 104
length(CCCHIGDV3_prg_all1$s3[CCCHIGDV3_prg_all1$s3 == 0])## [1] 63
CCCHIGDV3_prg_all1 <- CCCHIGDV3_prg_all1 %>% filter(s1>0 & s2 >0 & s3>0)
mean(acomp(CCCHIGDV3_prg_all1[CCCHIGDV3_prg_all1$AREA == "DB Song Hong" , c("s1", "s2", "s3")]))## s1 s2 s3
## "0.22977599" "0.67266796" "0.09755605"
## attr(,"class")
## [1] "acomp"
mean(acomp(CCCHIGDV3_prg_all1[CCCHIGDV3_prg_all1$AREA == "TD MN Phia Bac" , c("s1", "s2", "s3")]))## s1 s2 s3
## "0.3551988" "0.4637290" "0.1810722"
## attr(,"class")
## [1] "acomp"
mean(acomp(CCCHIGDV3_prg_all1[CCCHIGDV3_prg_all1$AREA == "Mien Trung" , c("s1", "s2", "s3")]))## s1 s2 s3
## "0.3327236" "0.5248247" "0.1424517"
## attr(,"class")
## [1] "acomp"
mean(acomp(CCCHIGDV3_prg_all1[CCCHIGDV3_prg_all1$AREA == "Tay nguyen" , c("s1", "s2", "s3")]))## s1 s2 s3
## "0.4347799" "0.3852465" "0.1799736"
## attr(,"class")
## [1] "acomp"
mean(acomp(CCCHIGDV3_prg_all1[CCCHIGDV3_prg_all1$AREA == "Dong Nam Bo" , c("s1", "s2", "s3")]))## s1 s2 s3
## "0.3216198" "0.5654591" "0.1129211"
## attr(,"class")
## [1] "acomp"
mean(acomp(CCCHIGDV3_prg_all1[CCCHIGDV3_prg_all1$AREA == "DB Cuu Long" , c("s1", "s2", "s3")]))## s1 s2 s3
## "0.4466981" "0.3734841" "0.1798178"
## attr(,"class")
## [1] "acomp"
#-------------Tennary by hand
A <- c(0, 0)
B <- c(1, 0)
C <- c(0.5, sqrt(3) / 2)
CCCHIGDV3_prg_all1$X<- CCCHIGDV3_prg_all1[, "s1"] * A[1] + CCCHIGDV3_prg_all1[, "s2"] * B[1] + CCCHIGDV3_prg_all1[, "s3"] * C[1]
CCCHIGDV3_prg_all1$Y <- CCCHIGDV3_prg_all1[, "s1"] * A[2] + CCCHIGDV3_prg_all1[, "s2"] * B[2] + CCCHIGDV3_prg_all1[, "s3"] * C[2]
m.SH <- mean(acomp(CCCHIGDV3_prg_all1[CCCHIGDV3_prg_all1$AREA == "DB Song Hong" , c("s1", "s2", "s3")]))
m.SH <- data.frame(s1 = 0.22855422, s2 = 0.67401613, s3 = 0.09742965)
m.SH$X<- m.SH[, "s1"] * A[1] + m.SH[, "s2"] * B[1] + m.SH[, "s3"] * C[1]
m.SH$Y <- m.SH[, "s1"] * A[2] + m.SH[, "s2"] * B[2] + m.SH[, "s3"] * C[2]
m.DNB <- mean(acomp(CCCHIGDV3_prg_all1[CCCHIGDV3_prg_all1$AREA == "Dong Nam Bo" , c("s1", "s2", "s3")]))
m.DNB <- data.frame(s1 = 0.3189416, s2 = 0.5690714, s3 = 0.1119870)
m.DNB$X<- m.DNB[, "s1"] * A[1] + m.DNB[, "s2"] * B[1] + m.DNB[, "s3"] * C[1]
m.DNB$Y <- m.DNB[, "s1"] * A[2] + m.DNB[, "s2"] * B[2] + m.DNB[, "s3"] * C[2]
m.MNPB <- mean(acomp(CCCHIGDV3_prg_all1[CCCHIGDV3_prg_all1$AREA == "TD MN Phia Bac" , c("s1", "s2", "s3")]))
m.MNPB <- data.frame(s1 = 0.3553339, s2 = 0.4632153, s3 = 0.1814508)
m.MNPB$X<- m.MNPB[, "s1"] * A[1] + m.MNPB[, "s2"] * B[1] + m.MNPB[, "s3"] * C[1]
m.MNPB$Y <- m.MNPB[, "s1"] * A[2] + m.MNPB[, "s2"] * B[2] + m.MNPB[, "s3"] * C[2]
m.MT <- mean(acomp(CCCHIGDV3_prg_all1[CCCHIGDV3_prg_all1$AREA == "Mien Trung" , c("s1", "s2", "s3")]))
m.MT <- data.frame(s1 = 0.3330167, s2 = 0.5267577, s3 = 0.1402256)
m.MT$X<- m.MT[, "s1"] * A[1] + m.MT[, "s2"] * B[1] + m.MT[, "s3"] * C[1]
m.MT$Y <- m.MT[, "s1"] * A[2] + m.MT[, "s2"] * B[2] + m.MT[, "s3"] * C[2]
m.TN <- mean(acomp(CCCHIGDV3_prg_all1[CCCHIGDV3_prg_all1$AREA == "Tay nguyen" , c("s1", "s2", "s3")]))
m.TN <- data.frame(s1 = 0.4351050, s2 = 0.3866537, s3 = 0.1782413)
m.TN$X<- m.TN[, "s1"] * A[1] + m.TN[, "s2"] * B[1] + m.TN[, "s3"] * C[1]
m.TN$Y <- m.TN[, "s1"] * A[2] + m.TN[, "s2"] * B[2] + m.TN[, "s3"] * C[2]
m.DBSCL <- mean(acomp(CCCHIGDV3_prg_all1[CCCHIGDV3_prg_all1$AREA == "DB Cuu Long" , c("s1", "s2", "s3")]))
m.DBSCL <- data.frame(s1 = 0.4458013, s2 = 0.3744958, s3 = 0.1797029)
m.DBSCL$X<- m.DBSCL[, "s1"] * A[1] + m.DBSCL[, "s2"] * B[1] + m.DBSCL[, "s3"] * C[1]
m.DBSCL$Y <- m.DBSCL[, "s1"] * A[2] + m.DBSCL[, "s2"] * B[2] + m.DBSCL[, "s3"] * C[2]
plot(rbind(A, B, C), xaxt = "n", yaxt = "n", frame = F,
type = "n", xlab = "", ylab = "", asp = 1,
main = "Tieu hoc", #Bandwidth = c(0.05, 0.05),
ylim = c(-0.1, sqrt(3)/2), cex.main = 2)
text(c(0.05, 0.95, 1/2+0.07), c(0-0.06, -0.06, sqrt(3)/2-0.05),
c("s1", "s2", "s3"), pos = 3, #"s1", "s2", "s3"
cex = 0.5)
lines(c(0, 1), c(0, 0), col = "black")
lines(c(0, 0.5), c(0, sqrt(3)/2), col = "black")
lines(c(1, 0.5), c(0, sqrt(3)/2), col = "black")
points(CCCHIGDV3_prg_all1$X, CCCHIGDV3_prg_all1$Y, pch = 16, cex=0.5, col="grey")
points(m.SH$X, m.SH$Y, pch = 16, cex=1, col = "blue")
points(m.DNB$X, m.DNB$Y, pch = 16, cex=1, col = "green")
points(m.MNPB$X, m.MNPB$Y, pch = 16, cex=1, col = "yellow")
points(m.MT$X, m.MT$Y, pch = 16, cex=1, col = "red")
points(m.TN$X, m.TN$Y, pch = 16, cex=1, col = "darkviolet")
points(m.DBSCL$X, m.DBSCL$Y, pch = 16, cex=1, col = "pink")
legend("topright", col = c("blue", "green","yellow","red","darkviolet","pink" ), pch = c(16, 16),
legend = c("SH", "DNB", "MNPB", "MT", "TN", "DBSCL"))#++++ Cấp THCS
CCCHIGDV3_sec_all1 <- CCCHIGDV3 %>% filter(m2xc6 == "Secondary school") %>% mutate(
s1 = ( SGK + HOCPHI+QUANAO )/CHIALL,
s2 = ( DONGGOP+HOCTHEM+CHIGDKHAC)/CHIALL,
s3 = ( DUNGCU + TRAITUYEN)/CHIALL) #%>% select("s1", "s2", "s3")
length(CCCHIGDV3_sec_all1$s1[CCCHIGDV3_sec_all1$s1 == 0])## [1] 56
length(CCCHIGDV3_sec_all1$s2[CCCHIGDV3_sec_all1$s2 == 0])## [1] 48
length(CCCHIGDV3_sec_all1$s3[CCCHIGDV3_sec_all1$s3 == 0])## [1] 27
CCCHIGDV3_sec_all1 <- CCCHIGDV3_sec_all1 %>% filter(s1>0 & s2 >0 & s3>0)
mean(acomp(CCCHIGDV3_sec_all1[CCCHIGDV3_sec_all1$AREA == "DB Song Hong" , c("s1", "s2", "s3")]))## s1 s2 s3
## "0.31729908" "0.60009903" "0.08260189"
## attr(,"class")
## [1] "acomp"
mean(acomp(CCCHIGDV3_sec_all1[CCCHIGDV3_sec_all1$AREA == "TD MN Phia Bac" , c("s1", "s2", "s3")]))## s1 s2 s3
## "0.4469919" "0.3907558" "0.1622523"
## attr(,"class")
## [1] "acomp"
mean(acomp(CCCHIGDV3_sec_all1[CCCHIGDV3_sec_all1$AREA == "Mien Trung" , c("s1", "s2", "s3")]))## s1 s2 s3
## "0.39739515" "0.50289373" "0.09971113"
## attr(,"class")
## [1] "acomp"
mean(acomp(CCCHIGDV3_sec_all1[CCCHIGDV3_sec_all1$AREA == "Tay nguyen" , c("s1", "s2", "s3")]))## s1 s2 s3
## "0.4452001" "0.4345334" "0.1202665"
## attr(,"class")
## [1] "acomp"
mean(acomp(CCCHIGDV3_sec_all1[CCCHIGDV3_sec_all1$AREA == "Dong Nam Bo" , c("s1", "s2", "s3")]))## s1 s2 s3
## "0.40679900" "0.50939120" "0.08380979"
## attr(,"class")
## [1] "acomp"
mean(acomp(CCCHIGDV3_sec_all1[CCCHIGDV3_sec_all1$AREA == "DB Cuu Long" , c("s1", "s2", "s3")]))## s1 s2 s3
## "0.5490749" "0.3184657" "0.1324594"
## attr(,"class")
## [1] "acomp"
#-------------Tennary by hand, thank Tibo
A <- c(0, 0)
B <- c(1, 0)
C <- c(0.5, sqrt(3) / 2)
CCCHIGDV3_sec_all1$X<- CCCHIGDV3_sec_all1[, "s1"] * A[1] + CCCHIGDV3_sec_all1[, "s2"] * B[1] + CCCHIGDV3_sec_all1[, "s3"] * C[1]
CCCHIGDV3_sec_all1$Y <- CCCHIGDV3_sec_all1[, "s1"] * A[2] + CCCHIGDV3_sec_all1[, "s2"] * B[2] + CCCHIGDV3_sec_all1[, "s3"] * C[2]
m.SH <- mean(acomp(CCCHIGDV3_sec_all1[CCCHIGDV3_sec_all1$AREA == "DB Song Hong" , c("s1", "s2", "s3")]))
m.SH <- data.frame(s1 = 0.31545119, s2 = 0.60965076, s3 = 0.07489805)
m.SH$X<- m.SH[, "s1"] * A[1] + m.SH[, "s2"] * B[1] + m.SH[, "s3"] * C[1]
m.SH$Y <- m.SH[, "s1"] * A[2] + m.SH[, "s2"] * B[2] + m.SH[, "s3"] * C[2]
m.DNB <- mean(acomp(CCCHIGDV3_sec_all1[CCCHIGDV3_sec_all1$AREA == "Dong Nam Bo" , c("s1", "s2", "s3")]))
m.DNB <- data.frame(s1 = 0.41015440, s2 = 0.51737431, s3 = 0.07247128)
m.DNB$X<- m.DNB[, "s1"] * A[1] + m.DNB[, "s2"] * B[1] + m.DNB[, "s3"] * C[1]
m.DNB$Y <- m.DNB[, "s1"] * A[2] + m.DNB[, "s2"] * B[2] + m.DNB[, "s3"] * C[2]
m.MNPB <- mean(acomp(CCCHIGDV3_sec_all1[CCCHIGDV3_sec_all1$AREA == "TD MN Phia Bac" , c("s1", "s2", "s3")]))
m.MNPB <- data.frame(s1 = 0.4284185, s2 = 0.4129878, s3 = 0.1585937)
m.MNPB$X<- m.MNPB[, "s1"] * A[1] + m.MNPB[, "s2"] * B[1] + m.MNPB[, "s3"] * C[1]
m.MNPB$Y <- m.MNPB[, "s1"] * A[2] + m.MNPB[, "s2"] * B[2] + m.MNPB[, "s3"] * C[2]
m.MT <- mean(acomp(CCCHIGDV3_sec_all1[CCCHIGDV3_sec_all1$AREA == "Mien Trung" , c("s1", "s2", "s3")]))
m.MT <- data.frame(s1 = 0.38887893, s2 = 0.52216255, s3 = 0.08895852)
m.MT$X<- m.MT[, "s1"] * A[1] + m.MT[, "s2"] * B[1] + m.MT[, "s3"] * C[1]
m.MT$Y <- m.MT[, "s1"] * A[2] + m.MT[, "s2"] * B[2] + m.MT[, "s3"] * C[2]
m.TN <- mean(acomp(CCCHIGDV3_sec_all1[CCCHIGDV3_sec_all1$AREA == "Tay nguyen" , c("s1", "s2", "s3")]))
m.TN <- data.frame(s1 = 0.4301452, s2 = 0.4555126, s3 = 0.1143423)
m.TN$X<- m.TN[, "s1"] * A[1] + m.TN[, "s2"] * B[1] + m.TN[, "s3"] * C[1]
m.TN$Y <- m.TN[, "s1"] * A[2] + m.TN[, "s2"] * B[2] + m.TN[, "s3"] * C[2]
m.DBSCL <- mean(acomp(CCCHIGDV3_sec_all1[CCCHIGDV3_sec_all1$AREA == "DB Cuu Long" , c("s1", "s2", "s3")]))
m.DBSCL <- data.frame(s1 = 0.5354573, s2 = 0.3399442, s3 = 0.1245985)
m.DBSCL$X<- m.DBSCL[, "s1"] * A[1] + m.DBSCL[, "s2"] * B[1] + m.DBSCL[, "s3"] * C[1]
m.DBSCL$Y <- m.DBSCL[, "s1"] * A[2] + m.DBSCL[, "s2"] * B[2] + m.DBSCL[, "s3"] * C[2]
plot(rbind(A, B, C), xaxt = "n", yaxt = "n", frame = F,
type = "n", xlab = "", ylab = "", asp = 1,
main = "Trung hoc Co so", #Bandwidth = c(0.05, 0.05),
ylim = c(-0.1, sqrt(3)/2), cex.main = 2)
text(c(0.05, 0.95, 1/2+0.07), c(0-0.06, -0.06, sqrt(3)/2-0.05),
c("s1", "s2", "s3"), pos = 3, #"s1", "s2", "s3"
cex = 0.5)
lines(c(0, 1), c(0, 0), col = "black")
lines(c(0, 0.5), c(0, sqrt(3)/2), col = "black")
lines(c(1, 0.5), c(0, sqrt(3)/2), col = "black")
points(CCCHIGDV3_sec_all1$X, CCCHIGDV3_sec_all1$Y, pch = 16, cex=0.5, col="grey")
points(m.SH$X, m.SH$Y, pch = 16, cex=1, col = "blue")
points(m.DNB$X, m.DNB$Y, pch = 16, cex=1, col = "green")
points(m.MNPB$X, m.MNPB$Y, pch = 16, cex=1, col = "yellow")
points(m.MT$X, m.MT$Y, pch = 16, cex=1, col = "red")
points(m.TN$X, m.TN$Y, pch = 16, cex=1, col = "darkviolet")
points(m.DBSCL$X, m.DBSCL$Y, pch = 16, cex=1, col = "pink")
legend("topright", col = c("blue", "green","yellow","red","darkviolet","pink" ), pch = c(16, 16),
legend = c("SH", "DNB", "MNPB", "MT", "TN", "DBSCL"))#=============================== THPT =======================================================
CCCHIGDV3_hig_all1 <- CCCHIGDV3 %>% filter(m2xc6 == "High school") %>% mutate(
s1 = ( SGK + HOCPHI+QUANAO )/CHIALL,
s2 = ( DONGGOP+HOCTHEM+CHIGDKHAC)/CHIALL,
s3 = ( DUNGCU + TRAITUYEN)/CHIALL) #%>% select("s1", "s2", "s3")
length(CCCHIGDV3_hig_all1$s1[CCCHIGDV3_hig_all1$s1 == 0])## [1] 7
length(CCCHIGDV3_hig_all1$s2[CCCHIGDV3_hig_all1$s2 == 0])## [1] 9
length(CCCHIGDV3_hig_all1$s3[CCCHIGDV3_hig_all1$s3 == 0])## [1] 10
CCCHIGDV3_hig_all1 <- CCCHIGDV3_hig_all1 %>% filter(s1>0 & s2 >0 & s3>0)
mean(acomp(CCCHIGDV3_hig_all1[CCCHIGDV3_hig_all1$AREA == "DB Song Hong" , c("s1", "s2", "s3")]))## s1 s2 s3
## "0.32441481" "0.60973589" "0.06584929"
## attr(,"class")
## [1] "acomp"
mean(acomp(CCCHIGDV3_hig_all1[CCCHIGDV3_hig_all1$AREA == "TD MN Phia Bac" , c("s1", "s2", "s3")]))## s1 s2 s3
## "0.3978807" "0.4740392" "0.1280801"
## attr(,"class")
## [1] "acomp"
mean(acomp(CCCHIGDV3_hig_all1[CCCHIGDV3_hig_all1$AREA == "Mien Trung" , c("s1", "s2", "s3")]))## s1 s2 s3
## "0.37606268" "0.55154297" "0.07239435"
## attr(,"class")
## [1] "acomp"
mean(acomp(CCCHIGDV3_hig_all1[CCCHIGDV3_hig_all1$AREA == "Tay nguyen" , c("s1", "s2", "s3")]))## s1 s2 s3
## "0.3875022" "0.5117863" "0.1007115"
## attr(,"class")
## [1] "acomp"
mean(acomp(CCCHIGDV3_hig_all1[CCCHIGDV3_hig_all1$AREA == "Dong Nam Bo" , c("s1", "s2", "s3")]))## s1 s2 s3
## "0.43841879" "0.50132007" "0.06026114"
## attr(,"class")
## [1] "acomp"
mean(acomp(CCCHIGDV3_hig_all1[CCCHIGDV3_hig_all1$AREA == "DB Cuu Long" , c("s1", "s2", "s3")]))## s1 s2 s3
## "0.5222032" "0.3640084" "0.1137884"
## attr(,"class")
## [1] "acomp"
#-------------Tennary by hand, thank Tibo
A <- c(0, 0)
B <- c(1, 0)
C <- c(0.5, sqrt(3) / 2)
CCCHIGDV3_hig_all1$X<- CCCHIGDV3_hig_all1[, "s1"] * A[1] + CCCHIGDV3_hig_all1[, "s2"] * B[1] + CCCHIGDV3_hig_all1[, "s3"] * C[1]
CCCHIGDV3_hig_all1$Y <- CCCHIGDV3_hig_all1[, "s1"] * A[2] + CCCHIGDV3_hig_all1[, "s2"] * B[2] + CCCHIGDV3_hig_all1[, "s3"] * C[2]
m.SH <- mean(acomp(CCCHIGDV3_hig_all1[CCCHIGDV3_hig_all1$AREA == "DB Song Hong" , c("s1", "s2", "s3")]))
m.SH <- data.frame(s1 = 0.3209782, s2 = 0.6136370, s3 = 0.0653847)
m.SH$X<- m.SH[, "s1"] * A[1] + m.SH[, "s2"] * B[1] + m.SH[, "s3"] * C[1]
m.SH$Y <- m.SH[, "s1"] * A[2] + m.SH[, "s2"] * B[2] + m.SH[, "s3"] * C[2]
m.DNB <- mean(acomp(CCCHIGDV3_hig_all1[CCCHIGDV3_hig_all1$AREA == "Dong Nam Bo" , c("s1", "s2", "s3")]))
m.DNB <- data.frame(s1 = 0.42889762, s2 = 0.51008902, s3 = 0.06101336)
m.DNB$X<- m.DNB[, "s1"] * A[1] + m.DNB[, "s2"] * B[1] + m.DNB[, "s3"] * C[1]
m.DNB$Y <- m.DNB[, "s1"] * A[2] + m.DNB[, "s2"] * B[2] + m.DNB[, "s3"] * C[2]
m.MNPB <- mean(acomp(CCCHIGDV3_hig_all1[CCCHIGDV3_hig_all1$AREA == "TD MN Phia Bac" , c("s1", "s2", "s3")]))
m.MNPB <- data.frame(s1 = 0.3956486, s2 = 0.4765080, s3 = 0.1278435)
m.MNPB$X<- m.MNPB[, "s1"] * A[1] + m.MNPB[, "s2"] * B[1] + m.MNPB[, "s3"] * C[1]
m.MNPB$Y <- m.MNPB[, "s1"] * A[2] + m.MNPB[, "s2"] * B[2] + m.MNPB[, "s3"] * C[2]
m.MT <- mean(acomp(CCCHIGDV3_hig_all1[CCCHIGDV3_hig_all1$AREA == "Mien Trung" , c("s1", "s2", "s3")]))
m.MT <- data.frame(s1 = 0.37611861, s2 = 0.55118872, s3 = 0.07269268)
m.MT$X<- m.MT[, "s1"] * A[1] + m.MT[, "s2"] * B[1] + m.MT[, "s3"] * C[1]
m.MT$Y <- m.MT[, "s1"] * A[2] + m.MT[, "s2"] * B[2] + m.MT[, "s3"] * C[2]
m.TN <- mean(acomp(CCCHIGDV3_hig_all1[CCCHIGDV3_hig_all1$AREA == "Tay nguyen" , c("s1", "s2", "s3")]))
m.TN <- data.frame(s1 = 0.38830644, s2 = 0.51229418, s3 = 0.09939937)
m.TN$X<- m.TN[, "s1"] * A[1] + m.TN[, "s2"] * B[1] + m.TN[, "s3"] * C[1]
m.TN$Y <- m.TN[, "s1"] * A[2] + m.TN[, "s2"] * B[2] + m.TN[, "s3"] * C[2]
m.DBSCL <- mean(acomp(CCCHIGDV3_hig_all1[CCCHIGDV3_hig_all1$AREA == "DB Cuu Long" , c("s1", "s2", "s3")]))
m.DBSCL <- data.frame(s1 = 0.5156451, s2 = 0.3718453, s3 = 0.1125095)
m.DBSCL$X<- m.DBSCL[, "s1"] * A[1] + m.DBSCL[, "s2"] * B[1] + m.DBSCL[, "s3"] * C[1]
m.DBSCL$Y <- m.DBSCL[, "s1"] * A[2] + m.DBSCL[, "s2"] * B[2] + m.DBSCL[, "s3"] * C[2]
plot(rbind(A, B, C), xaxt = "n", yaxt = "n", frame = F,
type = "n", xlab = "", ylab = "", asp = 1,
main = "Trung hoc Pho thong", #Bandwidth = c(0.05, 0.05),
ylim = c(-0.1, sqrt(3)/2), cex.main = 2)
text(c(0.05, 0.95, 1/2+0.07), c(0-0.06, -0.06, sqrt(3)/2-0.05),
c("s1", "s2", "s3"), pos = 3, #"s1", "s2", "s3"
cex = 0.5)
lines(c(0, 1), c(0, 0), col = "black")
lines(c(0, 0.5), c(0, sqrt(3)/2), col = "black")
lines(c(1, 0.5), c(0, sqrt(3)/2), col = "black")
points(CCCHIGDV3_hig_all1$X, CCCHIGDV3_hig_all1$Y, pch = 16, cex=0.5, col="grey")
points(m.SH$X, m.SH$Y, pch = 16, cex=1, col = "blue")
points(m.DNB$X, m.DNB$Y, pch = 16, cex=1, col = "green")
points(m.MNPB$X, m.MNPB$Y, pch = 16, cex=1, col = "yellow")
points(m.MT$X, m.MT$Y, pch = 16, cex=1, col = "red")
points(m.TN$X, m.TN$Y, pch = 16, cex=1, col = "darkviolet")
points(m.DBSCL$X, m.DBSCL$Y, pch = 16, cex=1, col = "pink")
legend("topright", col = c("blue","yellow", "red","darkviolet", "green", "pink" ), pch = c(16, 16),
legend = c("Đồng bằng sông Hồng", "Trung du và Miền núi phía Bắc","Bắc Trung Bộ và DH Miền Trung ",
"Tây Nguyên", "Đông Nam Bộ", "Đồng bằng sông Cửu Long"), cex=0.6)require(tableone)
factorVars <- c("m2xc8", "GIOITINH", "TROCAP", "NOISONG", "AREA",
"songuoidihocFactor", "GIOITINH_CH",
"HONNHAN_CH","DANTOC_CH",
"NGHENGHIEP_CH","BANGCAP_CH" )
vars <- c("THUBQ","TONGCHIGD","CHIALL", "CHIGD",
"SONAMDANGHOC", "SONUDANGHOC",
"m2xc8", "GIOITINH", "TROCAP", "NOISONG", "AREA",
"songuoidihocFactor", "GIOITINH_CH",
"HONNHAN_CH","DANTOC_CH", "TUOI_CH", "TSNGUOI",
"NGHENGHIEP_CH","BANGCAP_CH" )
Destieuhoc <- CreateTableOne(vars = vars,
factorVars = factorVars,
data = CCCHIGDV3 %>%
filter(m2xc6 == "Primary School"))
Destieuhoc <- print(Destieuhoc)##
## Overall
## n 2749
## THUBQ (mean (SD)) 3481.52 (2603.37)
## TONGCHIGD (mean (SD)) 5.58 (5.15)
## CHIALL (mean (SD)) 2399.78 (2623.26)
## CHIGD (mean (SD)) 6391.56 (7965.06)
## SONAMDANGHOC (mean (SD)) 0.98 (0.77)
## SONUDANGHOC (mean (SD)) 1.00 (0.85)
## m2xc8 = Danlap-tuthuc (%) 20 ( 0.7)
## GIOITINH = Female (%) 1318 (47.9)
## TROCAP = No (%) 49 ( 1.8)
## NOISONG = RURAL (%) 1934 (70.4)
## AREA (%)
## DB Cuu Long 490 (17.8)
## DB Song Hong 619 (22.5)
## Dong Nam Bo 195 ( 7.1)
## Mien Trung 675 (24.6)
## Tay nguyen 223 ( 8.1)
## TD MN Phia Bac 547 (19.9)
## songuoidihocFactor (%)
## 1 640 (23.3)
## 2 1628 (59.2)
## 3 481 (17.5)
## GIOITINH_CH = Female (%) 568 (20.7)
## HONNHAN_CH = Married (%) 2442 (88.8)
## DANTOC_CH = Minority (%) 588 (21.4)
## TUOI_CH (mean (SD)) 45.00 (12.87)
## TSNGUOI (mean (SD)) 4.81 (1.37)
## NGHENGHIEP_CH (%)
## KINHDOANHDICHVU 640 (23.3)
## LAMCONGANLUONG 1421 (51.7)
## NONGLAMTHUYSAN 688 (25.0)
## BANGCAP_CH (%)
## No qualification 470 (17.1)
## Primary school 675 (24.6)
## Secondary-high school 1324 (48.2)
## University 280 (10.2)
DesTHCS <- CreateTableOne(vars = vars,
factorVars = factorVars,
data = CCCHIGDV3 %>%
filter(m2xc6 == "Secondary school"))
DesTHCS <- print(DesTHCS)##
## Overall
## n 1757
## THUBQ (mean (SD)) 3696.37 (4863.24)
## TONGCHIGD (mean (SD)) 7.62 (6.60)
## CHIALL (mean (SD)) 3820.34 (3989.59)
## CHIGD (mean (SD)) 9003.49 (10384.16)
## SONAMDANGHOC (mean (SD)) 1.00 (0.77)
## SONUDANGHOC (mean (SD)) 1.02 (0.90)
## m2xc8 = Danlap-tuthuc (%) 8 ( 0.5)
## GIOITINH = Female (%) 862 (49.1)
## TROCAP = No (%) 1382 (78.7)
## NOISONG = RURAL (%) 1213 (69.0)
## AREA (%)
## DB Cuu Long 335 (19.1)
## DB Song Hong 392 (22.3)
## Dong Nam Bo 133 ( 7.6)
## Mien Trung 407 (23.2)
## Tay nguyen 137 ( 7.8)
## TD MN Phia Bac 353 (20.1)
## songuoidihocFactor (%)
## 1 436 (24.8)
## 2 960 (54.6)
## 3 361 (20.5)
## GIOITINH_CH = Female (%) 360 (20.5)
## HONNHAN_CH = Married (%) 1541 (87.7)
## DANTOC_CH = Minority (%) 368 (20.9)
## TUOI_CH (mean (SD)) 46.11 (11.45)
## TSNGUOI (mean (SD)) 4.59 (1.37)
## NGHENGHIEP_CH (%)
## KINHDOANHDICHVU 387 (22.0)
## LAMCONGANLUONG 932 (53.0)
## NONGLAMTHUYSAN 438 (24.9)
## BANGCAP_CH (%)
## No qualification 304 (17.3)
## Primary school 444 (25.3)
## Secondary-high school 848 (48.3)
## University 161 ( 9.2)
DesTHPT <- CreateTableOne(vars = vars,
factorVars = factorVars,
data = CCCHIGDV3 %>%
filter(m2xc6 == "High school"))
DesTHPT <- print(DesTHPT)##
## Overall
## n 985
## THUBQ (mean (SD)) 3950.96 (2738.51)
## TONGCHIGD (mean (SD)) 9.54 (7.34)
## CHIALL (mean (SD)) 5874.44 (5255.06)
## CHIGD (mean (SD)) 11389.47 (10856.05)
## SONAMDANGHOC (mean (SD)) 0.91 (0.74)
## SONUDANGHOC (mean (SD)) 0.92 (0.79)
## m2xc8 = Danlap-tuthuc (%) 32 ( 3.2)
## GIOITINH = Female (%) 491 (49.8)
## TROCAP = No (%) 877 (89.0)
## NOISONG = RURAL (%) 661 (67.1)
## AREA (%)
## DB Cuu Long 161 (16.3)
## DB Song Hong 237 (24.1)
## Dong Nam Bo 87 ( 8.8)
## Mien Trung 254 (25.8)
## Tay nguyen 66 ( 6.7)
## TD MN Phia Bac 180 (18.3)
## songuoidihocFactor (%)
## 1 353 (35.8)
## 2 474 (48.1)
## 3 158 (16.0)
## GIOITINH_CH = Female (%) 190 (19.3)
## HONNHAN_CH = Married (%) 877 (89.0)
## DANTOC_CH = Minority (%) 141 (14.3)
## TUOI_CH (mean (SD)) 48.32 (10.15)
## TSNGUOI (mean (SD)) 4.37 (1.23)
## NGHENGHIEP_CH (%)
## KINHDOANHDICHVU 240 (24.4)
## LAMCONGANLUONG 522 (53.0)
## NONGLAMTHUYSAN 223 (22.6)
## BANGCAP_CH (%)
## No qualification 137 (13.9)
## Primary school 264 (26.8)
## Secondary-high school 485 (49.2)
## University 99 (10.1)
DesALL <- cbind(Destieuhoc, DesTHCS)
DesALL <- cbind(DesALL,DesTHPT)
DesALL ## Overall Overall
## n " 2749" " 1757"
## THUBQ (mean (SD)) "3481.52 (2603.37)" "3696.37 (4863.24)"
## TONGCHIGD (mean (SD)) " 5.58 (5.15)" " 7.62 (6.60)"
## CHIALL (mean (SD)) "2399.78 (2623.26)" "3820.34 (3989.59)"
## CHIGD (mean (SD)) "6391.56 (7965.06)" "9003.49 (10384.16)"
## SONAMDANGHOC (mean (SD)) " 0.98 (0.77)" " 1.00 (0.77)"
## SONUDANGHOC (mean (SD)) " 1.00 (0.85)" " 1.02 (0.90)"
## m2xc8 = Danlap-tuthuc (%) " 20 ( 0.7) " " 8 ( 0.5) "
## GIOITINH = Female (%) " 1318 (47.9) " " 862 (49.1) "
## TROCAP = No (%) " 49 ( 1.8) " " 1382 (78.7) "
## NOISONG = RURAL (%) " 1934 (70.4) " " 1213 (69.0) "
## AREA (%) " " " "
## DB Cuu Long " 490 (17.8) " " 335 (19.1) "
## DB Song Hong " 619 (22.5) " " 392 (22.3) "
## Dong Nam Bo " 195 ( 7.1) " " 133 ( 7.6) "
## Mien Trung " 675 (24.6) " " 407 (23.2) "
## Tay nguyen " 223 ( 8.1) " " 137 ( 7.8) "
## TD MN Phia Bac " 547 (19.9) " " 353 (20.1) "
## songuoidihocFactor (%) " " " "
## 1 " 640 (23.3) " " 436 (24.8) "
## 2 " 1628 (59.2) " " 960 (54.6) "
## 3 " 481 (17.5) " " 361 (20.5) "
## GIOITINH_CH = Female (%) " 568 (20.7) " " 360 (20.5) "
## HONNHAN_CH = Married (%) " 2442 (88.8) " " 1541 (87.7) "
## DANTOC_CH = Minority (%) " 588 (21.4) " " 368 (20.9) "
## TUOI_CH (mean (SD)) " 45.00 (12.87)" " 46.11 (11.45)"
## TSNGUOI (mean (SD)) " 4.81 (1.37)" " 4.59 (1.37)"
## NGHENGHIEP_CH (%) " " " "
## KINHDOANHDICHVU " 640 (23.3) " " 387 (22.0) "
## LAMCONGANLUONG " 1421 (51.7) " " 932 (53.0) "
## NONGLAMTHUYSAN " 688 (25.0) " " 438 (24.9) "
## BANGCAP_CH (%) " " " "
## No qualification " 470 (17.1) " " 304 (17.3) "
## Primary school " 675 (24.6) " " 444 (25.3) "
## Secondary-high school " 1324 (48.2) " " 848 (48.3) "
## University " 280 (10.2) " " 161 ( 9.2) "
## Overall
## n " 985"
## THUBQ (mean (SD)) " 3950.96 (2738.51)"
## TONGCHIGD (mean (SD)) " 9.54 (7.34)"
## CHIALL (mean (SD)) " 5874.44 (5255.06)"
## CHIGD (mean (SD)) "11389.47 (10856.05)"
## SONAMDANGHOC (mean (SD)) " 0.91 (0.74)"
## SONUDANGHOC (mean (SD)) " 0.92 (0.79)"
## m2xc8 = Danlap-tuthuc (%) " 32 ( 3.2) "
## GIOITINH = Female (%) " 491 (49.8) "
## TROCAP = No (%) " 877 (89.0) "
## NOISONG = RURAL (%) " 661 (67.1) "
## AREA (%) " "
## DB Cuu Long " 161 (16.3) "
## DB Song Hong " 237 (24.1) "
## Dong Nam Bo " 87 ( 8.8) "
## Mien Trung " 254 (25.8) "
## Tay nguyen " 66 ( 6.7) "
## TD MN Phia Bac " 180 (18.3) "
## songuoidihocFactor (%) " "
## 1 " 353 (35.8) "
## 2 " 474 (48.1) "
## 3 " 158 (16.0) "
## GIOITINH_CH = Female (%) " 190 (19.3) "
## HONNHAN_CH = Married (%) " 877 (89.0) "
## DANTOC_CH = Minority (%) " 141 (14.3) "
## TUOI_CH (mean (SD)) " 48.32 (10.15)"
## TSNGUOI (mean (SD)) " 4.37 (1.23)"
## NGHENGHIEP_CH (%) " "
## KINHDOANHDICHVU " 240 (24.4) "
## LAMCONGANLUONG " 522 (53.0) "
## NONGLAMTHUYSAN " 223 (22.6) "
## BANGCAP_CH (%) " "
## No qualification " 137 (13.9) "
## Primary school " 264 (26.8) "
## Secondary-high school " 485 (49.2) "
## University " 99 (10.1) "
#xtable(DesALL, type = "latex", file = "DesALL.tex")
#write.csv(DesALL, file = "DesALL.csv") ## Bảng 1 #===============Hinh ve
CCCHIGDV3$m2xc6 <- factor(CCCHIGDV3$m2xc6, levels = c("Primary School", "Secondary school", "High school" ))
#=====Hình 1:
CCCHIGDV4 <- CCCHIGDV3[, c("m2xc6","HOCPHI", "TRAITUYEN", "DONGGOP",
"QUANAO", "SGK", "DUNGCU",
"HOCTHEM", "CHIGDKHAC")]
require(reshape2)
CCCHIGDV4 <- melt(CCCHIGDV4, id.vars ="m2xc6" )
ggplot(CCCHIGDV4, aes(y = value, x = variable, fill = m2xc6)) +
geom_boxplot(outlier.shape = NA) +
ylim (c(0, 5000)) +
ylab("nghin VND") +
# stat_summary(fun.y = mean, colour = "darkred", geom = "point",
# shape = 18, size = 3,show_guide = FALSE) +
ggtitle("chi tieu cho giao duc theo khoan muc and cap hoc") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))CCCHIGDV5 <- CCCHIGDV3 %>% mutate(
s1 = ( SGK + HOCPHI+QUANAO )*100/CHIALL,
s2 = ( DONGGOP+HOCTHEM+CHIGDKHAC)*100/CHIALL,
s3 = ( DUNGCU + TRAITUYEN)*100/CHIALL) %>% select(s1, s2, s3, m2xc6)
CCCHIGDV5 <- melt(CCCHIGDV5, id.vars ="m2xc6" )
ggplot(CCCHIGDV5, aes(y = value, x = variable, fill = m2xc6)) +
geom_boxplot(outlier.shape = NA) +
# ylim (c(0, 5000)) +
ylab("Phantram") +
ggtitle("Tỉ lệ chi tieu cho giao duc theo khoan muc and cap hoc") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))#===================compositional regression1: Tính chi tiết
#ILR_1=√(2/3) ln〖s_1/(s_2 s_3 )〗; ILR_2= √(1/2) ln〖s_2/s_3 〗
# Công thức trong bài báo!
CCCHIGDV3_hig_all1 <- CCCHIGDV3_hig_all1 %>%
mutate( ILR1 = sqrt(2/3) * log(s1/sqrt(s2*s3)),
ILR2 = sqrt(1/2)* log(s2/s3))
#head(CCCHIGDV3_hig_all1)
model1_hig <- lm (ILR1 ~ log(THUBQ) + GIOITINH + TROCAP + m2xc8 + songuoidihocFactor+ HONNHAN_CH + DANTOC_CH + NGHENGHIEP_CH + BANGCAP_CH + NOISONG + AREA, data = CCCHIGDV3_hig_all1) #bỏ + GIOITINH_CH, TSNGUOI, TUOI_CH
model2_hig <- lm (ILR2 ~ log(THUBQ) + GIOITINH + TROCAP + m2xc8 + songuoidihocFactor+ HONNHAN_CH + DANTOC_CH + NGHENGHIEP_CH + BANGCAP_CH + NOISONG + AREA, data = CCCHIGDV3_hig_all1) #bỏ + GIOITINH_CH, TSNGUOI, TUOI_CH
summary(model1_hig)##
## Call:
## lm(formula = ILR1 ~ log(THUBQ) + GIOITINH + TROCAP + m2xc8 +
## songuoidihocFactor + HONNHAN_CH + DANTOC_CH + NGHENGHIEP_CH +
## BANGCAP_CH + NOISONG + AREA, data = CCCHIGDV3_hig_all1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.11419 -0.38916 -0.03754 0.37528 2.80862
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.298434 0.340745 3.811 0.000148 ***
## log(THUBQ) -0.117142 0.040672 -2.880 0.004065 **
## GIOITINHFemale 0.005539 0.039201 0.141 0.887668
## TROCAPNo 0.411802 0.073650 5.591 2.95e-08 ***
## m2xc8Danlap-tuthuc 0.484225 0.112244 4.314 1.77e-05 ***
## songuoidihocFactor2 0.014201 0.043308 0.328 0.743049
## songuoidihocFactor3 0.007701 0.061143 0.126 0.899793
## HONNHAN_CHMarried -0.024533 0.064150 -0.382 0.702225
## DANTOC_CHMinority 0.250139 0.074917 3.339 0.000874 ***
## NGHENGHIEP_CHLAMCONGANLUONG 0.021016 0.050591 0.415 0.677942
## NGHENGHIEP_CHNONGLAMTHUYSAN -0.038420 0.061529 -0.624 0.532507
## BANGCAP_CHPrimary school 0.066654 0.065804 1.013 0.311362
## BANGCAP_CHSecondary-high school 0.066427 0.064954 1.023 0.306719
## BANGCAP_CHUniversity 0.098143 0.091302 1.075 0.282683
## NOISONGRURAL -0.021160 0.045835 -0.462 0.644427
## AREADB Song Hong -0.402177 0.064834 -6.203 8.28e-10 ***
## AREADong Nam Bo -0.071774 0.083371 -0.861 0.389510
## AREAMien Trung -0.273053 0.062705 -4.355 1.48e-05 ***
## AREATay nguyen -0.400247 0.091314 -4.383 1.30e-05 ***
## AREATD MN Phia Bac -0.469736 0.076573 -6.134 1.26e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6001 on 942 degrees of freedom
## Multiple R-squared: 0.1116, Adjusted R-squared: 0.09373
## F-statistic: 6.231 on 19 and 942 DF, p-value: 2.74e-15
summary(model2_hig)##
## Call:
## lm(formula = ILR2 ~ log(THUBQ) + GIOITINH + TROCAP + m2xc8 +
## songuoidihocFactor + HONNHAN_CH + DANTOC_CH + NGHENGHIEP_CH +
## BANGCAP_CH + NOISONG + AREA, data = CCCHIGDV3_hig_all1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.58541 -0.47834 -0.00064 0.52581 2.31572
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.11005 0.44345 0.248 0.80407
## log(THUBQ) 0.09769 0.05293 1.846 0.06527 .
## GIOITINHFemale 0.07169 0.05102 1.405 0.16031
## TROCAPNo 0.09888 0.09585 1.032 0.30250
## m2xc8Danlap-tuthuc -0.04495 0.14608 -0.308 0.75836
## songuoidihocFactor2 0.01629 0.05636 0.289 0.77263
## songuoidihocFactor3 -0.04403 0.07957 -0.553 0.58015
## HONNHAN_CHMarried -0.09111 0.08349 -1.091 0.27543
## DANTOC_CHMinority -0.46697 0.09750 -4.790 1.94e-06 ***
## NGHENGHIEP_CHLAMCONGANLUONG 0.03397 0.06584 0.516 0.60601
## NGHENGHIEP_CHNONGLAMTHUYSAN -0.05702 0.08008 -0.712 0.47655
## BANGCAP_CHPrimary school -0.03831 0.08564 -0.447 0.65471
## BANGCAP_CHSecondary-high school -0.03424 0.08453 -0.405 0.68555
## BANGCAP_CHUniversity -0.03524 0.11882 -0.297 0.76688
## NOISONGRURAL -0.17187 0.05965 -2.881 0.00405 **
## AREADB Song Hong 0.75871 0.08438 8.992 < 2e-16 ***
## AREADong Nam Bo 0.64208 0.10850 5.918 4.56e-09 ***
## AREAMien Trung 0.68519 0.08161 8.396 < 2e-16 ***
## AREATay nguyen 0.45227 0.11884 3.806 0.00015 ***
## AREATD MN Phia Bac 0.41357 0.09965 4.150 3.63e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7809 on 942 degrees of freedom
## Multiple R-squared: 0.1867, Adjusted R-squared: 0.1703
## F-statistic: 11.38 on 19 and 942 DF, p-value: < 2.2e-16
#primary
CCCHIGDV3_prg_all1 <- CCCHIGDV3_prg_all1 %>% mutate( ILR1 = sqrt(2/3) * log(s1/sqrt(s2*s3)),
ILR2 = sqrt(1/2)* log(s2/s3))
head(CCCHIGDV3_prg_all1)## ID IDind m2xc1 m2xc5 m2xc6 m2xc7 m2xc8
## 1 01_1_1_37_15 01_1_1_37_15_5 1 <NA> Primary School 2 Cong lap
## 2 01_2_67_17_14 01_2_67_17_14_3 3 <NA> Primary School 4 Cong lap
## 3 01_3_97_35_13 01_3_97_35_13_5 2 <NA> Primary School 3 Cong lap
## 4 01_3_106_37_14 01_3_106_37_14_4 3 <NA> Primary School 4 Cong lap
## 5 01_4_115_19_13 01_4_115_19_13_5 4 <NA> Primary School 5 Cong lap
## 6 01_4_115_19_13 01_4_115_19_13_6 0 <NA> Primary School 1 Cong lap
## tinh tentinh HOCPHI TRAITUYEN DONGGOP QUANAO SGK DUNGCU
## 1 01 Th<e0>nh ph? H<e0> N?i 0 0 3000 1000 400 800
## 2 01 Th<e0>nh ph? H<e0> N?i 0 0 1000 500 250 100
## 3 01 Th<e0>nh ph? H<e0> N?i 0 0 650 0 300 300
## 4 01 Th<e0>nh ph? H<e0> N?i 0 0 1300 900 450 400
## 5 01 Th<e0>nh ph? H<e0> N?i 0 0 1000 500 500 250
## 6 01 Th<e0>nh ph? H<e0> N?i 0 0 1000 800 400 300
## HOCTHEM CHIGDKHAC CHIALL TROCAP GIOITINH NOISONG AREA THUBQ
## 1 2700 300 8200 Yes Female URBAN DB Song Hong 4393
## 2 0 700 2550 Yes Male URBAN DB Song Hong 4611
## 3 0 0 1250 Yes Female URBAN DB Song Hong 3421
## 4 9600 950 13600 No Female URBAN DB Song Hong 5690
## 5 4500 2000 8750 Yes Male URBAN DB Song Hong 4741
## 6 2700 2500 7700 Yes Female URBAN DB Song Hong 4741
## GIOITINH_CH TUOI_CH HONNHAN_CH DANTOC_CH TSNGUOI NGHENGHIEP_CH
## 1 Male 62 Other Kinh 5 LAMCONGANLUONG
## 2 Female 65 Married Kinh 3 KINHDOANHDICHVU
## 3 Male 49 Married Kinh 5 KINHDOANHDICHVU
## 4 Female 64 Married Kinh 5 KINHDOANHDICHVU
## 5 Male 62 Married Kinh 6 KINHDOANHDICHVU
## 6 Male 62 Married Kinh 6 KINHDOANHDICHVU
## BANGCAP_CH SONAMDANGHOC SONUDANGHOC songuoidihoc
## 1 Primary school 1 1 2
## 2 Secondary-high school 1 0 1
## 3 University 0 3 3
## 4 Secondary-high school 0 2 2
## 5 Secondary-high school 1 1 2
## 6 Secondary-high school 1 1 2
## songuoidihocFactor CHIGD TONGCHIGD s1 s2 s3 X
## 1 2 24645 11.833055 0.17073171 0.7317073 0.09756098 0.7804878
## 2 1 12550 10.942445 0.29411765 0.6666667 0.03921569 0.6862745
## 3 3 18000 11.889350 0.24000000 0.5200000 0.24000000 0.6400000
## 4 2 19213 7.690353 0.09926471 0.8713235 0.02941176 0.8860294
## 5 2 16450 7.607658 0.11428571 0.8571429 0.02857143 0.8714286
## 6 2 16450 7.607658 0.15584416 0.8051948 0.03896104 0.8246753
## Y ILR1 ILR2
## 1 0.08449028 -0.3656563 1.4247516
## 2 0.03396178 0.4885069 2.0033844
## 3 0.20784610 -0.3156535 0.5467278
## 4 0.02547134 -0.3902151 2.3961152
## 5 0.02474358 -0.2566284 2.4050097
## 6 0.03374125 -0.1044844 2.1414885
model1_prg <- lm (ILR1 ~ log(THUBQ) + GIOITINH + TROCAP + m2xc8 + songuoidihocFactor +
HONNHAN_CH + DANTOC_CH + NGHENGHIEP_CH + BANGCAP_CH +
NOISONG + AREA,
data = CCCHIGDV3_prg_all1) #bỏ + GIOITINH_CH, TSNGUOI, TUOI_CH
model2_prg <- lm (ILR2 ~ log(THUBQ) + GIOITINH + TROCAP + m2xc8 + songuoidihocFactor +
HONNHAN_CH + DANTOC_CH + NGHENGHIEP_CH + BANGCAP_CH +
NOISONG + AREA,
data = CCCHIGDV3_prg_all1) #bỏ + GIOITINH_CH, TSNGUOI, TUOI_CH
summary(model1_prg)##
## Call:
## lm(formula = ILR1 ~ log(THUBQ) + GIOITINH + TROCAP + m2xc8 +
## songuoidihocFactor + HONNHAN_CH + DANTOC_CH + NGHENGHIEP_CH +
## BANGCAP_CH + NOISONG + AREA, data = CCCHIGDV3_prg_all1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.90677 -0.38697 0.00704 0.38445 2.62481
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.385596 0.201735 6.868 8.16e-12 ***
## log(THUBQ) -0.127505 0.024154 -5.279 1.41e-07 ***
## GIOITINHFemale 0.028244 0.023640 1.195 0.232300
## TROCAPNo 0.373667 0.111529 3.350 0.000819 ***
## m2xc8Danlap-tuthuc 0.630012 0.168514 3.739 0.000189 ***
## songuoidihocFactor2 -0.026318 0.029414 -0.895 0.371013
## songuoidihocFactor3 -0.024813 0.039320 -0.631 0.528057
## HONNHAN_CHMarried 0.008867 0.038859 0.228 0.819514
## DANTOC_CHMinority 0.170909 0.038453 4.445 9.19e-06 ***
## NGHENGHIEP_CHLAMCONGANLUONG 0.049001 0.030967 1.582 0.113690
## NGHENGHIEP_CHNONGLAMTHUYSAN 0.071076 0.036258 1.960 0.050073 .
## BANGCAP_CHPrimary school -0.036933 0.039172 -0.943 0.345858
## BANGCAP_CHSecondary-high school -0.057741 0.038203 -1.511 0.130806
## BANGCAP_CHUniversity -0.032976 0.054573 -0.604 0.545727
## NOISONGRURAL 0.042600 0.028051 1.519 0.128977
## AREADB Song Hong -0.448406 0.040162 -11.165 < 2e-16 ***
## AREADong Nam Bo -0.197536 0.052645 -3.752 0.000179 ***
## AREAMien Trung -0.267378 0.038179 -7.003 3.20e-12 ***
## AREATay nguyen -0.115663 0.051995 -2.225 0.026203 *
## AREATD MN Phia Bac -0.363910 0.045153 -8.060 1.17e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5886 on 2501 degrees of freedom
## Multiple R-squared: 0.1425, Adjusted R-squared: 0.136
## F-statistic: 21.87 on 19 and 2501 DF, p-value: < 2.2e-16
summary(model2_prg)##
## Call:
## lm(formula = ILR2 ~ log(THUBQ) + GIOITINH + TROCAP + m2xc8 +
## songuoidihocFactor + HONNHAN_CH + DANTOC_CH + NGHENGHIEP_CH +
## BANGCAP_CH + NOISONG + AREA, data = CCCHIGDV3_prg_all1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.05543 -0.49199 0.00146 0.48615 2.68137
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.879509 0.255389 -3.444 0.000583 ***
## log(THUBQ) 0.176932 0.030578 5.786 8.10e-09 ***
## GIOITINHFemale 0.001577 0.029927 0.053 0.957977
## TROCAPNo 0.187110 0.141191 1.325 0.185219
## m2xc8Danlap-tuthuc 0.184327 0.213332 0.864 0.387652
## songuoidihocFactor2 0.046882 0.037237 1.259 0.208142
## songuoidihocFactor3 -0.013015 0.049777 -0.261 0.793749
## HONNHAN_CHMarried 0.049609 0.049194 1.008 0.313338
## DANTOC_CHMinority -0.371221 0.048680 -7.626 3.42e-14 ***
## NGHENGHIEP_CHLAMCONGANLUONG 0.005797 0.039203 0.148 0.882449
## NGHENGHIEP_CHNONGLAMTHUYSAN -0.014153 0.045902 -0.308 0.757851
## BANGCAP_CHPrimary school 0.193329 0.049591 3.898 9.93e-05 ***
## BANGCAP_CHSecondary-high school 0.157369 0.048363 3.254 0.001153 **
## BANGCAP_CHUniversity 0.130631 0.069087 1.891 0.058765 .
## NOISONGRURAL -0.202589 0.035512 -5.705 1.30e-08 ***
## AREADB Song Hong 0.686449 0.050844 13.501 < 2e-16 ***
## AREADong Nam Bo 0.428202 0.066647 6.425 1.57e-10 ***
## AREAMien Trung 0.356409 0.048334 7.374 2.24e-13 ***
## AREATay nguyen 0.143140 0.065824 2.175 0.029755 *
## AREATD MN Phia Bac 0.304111 0.057162 5.320 1.13e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7451 on 2501 degrees of freedom
## Multiple R-squared: 0.2322, Adjusted R-squared: 0.2264
## F-statistic: 39.82 on 19 and 2501 DF, p-value: < 2.2e-16
#THCS
CCCHIGDV3_sec_all1 <- CCCHIGDV3_sec_all1 %>% mutate( ILR1 = sqrt(2/3) * log(s1/sqrt(s2*s3)),
ILR2 = sqrt(1/2)* log(s2/s3))
head(CCCHIGDV3_sec_all1)## ID IDind m2xc1 m2xc5 m2xc6 m2xc7 m2xc8
## 1 01_1_1_37_15 01_1_1_37_15_4 5 <NA> Secondary school 6 Cong lap
## 2 01_1_1_37_19 01_1_1_37_19_5 6 <NA> Secondary school 7 Cong lap
## 3 01_1_31_3_13 01_1_31_3_13_4 8 <NA> Secondary school 9 Cong lap
## 4 01_3_91_1_15 01_3_91_1_15_4 6 <NA> Secondary school 7 Cong lap
## 5 01_3_97_35_13 01_3_97_35_13_4 5 <NA> Secondary school 6 Cong lap
## 6 01_4_121_4_14 01_4_121_4_14_3 8 <NA> Secondary school 9 Cong lap
## tinh tentinh HOCPHI TRAITUYEN DONGGOP QUANAO SGK DUNGCU
## 1 01 Th<e0>nh ph? H<e0> N?i 1395 0 3000 2000 500 1000
## 2 01 Th<e0>nh ph? H<e0> N?i 1395 0 1300 1200 300 500
## 3 01 Th<e0>nh ph? H<e0> N?i 1395 0 3000 2000 450 900
## 4 01 Th<e0>nh ph? H<e0> N?i 1953 0 1500 2100 450 500
## 5 01 Th<e0>nh ph? H<e0> N?i 0 0 600 700 400 500
## 6 01 Th<e0>nh ph? H<e0> N?i 1350 0 900 360 580 580
## HOCTHEM CHIGDKHAC CHIALL TROCAP GIOITINH NOISONG AREA THUBQ
## 1 8100 450 16445 No Male URBAN DB Song Hong 4393
## 2 13500 1000 19195 No Female URBAN DB Song Hong 3246
## 3 6850 450 15045 No Female URBAN DB Song Hong 9193
## 4 4320 100 10923 No Male URBAN DB Song Hong 5081
## 5 0 0 2200 Yes Female URBAN DB Song Hong 3421
## 6 1850 280 5900 No Male URBAN DB Song Hong 3574
## GIOITINH_CH TUOI_CH HONNHAN_CH DANTOC_CH TSNGUOI NGHENGHIEP_CH
## 1 Male 62 Other Kinh 5 LAMCONGANLUONG
## 2 Male 71 Married Kinh 5 KINHDOANHDICHVU
## 3 Female 51 Married Kinh 5 LAMCONGANLUONG
## 4 Male 54 Married Kinh 4 LAMCONGANLUONG
## 5 Male 49 Married Kinh 5 KINHDOANHDICHVU
## 6 Male 44 Married Kinh 4 LAMCONGANLUONG
## BANGCAP_CH SONAMDANGHOC SONUDANGHOC songuoidihoc
## 1 Primary school 1 1 2
## 2 Secondary-high school 0 2 2
## 3 University 1 1 2
## 4 Secondary-high school 1 0 1
## 5 University 0 3 3
## 6 Secondary-high school 2 0 2
## songuoidihocFactor CHIGD TONGCHIGD s1 s2 s3 X
## 1 2 24645 11.833055 0.2368501 0.7023411 0.06080876 0.7327455
## 2 2 46195 24.338714 0.1508205 0.8231310 0.02604845 0.8361552
## 3 2 37195 3.691595 0.2555666 0.6846128 0.05982054 0.7145231
## 4 1 21423 16.961928 0.4122494 0.5419756 0.04577497 0.5648631
## 5 3 18000 11.889350 0.5000000 0.2727273 0.22727273 0.3863636
## 6 2 10960 10.094358 0.3881356 0.5135593 0.09830508 0.5627119
## Y ILR1 ILR2
## 1 0.05266193 0.11133009 1.7300679
## 2 0.02255862 0.02413052 2.4417508
## 3 0.05180611 0.19055553 1.7235759
## 4 0.03964229 0.78559154 1.7476028
## 5 0.19682396 0.56934028 0.1289208
## 6 0.08513470 0.44632487 1.1690524
model1_sec <- lm (ILR1 ~ log(THUBQ) + GIOITINH + TROCAP + m2xc8 + songuoidihocFactor +
HONNHAN_CH + DANTOC_CH + NGHENGHIEP_CH + BANGCAP_CH +
NOISONG + AREA,
data = CCCHIGDV3_sec_all1) #bỏ + GIOITINH_CH, TSNGUOI, TUOI_CH
model2_sec <- lm (ILR2 ~ log(THUBQ) + GIOITINH + TROCAP + m2xc8 + songuoidihocFactor +
HONNHAN_CH + DANTOC_CH + NGHENGHIEP_CH + BANGCAP_CH +
NOISONG + AREA,
data = CCCHIGDV3_sec_all1) #bỏ + GIOITINH_CH, TSNGUOI, TUOI_CH
summary(model1_sec)##
## Call:
## lm(formula = ILR1 ~ log(THUBQ) + GIOITINH + TROCAP + m2xc8 +
## songuoidihocFactor + HONNHAN_CH + DANTOC_CH + NGHENGHIEP_CH +
## BANGCAP_CH + NOISONG + AREA, data = CCCHIGDV3_sec_all1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.66063 -0.36523 -0.01049 0.36652 2.50930
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.72980 0.23260 7.437 1.66e-13 ***
## log(THUBQ) -0.14049 0.02849 -4.932 8.96e-07 ***
## GIOITINHFemale 0.02229 0.02801 0.796 0.426269
## TROCAPNo 0.26764 0.04281 6.251 5.19e-10 ***
## m2xc8Danlap-tuthuc 0.87442 0.20154 4.339 1.52e-05 ***
## songuoidihocFactor2 -0.01069 0.03394 -0.315 0.752828
## songuoidihocFactor3 -0.02506 0.04335 -0.578 0.563306
## HONNHAN_CHMarried -0.03021 0.04382 -0.689 0.490633
## DANTOC_CHMinority 0.18108 0.05032 3.599 0.000329 ***
## NGHENGHIEP_CHLAMCONGANLUONG -0.05369 0.03720 -1.443 0.149103
## NGHENGHIEP_CHNONGLAMTHUYSAN -0.06432 0.04455 -1.444 0.149038
## BANGCAP_CHPrimary school 0.06969 0.04544 1.534 0.125258
## BANGCAP_CHSecondary-high school 0.01853 0.04392 0.422 0.673252
## BANGCAP_CHUniversity 0.06326 0.06521 0.970 0.332181
## NOISONGRURAL -0.01426 0.03328 -0.428 0.668352
## AREADB Song Hong -0.48369 0.04533 -10.670 < 2e-16 ***
## AREADong Nam Bo -0.23280 0.06044 -3.852 0.000122 ***
## AREAMien Trung -0.33245 0.04394 -7.566 6.37e-14 ***
## AREATay nguyen -0.31675 0.06002 -5.277 1.49e-07 ***
## AREATD MN Phia Bac -0.41198 0.05302 -7.770 1.38e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5611 on 1635 degrees of freedom
## Multiple R-squared: 0.1274, Adjusted R-squared: 0.1172
## F-statistic: 12.56 on 19 and 1635 DF, p-value: < 2.2e-16
summary(model2_sec)##
## Call:
## lm(formula = ILR2 ~ log(THUBQ) + GIOITINH + TROCAP + m2xc8 +
## songuoidihocFactor + HONNHAN_CH + DANTOC_CH + NGHENGHIEP_CH +
## BANGCAP_CH + NOISONG + AREA, data = CCCHIGDV3_sec_all1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.54827 -0.49745 -0.00671 0.50169 2.39129
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.1051044 0.3177728 -3.478 0.000519 ***
## log(THUBQ) 0.2180369 0.0389167 5.603 2.47e-08 ***
## GIOITINHFemale -0.0160141 0.0382614 -0.419 0.675603
## TROCAPNo 0.1909287 0.0584928 3.264 0.001121 **
## m2xc8Danlap-tuthuc -0.1739624 0.2753371 -0.632 0.527595
## songuoidihocFactor2 0.0300791 0.0463738 0.649 0.516674
## songuoidihocFactor3 0.0520851 0.0592206 0.880 0.379254
## HONNHAN_CHMarried -0.0041756 0.0598602 -0.070 0.944397
## DANTOC_CHMinority -0.4093031 0.0687443 -5.954 3.20e-09 ***
## NGHENGHIEP_CHLAMCONGANLUONG 0.0094446 0.0508203 0.186 0.852590
## NGHENGHIEP_CHNONGLAMTHUYSAN -0.0818918 0.0608675 -1.345 0.178679
## BANGCAP_CHPrimary school -0.0375300 0.0620751 -0.605 0.545535
## BANGCAP_CHSecondary-high school 0.0002039 0.0600074 0.003 0.997289
## BANGCAP_CHUniversity 0.0352333 0.0890957 0.395 0.692559
## NOISONGRURAL -0.2021388 0.0454680 -4.446 9.35e-06 ***
## AREADB Song Hong 0.6444991 0.0619313 10.407 < 2e-16 ***
## AREADong Nam Bo 0.4774096 0.0825780 5.781 8.86e-09 ***
## AREAMien Trung 0.5514267 0.0600273 9.186 < 2e-16 ***
## AREATay nguyen 0.4081058 0.0820047 4.977 7.15e-07 ***
## AREATD MN Phia Bac 0.3659923 0.0724380 5.052 4.85e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7666 on 1635 degrees of freedom
## Multiple R-squared: 0.2515, Adjusted R-squared: 0.2428
## F-statistic: 28.92 on 19 and 1635 DF, p-value: < 2.2e-16
###tạo bảng hệ số pvalue cho các biến hồi quy # VAN Anh
table_hig1 <- summary(model1_hig)$coefficients
table_hig1 <- data.frame(table_hig1)
star.CoDa1 <- ifelse(table_hig1$Pr...t.. > 0.1, " ",
ifelse(table_hig1$Pr...t..<=0.1 & table_hig1$Pr...t.. > 0.05, ".",
ifelse(table_hig1$Pr...t.. <= 0.05 & table_hig1$Pr...t.. > 0.01, "*",
ifelse(table_hig1$Pr...t.. <= 0.01 & table_hig1$Pr...t.. >0.001, "**", "***"))))
#table_hig1 <- table_hig1 %>% select("Estimate","Pr...t..")
table_hig1 <- cbind(round(table_hig1, 5), star.CoDa1)
write.csv(table_hig1, file = "lm_model1_hig.csv")
table_hig2 <- summary(model2_hig)$coefficients
table_hig2 <- data.frame(table_hig2)
star.CoDa2 <- ifelse(table_hig2$Pr...t.. > 0.1, " ",
ifelse(table_hig2$Pr...t..<=0.1 & table_hig2$Pr...t.. > 0.05, ".",
ifelse(table_hig2$Pr...t.. <= 0.05 & table_hig2$Pr...t.. > 0.01, "*",
ifelse(table_hig2$Pr...t.. <= 0.01 & table_hig2$Pr...t.. >0.001, "**", "***"))))
table_hig2 <- cbind(round(table_hig2, 5), star.CoDa2)
write.csv(table_hig2, file = "lm_model2_hig.csv")
table_prg1 <- summary(model1_prg)$coefficients
table_prg1 <- data.frame(table_prg1)
star.CoDa3 <- ifelse(table_prg1$Pr...t.. > 0.1, " ",
ifelse(table_prg1$Pr...t..<=0.1 & table_prg1$Pr...t.. > 0.05, ".",
ifelse(table_prg1$Pr...t.. <= 0.05 & table_prg1$Pr...t.. > 0.01, "*",
ifelse(table_prg1$Pr...t.. <= 0.01 & table_prg1$Pr...t.. >0.001, "**", "***"))))
table_prg1 <- cbind(round(table_prg1, 5), star.CoDa3)
write.csv(table_prg1, file = "lm_table_prg1.csv")
table_prg2 <- summary(model2_prg)$coefficients
table_prg2 <- data.frame(table_prg2)
star.CoDa4 <- ifelse(table_prg2$Pr...t.. > 0.1, " ",
ifelse(table_prg2$Pr...t..<=0.1 & table_prg2$Pr...t.. > 0.05, ".",
ifelse(table_prg2$Pr...t.. <= 0.05 & table_prg2$Pr...t.. > 0.01, "*",
ifelse(table_prg2$Pr...t.. <= 0.01 & table_prg2$Pr...t.. >0.001, "**", "***"))))
table_prg2 <- cbind(round(table_prg2, 5), star.CoDa4)
write.csv(table_prg2, file = "lm_table_prg2.csv")
table_sec1 <- summary(model1_sec)$coefficients
table_sec1 <- data.frame(table_sec1)
star.CoDa5 <- ifelse(table_sec1$Pr...t.. > 0.1, " ",
ifelse(table_sec1$Pr...t..<=0.1 & table_sec1$Pr...t.. > 0.05, ".",
ifelse(table_sec1$Pr...t.. <= 0.05 & table_sec1$Pr...t.. > 0.01, "*",
ifelse(table_sec1$Pr...t.. <= 0.01 & table_sec1$Pr...t.. >0.001, "**", "***"))))
table_sec1 <- cbind(round(table_sec1, 5), star.CoDa5)
write.csv(table_sec1, file = "lm_table_sec1.csv")
table_sec2 <- summary(model2_sec)$coefficients
table_sec2 <- data.frame(table_sec2)
star.CoDa6 <- ifelse(table_sec2$Pr...t.. > 0.1, " ",
ifelse(table_sec2$Pr...t..<=0.1 & table_sec2$Pr...t.. > 0.05, ".",
ifelse(table_sec2$Pr...t.. <= 0.05 & table_sec2$Pr...t.. > 0.01, "*",
ifelse(table_sec2$Pr...t.. <= 0.01 & table_sec2$Pr...t.. >0.001, "**", "***"))))
table_sec2 <- cbind(round(table_sec2, 5), star.CoDa6)
write.csv(table_sec2, file = "lm_table_sec2.csv")shapiro.test(residuals(model2_hig))##
## Shapiro-Wilk normality test
##
## data: residuals(model2_hig)
## W = 0.9977, p-value = 0.2037
shapiro.test(residuals(model1_hig))##
## Shapiro-Wilk normality test
##
## data: residuals(model1_hig)
## W = 0.98517, p-value = 2.657e-08
shapiro.test(residuals(model1_prg))##
## Shapiro-Wilk normality test
##
## data: residuals(model1_prg)
## W = 0.99626, p-value = 6.597e-06
shapiro.test(residuals(model2_prg))##
## Shapiro-Wilk normality test
##
## data: residuals(model2_prg)
## W = 0.99902, p-value = 0.1786
shapiro.test(residuals(model1_sec))##
## Shapiro-Wilk normality test
##
## data: residuals(model1_sec)
## W = 0.99545, p-value = 6.573e-05
shapiro.test(residuals(model2_sec))##
## Shapiro-Wilk normality test
##
## data: residuals(model2_sec)
## W = 0.99874, p-value = 0.2791
#library(VIF)
library(car)
# TÍNH TOÁN VIF
vif(model1_hig)## GVIF Df GVIF^(1/(2*Df))
## log(THUBQ) 1.621590 1 1.273417
## GIOITINH 1.026414 1 1.013121
## TROCAP 1.301883 1 1.141001
## m2xc8 1.049734 1 1.024565
## songuoidihocFactor 1.121768 2 1.029143
## HONNHAN_CH 1.077987 1 1.038262
## DANTOC_CH 1.729737 1 1.315195
## NGHENGHIEP_CH 1.316187 2 1.071098
## BANGCAP_CH 1.565956 3 1.077614
## NOISONG 1.245990 1 1.116239
## AREA 1.805543 5 1.060867
vif(model2_hig)## GVIF Df GVIF^(1/(2*Df))
## log(THUBQ) 1.621590 1 1.273417
## GIOITINH 1.026414 1 1.013121
## TROCAP 1.301883 1 1.141001
## m2xc8 1.049734 1 1.024565
## songuoidihocFactor 1.121768 2 1.029143
## HONNHAN_CH 1.077987 1 1.038262
## DANTOC_CH 1.729737 1 1.315195
## NGHENGHIEP_CH 1.316187 2 1.071098
## BANGCAP_CH 1.565956 3 1.077614
## NOISONG 1.245990 1 1.116239
## AREA 1.805543 5 1.060867
vif(model1_prg)## GVIF Df GVIF^(1/(2*Df))
## log(THUBQ) 1.625499 1 1.274951
## GIOITINH 1.014890 1 1.007417
## TROCAP 1.448217 1 1.203419
## m2xc8 1.464972 1 1.210360
## songuoidihocFactor 1.130318 2 1.031098
## HONNHAN_CH 1.108591 1 1.052897
## DANTOC_CH 1.516664 1 1.231529
## NGHENGHIEP_CH 1.297019 2 1.067177
## BANGCAP_CH 1.608194 3 1.082405
## NOISONG 1.227002 1 1.107701
## AREA 1.851415 5 1.063532
vif(model2_prg)## GVIF Df GVIF^(1/(2*Df))
## log(THUBQ) 1.625499 1 1.274951
## GIOITINH 1.014890 1 1.007417
## TROCAP 1.448217 1 1.203419
## m2xc8 1.464972 1 1.210360
## songuoidihocFactor 1.130318 2 1.031098
## HONNHAN_CH 1.108591 1 1.052897
## DANTOC_CH 1.516664 1 1.231529
## NGHENGHIEP_CH 1.297019 2 1.067177
## BANGCAP_CH 1.608194 3 1.082405
## NOISONG 1.227002 1 1.107701
## AREA 1.851415 5 1.063532
vif(model1_sec)## GVIF Df GVIF^(1/(2*Df))
## log(THUBQ) 1.805069 1 1.343529
## GIOITINH 1.030215 1 1.014995
## TROCAP 1.403762 1 1.184804
## m2xc8 1.026967 1 1.013394
## songuoidihocFactor 1.140551 2 1.033424
## HONNHAN_CH 1.095069 1 1.046456
## DANTOC_CH 1.902331 1 1.379250
## NGHENGHIEP_CH 1.320455 2 1.071966
## BANGCAP_CH 1.555999 3 1.076469
## NOISONG 1.262221 1 1.123486
## AREA 1.946109 5 1.068850
vif(model2_sec)## GVIF Df GVIF^(1/(2*Df))
## log(THUBQ) 1.805069 1 1.343529
## GIOITINH 1.030215 1 1.014995
## TROCAP 1.403762 1 1.184804
## m2xc8 1.026967 1 1.013394
## songuoidihocFactor 1.140551 2 1.033424
## HONNHAN_CH 1.095069 1 1.046456
## DANTOC_CH 1.902331 1 1.379250
## NGHENGHIEP_CH 1.320455 2 1.071966
## BANGCAP_CH 1.555999 3 1.076469
## NOISONG 1.262221 1 1.123486
## AREA 1.946109 5 1.068850
write.csv(vif(model1_hig), file = "model1_hig.csv")
write.csv(vif(model2_hig), file = "model2_hig.csv")
write.csv(vif(model1_prg), file = "model1_prg.csv")
write.csv(vif(model2_prg), file = "model2_prg.csv")
write.csv(vif(model1_sec), file = "model1_sec.csv")
write.csv(vif(model2_sec), file = "model2_sec.csv") qqnorm(model1_hig$residuals,main="Bieu do q-q plot cua chuoi phan du cap ILR1 cap THPT", cex.lab=1.5, xlab = "Phan vi ly thuyet", ylab = "Phan vi thuc nghiem" )
qqline(model1_hig$residuals)qqnorm(model2_hig$residuals,main="Bieu do q-q plot cua chuoi phan du ILR2 cap THPT")
qqline(model2_hig$residuals)### Summary coefficient # Huong
gen.CODA = data.frame(name = names(model2_hig$coefficients),
coef = NA, sd = NA,
row.names="name")
gen.CoDa.ListSchool <- function(model.Coda)
{
p.v.CoDa1 = 2*pnorm(abs(summary(model.Coda)$coefficients[, 1]/summary(model.Coda)$coefficients[, 2]),
lower.tail=FALSE)
star.CoDa1 = ifelse(p.v.CoDa1 > 0.1, " ",
ifelse(p.v.CoDa1<=0.1 & p.v.CoDa1 > 0.05, ".",
ifelse(p.v.CoDa1 <= 0.05 & p.v.CoDa1 > 0.01, "*",
ifelse(p.v.CoDa1 <= 0.01 & p.v.CoDa1 >0.001, "**", "***"))))
Rcoef.CODA = gen.CODA
Rcoef.CODA[ , c("coef", "sd")] <-
cbind( paste(round(summary(model.Coda)$coefficients[, 1], 2), star.CoDa1),
paste0("(", round(summary(model.Coda)$coefficients[, 2], 2), ")") )
return(Rcoef.CODA)
}
CoefALLILR <- cbind(gen.CoDa.ListSchool(model1_prg), # Bảng 2
gen.CoDa.ListSchool(model2_prg),
gen.CoDa.ListSchool(model1_sec),
gen.CoDa.ListSchool(model2_sec),
gen.CoDa.ListSchool(model1_hig),
gen.CoDa.ListSchool(model2_hig))
CoefALLILR <- CoefALLILR[, c(1, 3, 5, 7, 9, 11)]
write.csv(CoefALLILR, file = "CoefALLILR.csv") #----------- Compositional regression2: Tính gọn --------------
CCCHIGDV3 <- CCCHIGDV3 %>% mutate(
s1 = ( SGK + HOCPHI+QUANAO )*100/CHIALL,
s2 = ( DONGGOP+HOCTHEM+CHIGDKHAC)*100/CHIALL,
s3 = ( DUNGCU + TRAITUYEN)*100/CHIALL)
ListSchool = c( "Primary School", "Secondary school", "High school" )
Models = NULL
Coef = NULL
for (t in 1:length(ListSchool)){
shares = acomp(CCCHIGDV3[CCCHIGDV3$m2xc6==
as.character(ListSchool[t]), c("s3", "s2", "s1")]) # Để đạt được đúng
# ILR như trong mô hình đã định nghĩa
# hàm ilr() tính các chuyển đổi cho mình.
Models[[t]] = lm(ilr(shares) ~ log(THUBQ) + GIOITINH + TROCAP + m2xc8 + songuoidihocFactor + GIOITINH_CH +
HONNHAN_CH + DANTOC_CH + NGHENGHIEP_CH + BANGCAP_CH +
TUOI_CH + TSNGUOI + NOISONG + AREA,
data = CCCHIGDV3[CCCHIGDV3$m2xc6==as.character(ListSchool[t]),])
Coef[[t]] = ilrInv(Models[[t]]$coefficients, orig=shares)
}
# Coef
cbind(round(Coef[[1]],2), round(Coef[[2]],2) ,round(Coef[[3]],2))## s3 s2 s1 s3 s2 s1 s3 s2 s1
## (Intercept) 0.33 0.06 0.60 0.23 0.03 0.73 0.18 0.11 0.71
## log(THUBQ) 0.30 0.39 0.31 0.30 0.41 0.30 0.32 0.37 0.30
## GIOITINHFemale 0.33 0.33 0.34 0.33 0.33 0.34 0.32 0.35 0.34
## TROCAPNo 0.27 0.28 0.45 0.24 0.33 0.42 0.25 0.29 0.46
## m2xc8Danlap-tuthuc 0.22 0.29 0.49 0.23 0.18 0.59 0.28 0.25 0.47
## songuoidihocFactor2 0.33 0.35 0.32 0.32 0.35 0.33 0.33 0.33 0.34
## songuoidihocFactor3 0.35 0.33 0.32 0.32 0.35 0.33 0.35 0.32 0.34
## GIOITINH_CHFemale 0.31 0.36 0.33 0.32 0.36 0.33 0.31 0.37 0.32
## HONNHAN_CHMarried 0.31 0.36 0.33 0.33 0.36 0.32 0.33 0.35 0.31
## DANTOC_CHMinority 0.40 0.23 0.36 0.41 0.22 0.36 0.40 0.20 0.40
## NGHENGHIEP_CHLAMCONGANLUONG 0.32 0.34 0.35 0.33 0.35 0.31 0.32 0.34 0.34
## NGHENGHIEP_CHNONGLAMTHUYSAN 0.32 0.32 0.35 0.36 0.33 0.31 0.34 0.33 0.33
## BANGCAP_CHPrimary school 0.29 0.38 0.32 0.32 0.32 0.35 0.32 0.32 0.35
## BANGCAP_CHSecondary-high school 0.30 0.38 0.32 0.33 0.34 0.34 0.32 0.32 0.36
## BANGCAP_CHUniversity 0.31 0.37 0.32 0.32 0.33 0.35 0.33 0.31 0.36
## TUOI_CH 0.33 0.33 0.33 0.33 0.33 0.33 0.33 0.33 0.33
## TSNGUOI 0.33 0.33 0.33 0.34 0.33 0.33 0.33 0.34 0.33
## NOISONGRURAL 0.37 0.29 0.34 0.38 0.29 0.33 0.38 0.30 0.32
## AREADB Song Hong 0.22 0.58 0.20 0.23 0.57 0.20 0.20 0.59 0.21
## AREADong Nam Bo 0.25 0.48 0.27 0.26 0.48 0.26 0.21 0.50 0.29
## AREAMien Trung 0.28 0.46 0.26 0.24 0.52 0.23 0.21 0.55 0.24
## AREATay nguyen 0.30 0.40 0.30 0.27 0.49 0.24 0.27 0.50 0.23
## AREATD MN Phia Bac 0.31 0.46 0.23 0.29 0.49 0.22 0.29 0.50 0.20
#Tính Elasticity
Y.prg <- acomp(CCCHIGDV3[which(CCCHIGDV3$m2xc6 == "Primary School") , c("s1", "s2", "s3")])
model.prg = lm(ilr(Y.prg) ~ log(THUBQ) + GIOITINH + TROCAP + m2xc8 + songuoidihocFactor + GIOITINH_CH +
HONNHAN_CH + DANTOC_CH + NGHENGHIEP_CH + BANGCAP_CH +
TUOI_CH + TSNGUOI + NOISONG + AREA,
data = CCCHIGDV3[which(CCCHIGDV3$m2xc6 == "Primary School"), ])
coefs.prg <- ilrInv(coef(model.prg),orig=Y.prg)
Y.sec <- acomp(CCCHIGDV3[which(CCCHIGDV3$m2xc6 == "Secondary school") , c("s1", "s2", "s3")])
model.sec = lm(ilr(Y.sec) ~ log(THUBQ) + GIOITINH + TROCAP + m2xc8 + songuoidihocFactor + GIOITINH_CH +
HONNHAN_CH + DANTOC_CH + NGHENGHIEP_CH + BANGCAP_CH +
TUOI_CH + TSNGUOI + NOISONG + AREA,
data = CCCHIGDV3[which(CCCHIGDV3$m2xc6 == "Secondary school"), ])
coefs.sec <- ilrInv(coef(model.sec),orig=Y.sec)
Y.hig <- acomp(CCCHIGDV3[which(CCCHIGDV3$m2xc6 == "High school") , c("s1", "s2", "s3")])
model.hig = lm(ilr(Y.hig) ~ log(THUBQ) + GIOITINH + TROCAP + m2xc8 + songuoidihocFactor + GIOITINH_CH +
HONNHAN_CH + DANTOC_CH + NGHENGHIEP_CH + BANGCAP_CH +
TUOI_CH + TSNGUOI + NOISONG + AREA,
data = CCCHIGDV3[which(CCCHIGDV3$m2xc6 == "High school"), ])
coefs.hig <- ilrInv(coef(model.hig),orig=Y.hig)
#========
#caphocElas <- "Primary school"
ElasCoDaAll <- function(caphocElas)
{
don <- CCCHIGDV3[which(CCCHIGDV3$m2xc6 == caphocElas), ]
Y.caphoc <- acomp(don[,c("s1", "s2", "s3")])
model.caphoc = lm(ilr(Y.caphoc) ~ log(THUBQ) + GIOITINH + TROCAP + m2xc8 + songuoidihocFactor + GIOITINH_CH +
HONNHAN_CH + DANTOC_CH + NGHENGHIEP_CH + BANGCAP_CH +
TUOI_CH + TSNGUOI + NOISONG + AREA, data=don)
coefs.caphoc <- data.frame(ilrInv(coef(model.caphoc),orig=Y.caphoc))
lc.s1 <- log(coefs.caphoc["log(THUBQ)","s1"])
lc.s2 <- log(coefs.caphoc["log(THUBQ)","s2"])
lc.s3 <- log(coefs.caphoc["log(THUBQ)","s3"])
e.s1 <- c(rep(NA, dim(don)[1]))
for (i in 1:length(e.s1))
{
e.s1[i] <- (lc.s1 - don[i,"s1"]*lc.s1 - don[i,"s2"]*lc.s2 - don[i,"s3"]*lc.s3)#*log(don[i,"THUBQ"])
}
e.s2 <- c(rep(NA, dim(don)[1]))
for (i in 1:length(e.s1))
{
e.s2[i] <- (lc.s2 - don[i,"s1"]*lc.s1 - don[i,"s2"]*lc.s2 - don[i,"s3"]*lc.s3)#*log(don[i,"THUBQ"])
}
e.s3 <- c(rep(NA, dim(don)[1]))
for (i in 1:length(e.s1))
{
e.s3[i] <- (lc.s3 - don[i,"s1"]*lc.s1 - don[i,"s2"]*lc.s2 - don[i,"s3"]*lc.s3)#*log(don[i,"THUBQ"])
}
print(c(mean(e.s1),mean(e.s2),mean(e.s3)))
refinal <- data.frame(cbind(e.s1,e.s2,e.s3))
names(refinal) <- c("s1", "s2", "s3")
refinal$m2xc6 <- caphocElas
return(refinal)
}
elasCoDa.ALL <- ElasCoDaAll("Primary School")## [1] 105.2911 105.5417 105.2763
elasCoDa.ALL <- rbind(elasCoDa.ALL, ElasCoDaAll("Secondary school"))## [1] 105.9964 106.3073 105.9965
elasCoDa.ALL <- rbind(elasCoDa.ALL, ElasCoDaAll("High school"))## [1] 107.0435 107.2455 107.1062
head(elasCoDa.ALL)## s1 s2 s3 m2xc6
## 1 99.10757 99.35815 99.09284 Primary School
## 2 100.65135 100.90192 100.63661 Primary School
## 3 104.62217 104.87274 104.60744 Primary School
## 4 95.50881 95.75938 95.49408 Primary School
## 5 95.86290 96.11347 95.84816 Primary School
## 6 97.17987 97.43044 97.16513 Primary School
# -----------------Figure 9: Plot elasticities of share in a boxplot for all year
elasCoDa.ALL.melt <- melt(elasCoDa.ALL,id="m2xc6")
require(ggplot2)
# grouped boxplot
ggplot(elasCoDa.ALL.melt, aes(x = m2xc6, y= value , fill = variable)) +
geom_boxplot()