#Thư viện

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

#Dữ liệu

# Load the Stata file into `Dta1`, the data frame used by every later step.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this exact file exists.
Dta1 <- haven::read_dta(
  file = "D:\\STUDY\\HK5\\PTDL\\BTGK\\Dữ liệu\\Data 1.dta"
)

#Biến đổi

# Band a numeric GDP series into three ordered codes:
#   1 when x <= 4, 2 when 4 < x <= 6, 3 when x > 6.
# Values for which no condition holds (i.e. NA) stay NA.
recode_gdp_band <- function(x) {
  dplyr::case_when(
    x <= 4 ~ 1,
    x <= 6 ~ 2,
    x > 6 ~ 3
  )
}

# Code a numeric FDI series by sign: 1 when x <= 0, 2 when x > 0.
# NA inputs stay NA.
recode_fdi_sign <- function(x) {
  dplyr::case_when(
    x <= 0 ~ 1,
    x > 0 ~ 2
  )
}

# COUNTRIES is the grouping variable of the later ANOVA / Kruskal-Wallis
# calls, so store it as a factor.
Dta1$COUNTRIES <- as.factor(Dta1$COUNTRIES)

# Apply the same two recodes to every country prefix. For each prefix this
# creates <prefix>GDPmoi and <prefix>FDImoi, in that order, which are the
# columns cross-tabulated by the chi-squared tests further down.
for (country in c("IDN", "THA", "VNM")) {
  gdp_values <- Dta1[[paste0(country, "GDP")]]
  fdi_values <- Dta1[[paste0(country, "FDI")]]
  Dta1[[paste0(country, "GDPmoi")]] <- recode_gdp_band(gdp_values)
  Dta1[[paste0(country, "FDImoi")]] <- recode_fdi_sign(fdi_values)
}

#Tạo biểu đồ histogram

# Draw one density-scaled histogram per series, GDP columns first, then FDI.
# `main` and `xlab` are spelled out so each plot carries the same labels that
# hist() would derive from a direct `Dta1$<column>` argument.
invisible(lapply(
  c("IDNGDP", "THAGDP", "VNMGDP", "IDNFDI", "THAFDI", "VNMFDI"),
  function(series) {
    series_label <- paste0("Dta1$", series)
    hist(
      Dta1[[series]],
      probability = TRUE,
      main = paste("Histogram of", series_label),
      xlab = series_label
    )
  }
))

#Kiểm định tham số ##Kiểm định ANOVA ###GDP

library(dplyr)
# One-way ANOVA of GDP across the COUNTRIES groups; summary() prints the
# F-test table.
anova_GDP <- aov(
  GDP ~ COUNTRIES,
  data = Dta1
)
summary(anova_GDP)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## COUNTRIES    2  105.3   52.66   10.82 9.74e-05 ***
## Residuals   60  292.1    4.87                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

###FDI

# One-way ANOVA of FDI across the COUNTRIES groups; summary() prints the
# F-test table.
anova_FDI <- aov(
  FDI ~ COUNTRIES,
  data = Dta1
)
summary(anova_FDI)
##             Df    Sum Sq   Mean Sq F value   Pr(>F)    
## COUNTRIES    2 1.499e+21 7.496e+20   16.92 1.49e-06 ***
## Residuals   60 2.658e+21 4.430e+19                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

##Kiểm định sâu ANOVA (kiểm định Tukey) ###GDP

# Tukey HSD pairwise comparisons on the COUNTRIES term of the GDP ANOVA fit.
tukey_GDP <- TukeyHSD(
  x = anova_GDP,
  which = "COUNTRIES"
)
print(tukey_GDP)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = GDP ~ COUNTRIES, data = Dta1)
## 
## $COUNTRIES
##          diff        lwr        upr     p adj
## 2-1 -1.848812 -3.4851976 -0.2124258 0.0231579
## 3-1  1.302459 -0.3339272  2.9388446 0.1438780
## 3-2  3.151270  1.5148845  4.7876563 0.0000595

###FDI

# Tukey HSD pairwise comparisons on the COUNTRIES term of the FDI ANOVA fit.
tukey_FDI <- TukeyHSD(
  x = anova_FDI,
  which = "COUNTRIES"
)
print(tukey_FDI)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = FDI ~ COUNTRIES, data = Dta1)
## 
## $COUNTRIES
##            diff          lwr         upr     p adj
## 2-1 10818948623   5882918815 15754978431 0.0000058
## 3-1  1016980221  -3919049588  5953010029 0.8738990
## 3-2 -9801968402 -14737998211 -4865938594 0.0000356

#Kiểm định phi tham số ##Kiểm định dấu hạng Wilcoxon ###INDO ####GDP

# One-sample Wilcoxon signed-rank test: is the location of IDNGDP different
# from 4? exact = FALSE asks for the normal approximation rather than the
# exact p-value.
wilcox_IDNGDP <- wilcox.test(
  x = Dta1$IDNGDP,
  mu = 4,
  exact = FALSE
)
print(wilcox_IDNGDP)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  Dta1$IDNGDP
## V = 209, p-value = 0.001227
## alternative hypothesis: true location is not equal to 4

####FDI

# One-sample Wilcoxon signed-rank test: is the location of IDNFDI different
# from -1e10 (i.e. -10,000,000,000)? Normal approximation, not exact.
wilcox_IDNFDI <- wilcox.test(
  x = Dta1$IDNFDI,
  mu = -1e10,
  exact = FALSE
)
print(wilcox_IDNFDI)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  Dta1$IDNFDI
## V = 106, p-value = 0.7544
## alternative hypothesis: true location is not equal to -1e+10

###THÁI ####GDP

# One-sample Wilcoxon signed-rank test: is the location of THAGDP different
# from 3? Normal approximation, not exact.
wilcox_THAGDP <- wilcox.test(
  x = Dta1$THAGDP,
  mu = 3,
  exact = FALSE
)
print(wilcox_THAGDP)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  Dta1$THAGDP
## V = 128, p-value = 0.6766
## alternative hypothesis: true location is not equal to 3

####FDI

# One-sample Wilcoxon signed-rank test: is the location of THAFDI different
# from 3e8 (i.e. 300,000,000)? Normal approximation, not exact.
wilcox_THAFDI <- wilcox.test(
  x = Dta1$THAFDI,
  mu = 3e8,
  exact = FALSE
)
print(wilcox_THAFDI)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  Dta1$THAFDI
## V = 100, p-value = 0.6021
## alternative hypothesis: true location is not equal to 3e+08

###VN ####GDP

# One-sample Wilcoxon signed-rank test: is the location of VNMGDP different
# from 6? Normal approximation, not exact.
wilcox_VNMGDP <- wilcox.test(
  x = Dta1$VNMGDP,
  mu = 6,
  exact = FALSE
)
print(wilcox_VNMGDP)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  Dta1$VNMGDP
## V = 160, p-value = 0.1262
## alternative hypothesis: true location is not equal to 6

####FDI

# One-sample Wilcoxon signed-rank test: is the location of VNMFDI different
# from -9e9 (i.e. -9,000,000,000)? Normal approximation, not exact.
wilcox_VNMFDI <- wilcox.test(
  x = Dta1$VNMFDI,
  mu = -9e9,
  exact = FALSE
)
print(wilcox_VNMFDI)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  Dta1$VNMFDI
## V = 115, p-value = 1
## alternative hypothesis: true location is not equal to -9e+09

##Kiểm định tổng hạng Wilcoxon ###GDP ####INDO THÁI

# Two-sample Wilcoxon rank-sum test: GDPIT compared between the two groups
# coded in IDN_THA. exact = FALSE asks for the normal approximation.
wilcox_GDPIT <- wilcox.test(
  GDPIT ~ IDN_THA,
  data = Dta1,
  exact = FALSE
)
print(wilcox_GDPIT)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  GDPIT by IDN_THA
## W = 323, p-value = 0.01029
## alternative hypothesis: true location shift is not equal to 0

####THÁI VN

# Two-sample Wilcoxon rank-sum test: GDPTV compared between the two groups
# coded in THA_VNM. Normal approximation, not exact.
wilcox_GDPTV <- wilcox.test(
  GDPTV ~ THA_VNM,
  data = Dta1,
  exact = FALSE
)
print(wilcox_GDPTV)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  GDPTV by THA_VNM
## W = 74, p-value = 0.00024
## alternative hypothesis: true location shift is not equal to 0

####INDO VN

# Two-sample Wilcoxon rank-sum test: GDPIV compared between the two groups
# coded in IDN_VNM. Normal approximation, not exact.
wilcox_GDPIV <- wilcox.test(
  GDPIV ~ IDN_VNM,
  data = Dta1,
  exact = FALSE
)
print(wilcox_GDPIV)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  GDPIV by IDN_VNM
## W = 80, p-value = 0.0004286
## alternative hypothesis: true location shift is not equal to 0

###FDI ####INDO THÁI

# Two-sample Wilcoxon rank-sum test: FDIIT compared between the two groups
# coded in IDN_THA. Normal approximation, not exact.
wilcox_FDIIT <- wilcox.test(
  FDIIT ~ IDN_THA,
  data = Dta1,
  exact = FALSE
)
print(wilcox_FDIIT)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  FDIIT by IDN_THA
## W = 70, p-value = 0.0001611
## alternative hypothesis: true location shift is not equal to 0

####THÁI VN

# Two-sample Wilcoxon rank-sum test: FDITV compared between the two groups
# coded in THA_VNM. Normal approximation, not exact.
wilcox_FDITV <- wilcox.test(
  FDITV ~ THA_VNM,
  data = Dta1,
  exact = FALSE
)
print(wilcox_FDITV)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  FDITV by THA_VNM
## W = 383, p-value = 4.597e-05
## alternative hypothesis: true location shift is not equal to 0

####INDO VN

# Two-sample Wilcoxon rank-sum test: FDIIV compared between the two groups
# coded in IDN_VNM. Normal approximation, not exact.
wilcox_FDIIV <- wilcox.test(
  FDIIV ~ IDN_VNM,
  data = Dta1,
  exact = FALSE
)
print(wilcox_FDIIV)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  FDIIV by IDN_VNM
## W = 195, p-value = 0.5294
## alternative hypothesis: true location shift is not equal to 0

##Kiểm định Kruskal-Wallis ###GDP

# Kruskal-Wallis rank-sum test of GDP across the COUNTRIES groups. The result
# is kept because the post-hoc Dunn step reads its p-value.
kruskal_GDP <- kruskal.test(
  GDP ~ COUNTRIES,
  data = Dta1
)
print(kruskal_GDP)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  GDP by COUNTRIES
## Kruskal-Wallis chi-squared = 20.665, df = 2, p-value = 3.255e-05

###FDI

# Kruskal-Wallis rank-sum test of FDI across the COUNTRIES groups. The result
# is kept because the post-hoc Dunn step reads its p-value.
kruskal_FDI <- kruskal.test(
  FDI ~ COUNTRIES,
  data = Dta1
)
print(kruskal_FDI)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  FDI by COUNTRIES
## Kruskal-Wallis chi-squared = 20.935, df = 2, p-value = 2.845e-05

##Kiểm định sâu Kruskal-Wallis ###GDP

library(dunn.test)

# Post-hoc Dunn test for GDP with Bonferroni adjustment, run only when the
# Kruskal-Wallis test on GDP is significant at the 5% level.
if (kruskal_GDP$p.value < 0.05) {
  dunn_GDP <- dunn.test(
    x = Dta1$GDP,
    g = Dta1$COUNTRIES,
    method = "bonferroni"
  )
  # dunn.test() prints its comparison table itself; print() then shows the
  # returned list (chi2, Z, P, P.adjusted, comparisons).
  print(dunn_GDP)
}
##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 20.6652, df = 2, p-value = 0
## 
## 
##                            Comparison of x by group                            
##                                  (Bonferroni)                                  
## Col Mean-|
## Row Mean |          1          2
## ---------+----------------------
##        2 |   1.776184
##          |     0.1136
##          |
##        3 |  -2.735829  -4.512014
##          |    0.0093*    0.0000*
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
## $chi2
## [1] 20.66525
## 
## $Z
## [1]  1.776185 -2.735830 -4.512015
## 
## $P
## [1] 3.785122e-02 3.111160e-03 3.210737e-06
## 
## $P.adjusted
## [1] 1.135537e-01 9.333480e-03 9.632211e-06
## 
## $comparisons
## [1] "1 - 2" "1 - 3" "2 - 3"

###FDI

# Post-hoc Dunn test for FDI with Bonferroni adjustment, run only when the
# Kruskal-Wallis test on FDI is significant at the 5% level.
if (kruskal_FDI$p.value < 0.05) {
  dunn_FDI <- dunn.test(
    x = Dta1$FDI,
    g = Dta1$COUNTRIES,
    method = "bonferroni"
  )
  # dunn.test() prints its comparison table itself; print() then shows the
  # returned list (chi2, Z, P, P.adjusted, comparisons).
  print(dunn_FDI)
}
##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 20.9345, df = 2, p-value = 0
## 
## 
##                            Comparison of x by group                            
##                                  (Bonferroni)                                  
## Col Mean-|
## Row Mean |          1          2
## ---------+----------------------
##        2 |  -4.116371
##          |    0.0001*
##          |
##        3 |  -0.328299   3.788072
##          |     1.0000    0.0002*
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
## $chi2
## [1] 20.93452
## 
## $Z
## [1] -4.1163716 -0.3282996  3.7880720
## 
## $P
## [1] 1.924418e-05 3.713426e-01 7.591042e-05
## 
## $P.adjusted
## [1] 5.773253e-05 1.000000e+00 2.277312e-04
## 
## $comparisons
## [1] "1 - 2" "1 - 3" "2 - 3"

##Kiểm định Chi bình phương về tính phụ thuộc ###INDO

# Cross-tabulate the recoded IDN GDP band against the recoded IDN FDI sign and
# run Pearson's chi-squared test on the resulting contingency table.
# NOTE(review): the recorded run emitted "Chi-squared approximation may be
# incorrect" (kept below). That usually signals small expected cell counts;
# consider fisher.test() or simulate.p.value = TRUE.
table_IDN <- table(
  Dta1[["IDNGDPmoi"]],
  Dta1[["IDNFDImoi"]]
)
chi2_IDN <- chisq.test(table_IDN)
## Warning in chisq.test(table_IDN): Chi-squared approximation may be incorrect
print(chi2_IDN)
## 
##  Pearson's Chi-squared test
## 
## data:  table_IDN
## X-squared = 1.1053, df = 2, p-value = 0.5754

###THÁI

# Cross-tabulate the recoded THA GDP band against the recoded THA FDI sign and
# run Pearson's chi-squared test on the resulting contingency table.
# NOTE(review): the recorded run emitted "Chi-squared approximation may be
# incorrect" (kept below). That usually signals small expected cell counts;
# consider fisher.test() or simulate.p.value = TRUE.
table_THA <- table(
  Dta1[["THAGDPmoi"]],
  Dta1[["THAFDImoi"]]
)
chi2_THA <- chisq.test(table_THA)
## Warning in chisq.test(table_THA): Chi-squared approximation may be incorrect
print(chi2_THA)
## 
##  Pearson's Chi-squared test
## 
## data:  table_THA
## X-squared = 1.4891, df = 2, p-value = 0.475

###VN

# Cross-tabulate the recoded VNM GDP band against the recoded VNM FDI sign and
# run a chi-squared test on the resulting table.
table_VNM <- table(Dta1$VNMGDPmoi, Dta1$VNMFDImoi)

# chisq.test() treats a table with a single row or column as a plain vector
# and runs a goodness-of-fit test ("Chi-squared test for given
# probabilities") instead of a test of independence. That happens whenever
# one of the two recoded variables takes only one observed value. Flag it
# explicitly so the printed result is not misread as an independence test.
if (min(dim(table_VNM)) < 2) {
  warning(
    "table_VNM has a single row or column: chisq.test() runs a ",
    "goodness-of-fit test here, not a test of independence.",
    call. = FALSE
  )
}

chi2_VNM <- chisq.test(table_VNM)
print(chi2_VNM)
## 
##  Chi-squared test for given probabilities
## 
## data:  table_VNM
## X-squared = 11.143, df = 2, p-value = 0.003805