#Thư viện
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#Dữ liệu
# Load the Stata (.dta) dataset into Dta1 with haven::read_dta().
# NOTE(review): the path is an absolute, machine-specific Windows path, so the
# script only runs where this exact file location exists.
Dta1<-haven::read_dta("D:\\STUDY\\HK5\\PTDL\\BTGK\\Dữ liệu\\Data 1.dta")
#Biến đổi
# Store the country identifier as a factor.
Dta1$COUNTRIES <- as.factor(Dta1$COUNTRIES)
# Derive the binned "*moi" columns for each country prefix:
#   <prefix>GDPmoi: 1 if GDP <= 4, 2 if 4 < GDP <= 6, 3 if GDP > 6
#   <prefix>FDImoi: 1 if FDI <= 0, 2 if FDI > 0
# findInterval(..., left.open = TRUE) counts the break points lying strictly
# below each value, so adding 1 yields the class code; NA inputs stay NA.
for (prefix in c("IDN", "THA", "VNM")) {
  gdp_values <- Dta1[[paste0(prefix, "GDP")]]
  fdi_values <- Dta1[[paste0(prefix, "FDI")]]
  Dta1[[paste0(prefix, "GDPmoi")]] <-
    findInterval(gdp_values, c(4, 6), left.open = TRUE) + 1
  Dta1[[paste0(prefix, "FDImoi")]] <-
    findInterval(fdi_values, 0, left.open = TRUE) + 1
}
#Tạo biểu đồ histogram
# Draw one density-scaled histogram per series, GDP columns first and then FDI.
# The title and x-axis label are set explicitly to the "Dta1$<column>" text
# that hist() would otherwise derive from the call expression.
for (col_name in c("IDNGDP", "THAGDP", "VNMGDP", "IDNFDI", "THAFDI", "VNMFDI")) {
  series_label <- paste0("Dta1$", col_name)
  hist(
    Dta1[[col_name]],
    probability = TRUE,
    main = paste("Histogram of", series_label),
    xlab = series_label
  )
}
#Kiểm định tham số ##Kiểm định ANOVA ###GDP
library(dplyr)
# One-way ANOVA of GDP across the levels of COUNTRIES; summary() prints the
# ANOVA table (Df, Sum Sq, Mean Sq, F value, Pr(>F)).
anova_GDP <- aov(GDP ~ COUNTRIES, data = Dta1)
summary(anova_GDP)
## Df Sum Sq Mean Sq F value Pr(>F)
## COUNTRIES 2 105.3 52.66 10.82 9.74e-05 ***
## Residuals 60 292.1 4.87
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###FDI
# One-way ANOVA of FDI across the levels of COUNTRIES; summary() prints the
# ANOVA table (Df, Sum Sq, Mean Sq, F value, Pr(>F)).
anova_FDI <- aov(FDI ~ COUNTRIES, data = Dta1)
summary(anova_FDI)
## Df Sum Sq Mean Sq F value Pr(>F)
## COUNTRIES 2 1.499e+21 7.496e+20 16.92 1.49e-06 ***
## Residuals 60 2.658e+21 4.430e+19
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##Kiểm định sâu ANOVA (kiểm định Tukey) ###GDP
# Tukey HSD post-hoc comparison of every pair of COUNTRIES levels, based on
# the fitted GDP ANOVA model (anova_GDP). Prints pairwise differences with
# their confidence bounds and adjusted p-values.
tukey_GDP <- TukeyHSD(anova_GDP, "COUNTRIES")
print(tukey_GDP)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = GDP ~ COUNTRIES, data = Dta1)
##
## $COUNTRIES
## diff lwr upr p adj
## 2-1 -1.848812 -3.4851976 -0.2124258 0.0231579
## 3-1 1.302459 -0.3339272 2.9388446 0.1438780
## 3-2 3.151270 1.5148845 4.7876563 0.0000595
###FDI
# Tukey HSD post-hoc comparison of every pair of COUNTRIES levels, based on
# the fitted FDI ANOVA model (anova_FDI). Prints pairwise differences with
# their confidence bounds and adjusted p-values.
tukey_FDI <- TukeyHSD(anova_FDI, "COUNTRIES")
print(tukey_FDI)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = FDI ~ COUNTRIES, data = Dta1)
##
## $COUNTRIES
## diff lwr upr p adj
## 2-1 10818948623 5882918815 15754978431 0.0000058
## 3-1 1016980221 -3919049588 5953010029 0.8738990
## 3-2 -9801968402 -14737998211 -4865938594 0.0000356
#Kiểm định phi tham số ##Kiểm định dấu hạng Wilcoxon ###INDO ####GDP
# One-sample Wilcoxon signed rank test on IDNGDP against a null location of 4.
# exact = FALSE requests the normal approximation instead of an exact p-value.
wilcox_IDNGDP <- wilcox.test(Dta1$IDNGDP, mu = 4, exact = FALSE)
print(wilcox_IDNGDP)
##
## Wilcoxon signed rank test with continuity correction
##
## data: Dta1$IDNGDP
## V = 209, p-value = 0.001227
## alternative hypothesis: true location is not equal to 4
####FDI
# One-sample Wilcoxon signed rank test on IDNFDI against a null location of
# -1e10. exact = FALSE requests the normal approximation.
# NOTE(review): the rationale for this hypothesised value is not shown in this
# script - confirm it against the analysis plan.
wilcox_IDNFDI <- wilcox.test(Dta1$IDNFDI, mu = -10000000000, exact = FALSE)
print(wilcox_IDNFDI)
##
## Wilcoxon signed rank test with continuity correction
##
## data: Dta1$IDNFDI
## V = 106, p-value = 0.7544
## alternative hypothesis: true location is not equal to -1e+10
###THÁI ####GDP
# One-sample Wilcoxon signed rank test on THAGDP against a null location of 3.
# exact = FALSE requests the normal approximation instead of an exact p-value.
wilcox_THAGDP <- wilcox.test(Dta1$THAGDP, mu = 3, exact = FALSE)
print(wilcox_THAGDP)
##
## Wilcoxon signed rank test with continuity correction
##
## data: Dta1$THAGDP
## V = 128, p-value = 0.6766
## alternative hypothesis: true location is not equal to 3
####FDI
# One-sample Wilcoxon signed rank test on THAFDI against a null location of
# 3e8. exact = FALSE requests the normal approximation.
# NOTE(review): the rationale for this hypothesised value is not shown in this
# script - confirm it against the analysis plan.
wilcox_THAFDI <- wilcox.test(Dta1$THAFDI, mu = 300000000, exact = FALSE)
print(wilcox_THAFDI)
##
## Wilcoxon signed rank test with continuity correction
##
## data: Dta1$THAFDI
## V = 100, p-value = 0.6021
## alternative hypothesis: true location is not equal to 3e+08
###VN ####GDP
# One-sample Wilcoxon signed rank test on VNMGDP against a null location of 6.
# exact = FALSE requests the normal approximation instead of an exact p-value.
wilcox_VNMGDP <- wilcox.test(Dta1$VNMGDP, mu = 6, exact = FALSE)
print(wilcox_VNMGDP)
##
## Wilcoxon signed rank test with continuity correction
##
## data: Dta1$VNMGDP
## V = 160, p-value = 0.1262
## alternative hypothesis: true location is not equal to 6
####FDI
# One-sample Wilcoxon signed rank test on VNMFDI against a null location of
# -9e9. exact = FALSE requests the normal approximation.
# NOTE(review): the rationale for this hypothesised value is not shown in this
# script - confirm it against the analysis plan.
wilcox_VNMFDI <- wilcox.test(Dta1$VNMFDI, mu = -9000000000, exact = FALSE)
print(wilcox_VNMFDI)
##
## Wilcoxon signed rank test with continuity correction
##
## data: Dta1$VNMFDI
## V = 115, p-value = 1
## alternative hypothesis: true location is not equal to -9e+09
##Kiểm định tổng hạng Wilcoxon ###GDP ####INDO THÁI
# Two-sample Wilcoxon rank sum test of GDPIT between the two groups of IDN_THA.
# exact = FALSE requests the normal approximation.
# NOTE(review): presumably GDPIT stacks the Indonesia and Thailand GDP values
# and IDN_THA labels the country - confirm against the dataset.
wilcox_GDPIT <- wilcox.test(GDPIT ~ IDN_THA, data = Dta1, exact = FALSE)
print(wilcox_GDPIT)
##
## Wilcoxon rank sum test with continuity correction
##
## data: GDPIT by IDN_THA
## W = 323, p-value = 0.01029
## alternative hypothesis: true location shift is not equal to 0
####THÁI VN
# Two-sample Wilcoxon rank sum test of GDPTV between the two groups of THA_VNM.
# exact = FALSE requests the normal approximation.
# NOTE(review): presumably GDPTV stacks the Thailand and Vietnam GDP values
# and THA_VNM labels the country - confirm against the dataset.
wilcox_GDPTV <- wilcox.test(GDPTV ~ THA_VNM, data = Dta1, exact = FALSE)
print(wilcox_GDPTV)
##
## Wilcoxon rank sum test with continuity correction
##
## data: GDPTV by THA_VNM
## W = 74, p-value = 0.00024
## alternative hypothesis: true location shift is not equal to 0
####INDO VN
# Two-sample Wilcoxon rank sum test of GDPIV between the two groups of IDN_VNM.
# exact = FALSE requests the normal approximation.
# NOTE(review): presumably GDPIV stacks the Indonesia and Vietnam GDP values
# and IDN_VNM labels the country - confirm against the dataset.
wilcox_GDPIV <- wilcox.test(GDPIV ~ IDN_VNM, data = Dta1, exact = FALSE)
print(wilcox_GDPIV)
##
## Wilcoxon rank sum test with continuity correction
##
## data: GDPIV by IDN_VNM
## W = 80, p-value = 0.0004286
## alternative hypothesis: true location shift is not equal to 0
###FDI ####INDO THÁI
# Two-sample Wilcoxon rank sum test of FDIIT between the two groups of IDN_THA.
# exact = FALSE requests the normal approximation.
# NOTE(review): presumably FDIIT stacks the Indonesia and Thailand FDI values
# and IDN_THA labels the country - confirm against the dataset.
wilcox_FDIIT <- wilcox.test(FDIIT ~ IDN_THA, data = Dta1, exact = FALSE)
print(wilcox_FDIIT)
##
## Wilcoxon rank sum test with continuity correction
##
## data: FDIIT by IDN_THA
## W = 70, p-value = 0.0001611
## alternative hypothesis: true location shift is not equal to 0
####THÁI VN
# Two-sample Wilcoxon rank sum test of FDITV between the two groups of THA_VNM.
# exact = FALSE requests the normal approximation.
# NOTE(review): presumably FDITV stacks the Thailand and Vietnam FDI values
# and THA_VNM labels the country - confirm against the dataset.
wilcox_FDITV <- wilcox.test(FDITV ~ THA_VNM, data = Dta1, exact = FALSE)
print(wilcox_FDITV)
##
## Wilcoxon rank sum test with continuity correction
##
## data: FDITV by THA_VNM
## W = 383, p-value = 4.597e-05
## alternative hypothesis: true location shift is not equal to 0
####INDO VN
# Two-sample Wilcoxon rank sum test of FDIIV between the two groups of IDN_VNM.
# exact = FALSE requests the normal approximation.
# NOTE(review): presumably FDIIV stacks the Indonesia and Vietnam FDI values
# and IDN_VNM labels the country - confirm against the dataset.
wilcox_FDIIV <- wilcox.test(FDIIV ~ IDN_VNM, data = Dta1, exact = FALSE)
print(wilcox_FDIIV)
##
## Wilcoxon rank sum test with continuity correction
##
## data: FDIIV by IDN_VNM
## W = 195, p-value = 0.5294
## alternative hypothesis: true location shift is not equal to 0
##Kiểm định Kruskal-Wallis ###GDP
# Kruskal-Wallis rank sum test of GDP across the levels of COUNTRIES
# (the rank-based counterpart of the one-way ANOVA fitted as anova_GDP).
kruskal_GDP <- kruskal.test(GDP ~ COUNTRIES, data = Dta1)
print(kruskal_GDP)
##
## Kruskal-Wallis rank sum test
##
## data: GDP by COUNTRIES
## Kruskal-Wallis chi-squared = 20.665, df = 2, p-value = 3.255e-05
###FDI
# Kruskal-Wallis rank sum test of FDI across the levels of COUNTRIES
# (the rank-based counterpart of the one-way ANOVA fitted as anova_FDI).
kruskal_FDI <- kruskal.test(FDI ~ COUNTRIES, data = Dta1)
print(kruskal_FDI)
##
## Kruskal-Wallis rank sum test
##
## data: FDI by COUNTRIES
## Kruskal-Wallis chi-squared = 20.935, df = 2, p-value = 2.845e-05
##Kiểm định sâu Kruskal-Wallis ###GDP
library(dunn.test)
# Dunn post-hoc pairwise comparisons of GDP between COUNTRIES levels, run only
# when the Kruskal-Wallis test on GDP is significant at the 5% level.
# A Bonferroni adjustment is requested for the pairwise p-values.
# NOTE(review): the recorded output shows the comparison table followed by the
# list components ($chi2, $Z, ...), i.e. dunn.test() prints its own table and
# print() then prints the returned list as well.
if (kruskal_GDP$p.value < 0.05) {
dunn_GDP <- dunn.test(Dta1$GDP, Dta1$COUNTRIES, method = "bonferroni")
print(dunn_GDP)
}
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 20.6652, df = 2, p-value = 0
##
##
## Comparison of x by group
## (Bonferroni)
## Col Mean-|
## Row Mean | 1 2
## ---------+----------------------
## 2 | 1.776184
## | 0.1136
## |
## 3 | -2.735829 -4.512014
## | 0.0093* 0.0000*
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
## $chi2
## [1] 20.66525
##
## $Z
## [1] 1.776185 -2.735830 -4.512015
##
## $P
## [1] 3.785122e-02 3.111160e-03 3.210737e-06
##
## $P.adjusted
## [1] 1.135537e-01 9.333480e-03 9.632211e-06
##
## $comparisons
## [1] "1 - 2" "1 - 3" "2 - 3"
###FDI
# Dunn post-hoc pairwise comparisons of FDI between COUNTRIES levels, run only
# when the Kruskal-Wallis test on FDI is significant at the 5% level.
# A Bonferroni adjustment is requested for the pairwise p-values.
# NOTE(review): the recorded output shows the comparison table followed by the
# list components ($chi2, $Z, ...), i.e. dunn.test() prints its own table and
# print() then prints the returned list as well.
if (kruskal_FDI$p.value < 0.05) {
dunn_FDI <- dunn.test(Dta1$FDI, Dta1$COUNTRIES, method = "bonferroni")
print(dunn_FDI)
}
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 20.9345, df = 2, p-value = 0
##
##
## Comparison of x by group
## (Bonferroni)
## Col Mean-|
## Row Mean | 1 2
## ---------+----------------------
## 2 | -4.116371
## | 0.0001*
## |
## 3 | -0.328299 3.788072
## | 1.0000 0.0002*
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
## $chi2
## [1] 20.93452
##
## $Z
## [1] -4.1163716 -0.3282996 3.7880720
##
## $P
## [1] 1.924418e-05 3.713426e-01 7.591042e-05
##
## $P.adjusted
## [1] 5.773253e-05 1.000000e+00 2.277312e-04
##
## $comparisons
## [1] "1 - 2" "1 - 3" "2 - 3"
##Kiểm định Chi bình phương về tính phụ thuộc ###INDO
# Chi-squared tests of independence between the binned GDP class (*GDPmoi)
# and the binned FDI class (*FDImoi), one test per country.
#
# chisq.test() silently switches to a goodness-of-fit test ("Chi-squared test
# for given probabilities") when the table has a single row or column. That is
# not a test of independence, so each table's shape is checked first and the
# test is skipped with a warning when the table is degenerate.

# Return TRUE when `tbl` is a two-way table with at least 2 rows and 2 columns;
# otherwise emit a warning naming `label` and return FALSE.
is_two_way_table <- function(tbl, label) {
  two_way <- length(dim(tbl)) == 2L && min(dim(tbl)) >= 2L
  if (!two_way) {
    warning(
      "Contingency table for ", label, " has dimensions ",
      paste(dim(tbl), collapse = " x "),
      "; an independence test needs at least 2 x 2, so it is skipped.",
      call. = FALSE
    )
  }
  two_way
}

table_IDN <- table(Dta1$IDNGDPmoi, Dta1$IDNFDImoi)
chi2_IDN <- NULL # stays NULL when the test is skipped
if (is_two_way_table(table_IDN, "IDN")) {
  chi2_IDN <- chisq.test(table_IDN)
  print(chi2_IDN)
}
## Warning in chisq.test(table_IDN): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: table_IDN
## X-squared = 1.1053, df = 2, p-value = 0.5754
###THAILAND
table_THA <- table(Dta1$THAGDPmoi, Dta1$THAFDImoi)
chi2_THA <- NULL # stays NULL when the test is skipped
if (is_two_way_table(table_THA, "THA")) {
  chi2_THA <- chisq.test(table_THA)
  print(chi2_THA)
}
## Warning in chisq.test(table_THA): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: table_THA
## X-squared = 1.4891, df = 2, p-value = 0.475
###VIETNAM
table_VNM <- table(Dta1$VNMGDPmoi, Dta1$VNMFDImoi)
chi2_VNM <- NULL # stays NULL when the test is skipped
if (is_two_way_table(table_VNM, "VNM")) {
  chi2_VNM <- chisq.test(table_VNM)
  print(chi2_VNM)
}
# The output below was recorded BEFORE the shape guard was added. Its title
# shows that chisq.test() ran a goodness-of-fit test on a table with a single
# row or column, so it must not be read as evidence about GDP/FDI dependence.
##
## Chi-squared test for given probabilities
##
## data: table_VNM
## X-squared = 11.143, df = 2, p-value = 0.003805