Đọc dữ liệu

# Point the session at the project folder, then load the main workbook.
# NOTE(review): the absolute path ties the script to one machine; the later
# read_excel() calls use relative file names that depend on this directory.
setwd("d:/DATA2020/NamKhanh_Q4")
library(readxl)
dulieu <- read_excel(path = "data.xlsx")
head(dulieu, n = 6L)
## # A tibble: 6 x 14
##      No     Y Stage  Year Scope1 Sinner1 Heroin Opium CrackCocaine  Meth Hashish
##   <dbl> <dbl> <chr> <dbl>  <dbl>   <dbl>  <dbl> <dbl>        <dbl> <dbl>   <dbl>
## 1     1     0 befo~  2005    184     223   1.62   2.7       0.108    0      0   
## 2     2     0 befo~  2006    190     252   2.31   3.5       0.192    0      0   
## 3     3     0 befo~  2007    219     257   4.61   1.5       1.21     0      0   
## 4     4     0 befo~  2008    241     312   4.27   1.8       0.0729  16.7    6.12
## 5     5     0 befo~  2009    250     382   1.26   7.2       0.09   103.    11.2 
## 6     6     1 after  2010    273     412   3.51   7.6       4.77    62.1    1.8 
## # ... with 3 more variables: Scope2 <dbl>, Sinner2 <dbl>, Victim <dbl>

Vẽ đồ thị ma tuý qua các năm

library(ggplot2)

# Yearly drug-related series, one fixed colour per measure.
# Scope1 and Sinner1 are divided by 10 before plotting; the remaining
# columns are plotted as read. Every layer inherits `dulieu` and the
# x = Year mapping from the ggplot() call.
ggplot(dulieu, aes(x = Year)) +
  geom_line(aes(y = Scope1 / 10), colour = "red", size = 2) +
  geom_point(aes(y = Scope1 / 10), colour = "red", size = 5) +
  geom_line(aes(y = Sinner1 / 10), colour = "blue", size = 2) +
  geom_point(aes(y = Sinner1 / 10), colour = "blue", size = 5) +
  geom_line(aes(y = Heroin), colour = "green", size = 2) +
  geom_point(aes(y = Heroin), colour = "green", size = 5) +
  geom_line(aes(y = Opium), colour = "yellow", size = 2) +
  geom_point(aes(y = Opium), colour = "yellow", size = 5) +
  geom_line(aes(y = CrackCocaine), colour = "magenta4", size = 2) +
  geom_point(aes(y = CrackCocaine), colour = "magenta4", size = 5) +
  geom_line(aes(y = Meth), colour = "steelblue", size = 2) +
  geom_point(aes(y = Meth), colour = "steelblue", size = 5) +
  geom_line(aes(y = Hashish), colour = "lightcoral", size = 2) +
  geom_point(aes(y = Hashish), colour = "lightcoral", size = 5)

Vẽ đồ thị buôn bán người qua các năm

# Yearly human-trafficking series: cases (Scope2), offenders (Sinner2) and
# victims (Victim), with a hand-placed colour key near the top-left corner.
#
# Changes from the previous version:
#   * The offender line plotted Sinner1 / 10 (a drug-crime column) instead of
#     the trafficking column Sinner2.
#   * The key was built from geom_point()/geom_text() with constant
#     aesthetics, which draws every marker and label once per row of
#     `dulieu`; annotate() draws each one exactly once.
#   * The key sat at y = 200-240, far above the trafficking values, and would
#     have stretched the axis. It now sits at y = 58-70.
#     NOTE(review): positions chosen from the summary table (max Victim is
#     71, early years are below 20) - adjust if the data change.
ggplot(data = dulieu, aes(x = Year)) +
  geom_line(aes(y = Scope2), col = "red", size = 2) +
  geom_point(aes(y = Scope2), col = "red", size = 5) +
  annotate("point", x = 2006, y = 58, colour = "red", size = 5) +
  annotate("text", x = 2006, y = 58, label = " So luong vu an",
           colour = "red", hjust = -0.2) +

  geom_line(aes(y = Sinner2), col = "blue", size = 2) +
  geom_point(aes(y = Sinner2), col = "blue", size = 5) +
  annotate("point", x = 2006, y = 70, colour = "blue", size = 5) +
  annotate("text", x = 2006, y = 70, label = "Doi tuong tham gia",
           colour = "blue", hjust = -0.2) +

  geom_line(aes(y = Victim), col = "green", size = 2) +
  geom_point(aes(y = Victim), col = "green", size = 5) +
  annotate("point", x = 2006, y = 64, colour = "green", size = 5) +
  annotate("text", x = 2006, y = 64, label = "Nan nhan hang nam",
           colour = "green", hjust = -0.2)

Tạo dữ liệu trung bình Trước và Sau năm 2009

# Mean of every measured column within each Stage group.
#
# The character column Stage is removed from the columns being averaged:
# mean() cannot handle it, which previously raised one "argument is not
# numeric or logical" warning per group and left an all-NA Stage column in
# the result. The grouping vector is passed explicitly as dulieu$Stage, so
# attach(dulieu) is no longer needed. The group key column is still named
# Group.1, which the bar charts below rely on.
dulieu2 <- aggregate(
  dulieu[, names(dulieu) != "Stage"],
  by = list(dulieu$Stage),
  FUN = mean
)
head(dulieu2)
##   Group.1   No Y Stage   Year Scope1 Sinner1 Heroin Opium CrackCocaine     Meth
## 1   after 10.5 1    NA 2014.5  704.2  1104.9  9.303  8.02     45.59388 10.09542
## 2  before  3.0 0    NA 2007.0  216.8   285.2  2.814  3.34      0.33530 23.86000
##   Hashish Scope2 Sinner2 Victim
## 1   6.100    8.0    18.9   31.3
## 2   3.456    3.4     8.2   12.2

Vẽ đồ thị bar

# Bar charts of the per-Stage means, one panel per measure.
# `m` is the shared base plot; each panel adds a bar layer whose height is
# taken directly from the column (stat = "identity").
m <- ggplot(dulieu2, aes(x = Group.1))

m1 <- m +
  geom_bar(aes(y = Scope1), stat = "identity", colour = "white", fill = "red")
m2 <- m +
  geom_bar(aes(y = Sinner1), stat = "identity", colour = "white", fill = "blue")
m3 <- m +
  geom_bar(aes(y = Heroin), stat = "identity", colour = "white", fill = "green")
m4 <- m +
  geom_bar(aes(y = Opium), stat = "identity", colour = "white", fill = "yellow")

# The panels are laid out with gridExtra; the base-graphics alternative
# par(mfrow = c(2, 2)) was left disabled in the original script.
library(gridExtra)
grid.arrange(m1, m2, m3, m4, ncol = 2)

Vẽ đồ thị boxplot buôn bán ma tuý

# Boxplots of each drug-related measure by Stage, with the individual
# observations overlaid as horizontally jittered points.
#
# Fix: the jitter layers of m5, m6 and m7 previously mapped y = Opium
# (copy-paste from m4), so the points did not belong to the box drawn
# beneath them. Each jitter layer now uses the same column as its boxplot.
m <- ggplot(data = dulieu, aes(Stage))

m1 <- m + geom_boxplot(aes(y = Scope1), fill = "red") +
  geom_jitter(aes(y = Scope1), width = 0.2)
m2 <- m + geom_boxplot(aes(y = Sinner1), fill = "blue") +
  geom_jitter(aes(y = Sinner1), width = 0.2)
m3 <- m + geom_boxplot(aes(y = Heroin), fill = "green") +
  geom_jitter(aes(y = Heroin), width = 0.2)
m4 <- m + geom_boxplot(aes(y = Opium), fill = "yellow") +
  geom_jitter(aes(y = Opium), width = 0.2)
m5 <- m + geom_boxplot(aes(y = CrackCocaine), fill = "magenta4") +
  geom_jitter(aes(y = CrackCocaine), width = 0.2)
m6 <- m + geom_boxplot(aes(y = Meth), fill = "steelblue") +
  geom_jitter(aes(y = Meth), width = 0.2)
m7 <- m + geom_boxplot(aes(y = Hashish), fill = "lightcoral") +
  geom_jitter(aes(y = Hashish), width = 0.2)

# First page: four panels in a 2-column grid; second page: three side by side.
grid.arrange(m1, m2, m3, m4, ncol = 2)

grid.arrange(m5, m6, m7, ncol = 3)

Vẽ đồ thị boxplot mua bán người

# Boxplots of the three human-trafficking measures by Stage, each with its
# observations overlaid as horizontally jittered points.
m <- ggplot(dulieu, aes(Stage))

m1 <- m +
  geom_boxplot(aes(y = Scope2), fill = "red") +
  geom_jitter(aes(y = Scope2), width = 0.2)
m2 <- m +
  geom_boxplot(aes(y = Sinner2), fill = "blue") +
  geom_jitter(aes(y = Sinner2), width = 0.2)
m3 <- m +
  geom_boxplot(aes(y = Victim), fill = "green") +
  geom_jitter(aes(y = Victim), width = 0.2)

grid.arrange(m1, m2, m3, ncol = 3)

Thống kê mô tả

library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
# Descriptive statistics for every outcome column, stratified by Stage.
table1(
  ~ Scope1 + Sinner1 + Heroin + Opium + CrackCocaine + Meth + Hashish +
    Scope2 + Sinner2 + Victim | Stage,
  data = dulieu
)
|                   | after (n=10)      | before (n=5)         | Overall (n=15)     |
|-------------------|-------------------|----------------------|--------------------|
| **Scope1**        |                   |                      |                    |
| Mean (SD)         | 704 (537)         | 217 (29.5)           | 542 (492)          |
| Median [Min, Max] | 399 [273, 1620]   | 219 [184, 250]       | 333 [184, 1620]    |
| **Sinner1**       |                   |                      |                    |
| Mean (SD)         | 1100 (977)        | 285 (63.0)           | 832 (880)          |
| Median [Min, Max] | 521 [406, 2810]   | 257 [223, 382]       | 421 [223, 2810]    |
| **Heroin**        |                   |                      |                    |
| Mean (SD)         | 9.30 (8.46)       | 2.81 (1.54)          | 7.14 (7.53)        |
| Median [Min, Max] | 6.01 [2.66, 28.3] | 2.31 [1.26, 4.61]    | 4.27 [1.26, 28.3]  |
| **Opium**         |                   |                      |                    |
| Mean (SD)         | 8.02 (4.85)       | 3.34 (2.30)          | 6.46 (4.67)        |
| Median [Min, Max] | 8.75 [0.00, 13.5] | 2.70 [1.50, 7.20]    | 7.01 [0.00, 13.5]  |
| **CrackCocaine**  |                   |                      |                    |
| Mean (SD)         | 45.6 (68.2)       | 0.335 (0.493)        | 30.5 (59.0)        |
| Median [Min, Max] | 12.6 [4.77, 228]  | 0.108 [0.0729, 1.21] | 11.9 [0.0729, 228] |
| **Meth**          |                   |                      |                    |
| Mean (SD)         | 10.1 (18.9)       | 23.9 (44.6)          | 14.7 (29.0)        |
| Median [Min, Max] | 4.11 [0.00, 62.1] | 0.00 [0.00, 103]     | 1.31 [0.00, 103]   |
| **Hashish**       |                   |                      |                    |
| Mean (SD)         | 6.10 (6.38)       | 3.46 (5.06)          | 5.22 (5.92)        |
| Median [Min, Max] | 4.00 [0.00, 18.2] | 0.00 [0.00, 11.2]    | 3.40 [0.00, 18.2]  |
| **Scope2**        |                   |                      |                    |
| Mean (SD)         | 8.00 (5.35)       | 3.40 (1.52)          | 6.47 (4.91)        |
| Median [Min, Max] | 6.50 [2.00, 17.0] | 3.00 [2.00, 6.00]    | 5.00 [2.00, 17.0]  |
| **Sinner2**       |                   |                      |                    |
| Mean (SD)         | 18.9 (9.31)       | 8.20 (1.30)          | 15.3 (9.14)        |
| Median [Min, Max] | 17.0 [8.00, 34.0] | 9.00 [6.00, 9.00]    | 11.0 [6.00, 34.0]  |
| **Victim**        |                   |                      |                    |
| Mean (SD)         | 31.3 (20.7)       | 12.2 (3.96)          | 24.9 (19.1)        |
| Median [Min, Max] | 26.5 [9.00, 71.0] | 11.0 [9.00, 19.0]    | 19.0 [9.00, 71.0]  |

Biểu đồ so sánh trước và sau

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
library(ggalt)
## Registered S3 methods overwritten by 'ggalt':
##   method                  from   
##   grid.draw.absoluteGrob  ggplot2
##   grobHeight.absoluteGrob ggplot2
##   grobWidth.absoluteGrob  ggplot2
##   grobX.absoluteGrob      ggplot2
##   grobY.absoluteGrob      ggplot2
library(base)
library(forcats)
# Dumbbell chart comparing Score1 and Score2 for each Standard, with rows
# ordered by Diff.
#
# Changes from the previous version:
#   * attach(dulieu2) removed: every column below is reached through
#     dulieu2$... or the `data` argument, so attaching only masked `No` on
#     the search path.
#   * Standard is already converted to an ordered-by-Diff factor, so the
#     extra factor() wrapper inside aes() was redundant.
#   * Commented-out experiments with order()/arrange() dropped.
dulieu2 <- read_excel("data3.xlsx")

# Fix the display order of the rows: levels follow increasing Diff.
dulieu2$Standard <- factor(
  dulieu2$Standard,
  levels = dulieu2$Standard[order(dulieu2$Diff)]
)

ggplot() +
  geom_dumbbell(
    data = dulieu2,
    aes(x = Score1, xend = Score2, y = Standard),
    color = "#32a8a8", size = 1.75,
    colour_x = "#bf244b", colour_xend = "#1a2d8f",
    size_x = 4, size_xend = 5
  )

Kiểm định Wilcoxon test

# Wilcoxon rank-sum tests comparing each measure between the two Stage groups.
# The data frame is passed through the `data` argument of the formula method
# instead of attach(dulieu): the previous attach() stacked a second copy of
# `dulieu` on the search path and masked the earlier one. The echoed results
# below are unchanged by this.
wilcox.test(Scope1 ~ Stage, data = dulieu)
## 
##  Wilcoxon rank sum test
## 
## data:  Scope1 by Stage
## W = 50, p-value = 0.000666
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(Sinner1 ~ Stage, data = dulieu)
## 
##  Wilcoxon rank sum test
## 
## data:  Sinner1 by Stage
## W = 50, p-value = 0.000666
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(Heroin ~ Stage, data = dulieu)
## 
##  Wilcoxon rank sum test
## 
## data:  Heroin by Stage
## W = 42, p-value = 0.03996
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(Opium ~ Stage, data = dulieu)
## Warning in wilcox.test.default(x = c(7.6, 9.89, 10.12, 6.98, 7.01, 0, 13.5, :
## cannot compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Opium by Stage
## W = 38, p-value = 0.1251
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(CrackCocaine ~ Stage, data = dulieu)
## 
##  Wilcoxon rank sum test
## 
## data:  CrackCocaine by Stage
## W = 50, p-value = 0.000666
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(Meth ~ Stage, data = dulieu)
## Warning in wilcox.test.default(x = c(62.12, 11, 6.91, 9.01, 10.6, 1.31422, :
## cannot compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Meth by Stage
## W = 25, p-value = 1
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(Hashish ~ Stage, data = dulieu)
## Warning in wilcox.test.default(x = c(1.8, 0, 0, 12.7, 18.2, 1.7, 4.6, 3.4, :
## cannot compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Hashish by Stage
## W = 33, p-value = 0.3496
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(Scope2 ~ Stage, data = dulieu)
## Warning in wilcox.test.default(x = c(6, 2, 5, 3, 3, 7, 10, 11, 16, 17), : cannot
## compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Scope2 by Stage
## W = 39, p-value = 0.09163
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(Sinner2 ~ Stage, data = dulieu)
## Warning in wilcox.test.default(x = c(15, 8, 16, 11, 9, 18, 26, 19, 33, 34:
## cannot compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Sinner2 by Stage
## W = 45, p-value = 0.01586
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(Victim ~ Stage, data = dulieu)
## Warning in wilcox.test.default(x = c(15, 9, 31, 26, 11, 23, 27, 38, 62, : cannot
## compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Victim by Stage
## W = 42, p-value = 0.04292
## alternative hypothesis: true location shift is not equal to 0

Vẽ đồ thị dumbbell

library(dplyr)

# Labelled dumbbell chart: one row per Standard (ordered by Diff), joining
# Score1 to Score2, with both values printed next to their end points and
# "Before"/"After" captions on the Sinner1 row.
#
# Fix: x carries the numeric scores, so the padding is now applied with
# scale_x_continuous(). The previous scale_x_discrete() put a discrete
# position scale on numeric data, which leaves the axis without numeric
# breaks and labels.
# NOTE(review): this overwrites `dulieu` (the main data set) with the
# contents of data3.xlsx, as the original script did.
dulieu <- read_excel("data3.xlsx")
dulieu1 <- dulieu %>%
  arrange(Diff) %>%
  mutate(Moi = factor(Standard, levels = Standard))

ggplot() +
  geom_dumbbell(
    data = dulieu1,
    aes(x = Score1, xend = Score2, y = Moi),
    color = "red", size = 1.75,
    colour_x = "#bf244b", colour_xend = "#1a2d8f",
    size_x = 4, size_xend = 5
  ) +
  # Captions above the two end points of the Sinner1 row only.
  geom_text(data = dulieu1 %>% filter(Moi == "Sinner1"),
            aes(x = Score1, y = Moi, label = "Before"), vjust = -1) +
  geom_text(data = dulieu1 %>% filter(Moi == "Sinner1"),
            aes(x = Score2, y = Moi, label = "After"), vjust = -1) +
  # Numeric value beside each end point, rounded to one decimal.
  geom_text(data = dulieu1,
            aes(x = Score1, y = Moi, label = round(Score1, 1)), hjust = 1.5) +
  geom_text(data = dulieu1,
            aes(x = Score2, y = Moi, label = round(Score2, 1)), hjust = -0.8) +
  # Extra padding so the captions and value labels are not clipped.
  scale_y_discrete(expand = c(0.12, 0)) +
  scale_x_continuous(expand = c(0.2, 0))

Vẽ đồ thị hình tháp

# Pyramid chart: one horizontal bar per Standard and Stage, with Score drawn
# as read from data2.xlsx (the label layers treat negative and positive
# scores separately and print negative ones as absolute values).
#
# Fix: Score is numeric, so the axis padding is now applied with
# scale_y_continuous(). The previous scale_y_discrete() put a discrete
# position scale on numeric data, which leaves the axis without numeric
# breaks. labels = abs keeps the axis consistent with the bar labels, which
# already show negative scores as absolute values.
# NOTE(review): this overwrites `dulieu` with the contents of data2.xlsx, as
# the original script did.
dulieu <- read_excel("data2.xlsx")

ggplot() +
  geom_bar(data = dulieu,
           aes(x = Standard, y = Score, fill = Stage),
           stat = "identity", col = "white") +
  # Labels just outside the bar ends: left of negative bars, right of
  # positive ones.
  geom_text(data = dulieu %>% filter(Score < 0),
            aes(x = Standard, y = Score, label = abs(Score)), hjust = 1) +
  geom_text(data = dulieu %>% filter(Score > 0),
            aes(x = Standard, y = Score, label = Score), hjust = -0.5) +
  scale_y_continuous(labels = abs, expand = c(0.2, 0)) +
  coord_flip()