Distribution of World Bank countries by region

# Assemble the URL of the CSV from the repository root and the file name.
location <- "https://github.com/pubpolicy/PubPolicy-543/raw/main/"
file <- "gbdChildMortality_2010s.csv"
linkToFile <- paste0(location, file)

# Read the CSV straight from the URL, then print a per-column summary.
gdb <- read.csv(linkToFile)
summary(gdb)
##       iso                     NAME_EN                         gbdRegion 
##  AFG    :  1   Afghanistan        :  1   Europe, Western           :22  
##  AGO    :  1   Albania            :  1   North Africa / Middle East:19  
##  ALB    :  1   Algeria            :  1   Sub-Saharan Africa, West  :19  
##  AND    :  1   Andorra            :  1   Caribbean                 :15  
##  ARE    :  1   Angola             :  1   Sub-Saharan Africa, East  :15  
##  ARG    :  1   Antigua and Barbuda:  1   Asia, Southeast           :13  
##  (Other):181   (Other)            :181   (Other)                   :84  
##       year          neoMR          postneoMR        age1_5MR     
##  Min.   :2010   Min.   : 1.270   Min.   : 0.50   Min.   : 0.090  
##  1st Qu.:2010   1st Qu.: 4.915   1st Qu.: 2.35   1st Qu.: 0.735  
##  Median :2010   Median :11.280   Median : 7.44   Median : 2.640  
##  Mean   :2010   Mean   :15.502   Mean   :13.38   Mean   :13.323  
##  3rd Qu.:2010   3rd Qu.:25.480   3rd Qu.:20.82   3rd Qu.:15.260  
##  Max.   :2010   Max.   :56.030   Max.   :63.62   Max.   :94.710  
##                                                                  
##     under5MR        neoDeaths      postneoDeaths     age1_5Deaths     
##  Min.   :  2.50   Min.   :     3   Min.   :     1   Min.   :     0.0  
##  1st Qu.:  8.29   1st Qu.:   194   1st Qu.:   111   1st Qu.:    39.5  
##  Median : 20.99   Median :  1493   Median :   955   Median :   358.0  
##  Mean   : 40.98   Mean   : 16735   Mean   : 12083   Mean   : 12324.1  
##  3rd Qu.: 62.38   3rd Qu.:  9370   3rd Qu.:  8301   3rd Qu.:  5008.0  
##  Max.   :180.13   Max.   :916630   Max.   :420715   Max.   :467326.0  
##                                                                       
##   under5Deaths      
##  Min.   :      5.0  
##  1st Qu.:    381.5  
##  Median :   2884.0  
##  Mean   :  41142.2  
##  3rd Qu.:  22843.0  
##  Max.   :1707643.0  
## 
# List the column names of the loaded data frame.
colnames(gdb)
##  [1] "iso"           "NAME_EN"       "gbdRegion"     "year"         
##  [5] "neoMR"         "postneoMR"     "age1_5MR"      "under5MR"     
##  [9] "neoDeaths"     "postneoDeaths" "age1_5Deaths"  "under5Deaths"
# absolute values
# Count the rows (one per country in the data) falling in each gbdRegion.
# useNA = "ifany" states the intent directly: add an NA cell only when the
# column really contains missing values, instead of relying on a dummy
# `exclude` value to switch that behaviour on.
absoluteT <- table(gdb$gbdRegion, useNA = "ifany")
absoluteT
## 
##    Asia Pacific, High Income                Asia, Central 
##                            4                            9 
##                   Asia, East                  Asia, South 
##                            3                            6 
##              Asia, Southeast                  Australasia 
##                           13                            2 
##                    Caribbean              Europe, Central 
##                           15                           13 
##              Europe, Eastern              Europe, Western 
##                            7                           22 
##        Latin America, Andean       Latin America, Central 
##                            3                            9 
##      Latin America, Southern      Latin America, Tropical 
##                            3                            2 
##   North Africa / Middle East   North America, High Income 
##                           19                            2 
##                      Oceania  Sub-Saharan Africa, Central 
##                            9                            6 
##     Sub-Saharan Africa, East Sub-Saharan Africa, Southern 
##                           15                            6 
##     Sub-Saharan Africa, West 
##                           19
# relative values: every count divided by the grand total, so shares sum to 1
absoluteT / sum(absoluteT)
## 
##    Asia Pacific, High Income                Asia, Central 
##                   0.02139037                   0.04812834 
##                   Asia, East                  Asia, South 
##                   0.01604278                   0.03208556 
##              Asia, Southeast                  Australasia 
##                   0.06951872                   0.01069519 
##                    Caribbean              Europe, Central 
##                   0.08021390                   0.06951872 
##              Europe, Eastern              Europe, Western 
##                   0.03743316                   0.11764706 
##        Latin America, Andean       Latin America, Central 
##                   0.01604278                   0.04812834 
##      Latin America, Southern      Latin America, Tropical 
##                   0.01604278                   0.01069519 
##   North Africa / Middle East   North America, High Income 
##                   0.10160428                   0.01069519 
##                      Oceania  Sub-Saharan Africa, Central 
##                   0.04812834                   0.03208556 
##     Sub-Saharan Africa, East Sub-Saharan Africa, Southern 
##                   0.08021390                   0.03208556 
##     Sub-Saharan Africa, West 
##                   0.10160428
# Shares rescaled from 0-1 to 0-100 (percentages); this is what gets plotted.
ToPlot <- 100 * prop.table(absoluteT)
ToPlot
## 
##    Asia Pacific, High Income                Asia, Central 
##                     2.139037                     4.812834 
##                   Asia, East                  Asia, South 
##                     1.604278                     3.208556 
##              Asia, Southeast                  Australasia 
##                     6.951872                     1.069519 
##                    Caribbean              Europe, Central 
##                     8.021390                     6.951872 
##              Europe, Eastern              Europe, Western 
##                     3.743316                    11.764706 
##        Latin America, Andean       Latin America, Central 
##                     1.604278                     4.812834 
##      Latin America, Southern      Latin America, Tropical 
##                     1.604278                     1.069519 
##   North Africa / Middle East   North America, High Income 
##                    10.160428                     1.069519 
##                      Oceania  Sub-Saharan Africa, Central 
##                     4.812834                     3.208556 
##     Sub-Saharan Africa, East Sub-Saharan Africa, Southern 
##                     8.021390                     3.208556 
##     Sub-Saharan Africa, West 
##                    10.160428
# as data frame: one row per region, with the category in a factor column
# (Var1) and the percentage in Freq
tableFreq <- as.data.frame(ToPlot, stringsAsFactors = TRUE)
tableFreq
##                            Var1      Freq
## 1     Asia Pacific, High Income  2.139037
## 2                 Asia, Central  4.812834
## 3                    Asia, East  1.604278
## 4                   Asia, South  3.208556
## 5               Asia, Southeast  6.951872
## 6                   Australasia  1.069519
## 7                     Caribbean  8.021390
## 8               Europe, Central  6.951872
## 9               Europe, Eastern  3.743316
## 10              Europe, Western 11.764706
## 11        Latin America, Andean  1.604278
## 12       Latin America, Central  4.812834
## 13      Latin America, Southern  1.604278
## 14      Latin America, Tropical  1.069519
## 15   North Africa / Middle East 10.160428
## 16   North America, High Income  1.069519
## 17                      Oceania  4.812834
## 18  Sub-Saharan Africa, Central  3.208556
## 19     Sub-Saharan Africa, East  8.021390
## 20 Sub-Saharan Africa, Southern  3.208556
## 21     Sub-Saharan Africa, West 10.160428
# Give the columns meaningful names; "proportion" holds the 0-100 percentages.
colnames(tableFreq) <- c("Region", "proportion")
tableFreq
##                          Region proportion
## 1     Asia Pacific, High Income   2.139037
## 2                 Asia, Central   4.812834
## 3                    Asia, East   1.604278
## 4                   Asia, South   3.208556
## 5               Asia, Southeast   6.951872
## 6                   Australasia   1.069519
## 7                     Caribbean   8.021390
## 8               Europe, Central   6.951872
## 9               Europe, Eastern   3.743316
## 10              Europe, Western  11.764706
## 11        Latin America, Andean   1.604278
## 12       Latin America, Central   4.812834
## 13      Latin America, Southern   1.604278
## 14      Latin America, Tropical   1.069519
## 15   North Africa / Middle East  10.160428
## 16   North America, High Income   1.069519
## 17                      Oceania   4.812834
## 18  Sub-Saharan Africa, Central   3.208556
## 19     Sub-Saharan Africa, East   8.021390
## 20 Sub-Saharan Africa, Southern   3.208556
## 21     Sub-Saharan Africa, West  10.160428
library(ggplot2)
# Base layer: regions on x, percentage share on y, classic theme.
base <- ggplot(data = tableFreq,
               mapping = aes(x = Region, y = proportion)) +
  theme_classic()

# Bars whose length is taken directly from the data (stat = "identity");
# coord_flip() lays them out horizontally so the region names stay readable.
plot1 <- base +
  geom_bar(stat = "identity", fill = "blue") +
  coord_flip()
plot1

# Title and caption texts.
# NOTE(review): the plotted column is `gbdRegion` from a file named
# gbdChildMortality; confirm that "World Bank" is the right attribution.
titleText <- "Distribution of countries by World Bank Regions"
sourceText <- "Source: World Bank"

# Add title and caption; drop both axis titles.
plot2 <- plot1 +
  labs(title = titleText, caption = sourceText, x = NULL, y = NULL)
plot2

# Dashed, semi-transparent reference line at 5.88 on the value axis.
# NOTE(review): 5.88 equals 100/17, but the table has 21 regions
# (100/21 is about 4.76); confirm the intended reference value.
plot3 <- plot2 +
  geom_hline(yintercept = 5.88,
             linetype = "dashed",
             size = 1,
             alpha = 0.5)
plot3

library(scales) # for "unit_format""
## Warning: package 'scales' was built under R version 3.6.2
# Value axis: ticks every 2 units from 0 to 12, each labelled with a "%" suffix.
plot4 <- plot3 +
  scale_y_continuous(breaks = seq(0, 12, by = 2),
                     limits = c(0, 12),
                     labels = unit_format(suffix = "%"))
plot4

# Centre the title and right-align the caption.
plot5 <- plot4 +
  theme(plot.title = element_text(hjust = 0.5),
        plot.caption = element_text(hjust = 1))
plot5

# Preview the bar labels: share rounded to 2 decimals with a "%" appended.
paste0(round(tableFreq$proportion, digits = 2), "%")
##  [1] "2.14%"  "4.81%"  "1.6%"   "3.21%"  "6.95%"  "1.07%"  "8.02%"  "6.95%" 
##  [9] "3.74%"  "11.76%" "1.6%"   "4.81%"  "1.6%"   "1.07%"  "10.16%" "1.07%" 
## [17] "4.81%"  "3.21%"  "8.02%"  "3.21%"  "10.16%"
# One label per row of tableFreq, in the data frame's current row order.
LABELS <- paste0(round(tableFreq$proportion, digits = 2), "%")

# Write each label at the end of its bar (y = proportion), vertically centred.
# plot1 already applies coord_flip(), so no further flip is added here.
plot6 <- plot5 +
  geom_text(aes(y = proportion, label = LABELS),
            size = 3,
            vjust = 0.5)
plot6

# Sort the rows by ascending share.
tableFreq <- tableFreq[order(tableFreq[["proportion"]]), ]
# then:
tableFreq
##                          Region proportion
## 6                   Australasia   1.069519
## 14      Latin America, Tropical   1.069519
## 16   North America, High Income   1.069519
## 3                    Asia, East   1.604278
## 11        Latin America, Andean   1.604278
## 13      Latin America, Southern   1.604278
## 1     Asia Pacific, High Income   2.139037
## 4                   Asia, South   3.208556
## 18  Sub-Saharan Africa, Central   3.208556
## 20 Sub-Saharan Africa, Southern   3.208556
## 9               Europe, Eastern   3.743316
## 2                 Asia, Central   4.812834
## 12       Latin America, Central   4.812834
## 17                      Oceania   4.812834
## 5               Asia, Southeast   6.951872
## 8               Europe, Central   6.951872
## 7                     Caribbean   8.021390
## 19     Sub-Saharan Africa, East   8.021390
## 15   North Africa / Middle East  10.160428
## 21     Sub-Saharan Africa, West  10.160428
## 10              Europe, Western  11.764706
# Region names sorted by ascending share; this fixes the bar order on the
# discrete axis. Stored as character because scale_x_discrete(limits = ...)
# is documented as taking a character vector.
regionOrd <- as.character(tableFreq$Region[order(tableFreq$proportion)])

# Percentage labels, one per row of tableFreq in its current row order.
LABELS <- paste0(round(tableFreq$proportion, 2), "%")

# Single base for the ordered bar chart. The x aesthetic must be `Region`
# (capitalised), the column name set on tableFreq; the classic theme is kept
# so this chart matches the unordered version and the lollipop plots.
base <- ggplot(data = tableFreq,
               aes(x = Region,
                   y = proportion))
base <- base + scale_x_discrete(limits = regionOrd)
base <- base + theme_classic()

# Horizontal bars whose length is taken directly from the data.
plot1 <- base + geom_bar(fill = "blue",
                         stat = "identity") +
  coord_flip()
plot1

# Title and caption texts.
# NOTE(review): the plotted column is `gbdRegion` from a file named
# gbdChildMortality; confirm that "World Bank" is the right attribution.
titleText <- "Distribution of countries by World Bank Regions"
sourceText <- "Source: World Bank"

# Add title and caption; drop both axis titles.
plot2 <- plot1 +
  labs(title = titleText, caption = sourceText, x = NULL, y = NULL)
plot2

# Dashed, semi-transparent reference line at 5.88 on the value axis.
# NOTE(review): 5.88 equals 100/17, but the table has 21 regions
# (100/21 is about 4.76); confirm the intended reference value.
plot3 <- plot2 +
  geom_hline(yintercept = 5.88,
             linetype = "dashed",
             size = 1,
             alpha = 0.5)
plot3

# Value axis: ticks every 2 units from 0 to 12 with a "%" suffix; the upper
# limit is 13 so there is room for the text labels past the longest bar.
plot4 <- plot3 +
  scale_y_continuous(breaks = seq(0, 12, by = 2),
                     limits = c(0, 13),
                     labels = unit_format(suffix = "%"))
plot4

# Centre the title and right-align the caption.
plot5 <- plot4 +
  theme(plot.title = element_text(hjust = 0.5),
        plot.caption = element_text(hjust = 1))
plot5

# Label each bar with its percentage. hjust = 0 starts the text at the bar's
# end so it extends outward; plot1 already applies coord_flip(), so no further
# flip is added here.
plot6 <- plot5 +
  geom_text(aes(y = proportion, label = LABELS),
            size = 3,
            hjust = 0,
            vjust = 0.5)
plot6

# Familiar setup: same data and mapping as the bar chart, classic theme,
# regions ordered by regionOrd.
base <- ggplot(tableFreq, aes(x = Region, y = proportion)) +
  theme_classic()
base <- base + scale_x_discrete(limits = regionOrd)

# New: geom_segment draws the lollipop "stick" from 0 to each region's share.
lolliplot1 <- base +
  coord_flip() +
  geom_segment(aes(x = Region, xend = Region,
                   y = 0, yend = proportion),
               color = "grey50")
lolliplot1

# The point at the end of each stick is the lollipop "head".
lolliplot2 <- lolliplot1 + geom_point()
lolliplot2

# Signed distance of each region's share from the 5.88 reference value.
# NOTE(review): 5.88 equals 100/17, but the table has 21 regions
# (100/21 is about 4.76); confirm the intended reference value.
tableFreq[["gap"]] <- tableFreq[["proportion"]] - 5.88
tableFreq
##                          Region proportion       gap
## 6                   Australasia   1.069519 -4.810481
## 14      Latin America, Tropical   1.069519 -4.810481
## 16   North America, High Income   1.069519 -4.810481
## 3                    Asia, East   1.604278 -4.275722
## 11        Latin America, Andean   1.604278 -4.275722
## 13      Latin America, Southern   1.604278 -4.275722
## 1     Asia Pacific, High Income   2.139037 -3.740963
## 4                   Asia, South   3.208556 -2.671444
## 18  Sub-Saharan Africa, Central   3.208556 -2.671444
## 20 Sub-Saharan Africa, Southern   3.208556 -2.671444
## 9               Europe, Eastern   3.743316 -2.136684
## 2                 Asia, Central   4.812834 -1.067166
## 12       Latin America, Central   4.812834 -1.067166
## 17                      Oceania   4.812834 -1.067166
## 5               Asia, Southeast   6.951872  1.071872
## 8               Europe, Central   6.951872  1.071872
## 7                     Caribbean   8.021390  2.141390
## 19     Sub-Saharan Africa, East   8.021390  2.141390
## 15   North Africa / Middle East  10.160428  4.280428
## 21     Sub-Saharan Africa, West  10.160428  4.280428
## 10              Europe, Western  11.764706  5.884706
# Same lollipop layout, but the value axis now shows the gap from the
# reference value instead of the raw share.
base <- ggplot(tableFreq, aes(x = Region, y = gap)) +
  theme_classic() +
  coord_flip() +
  scale_x_discrete(limits = regionOrd)

# Sticks start at 0 and run to each region's gap, so negative gaps point the
# opposite way from positive ones.
lolliplot1 <- base +
  geom_segment(aes(x = Region, xend = Region,
                   y = 0, yend = gap),
               color = "gray")
lolliplot2 <- lolliplot1 + geom_point()
lolliplot2