Distribution of World Bank countries by region
# Assemble the download URL of the CSV from the repository root and the
# file name (the root already ends in "/", so plain concatenation is enough).
location <- "https://github.com/pubpolicy/PubPolicy-543/raw/main/"
file <- "gbdChildMortality_2010s.csv"
linkToFile <- paste0(location, file)
# Read the CSV at linkToFile into a data frame and print per-column summaries.
gdb <- read.csv(file = linkToFile)
summary(object = gdb)
## iso NAME_EN gbdRegion
## AFG : 1 Afghanistan : 1 Europe, Western :22
## AGO : 1 Albania : 1 North Africa / Middle East:19
## ALB : 1 Algeria : 1 Sub-Saharan Africa, West :19
## AND : 1 Andorra : 1 Caribbean :15
## ARE : 1 Angola : 1 Sub-Saharan Africa, East :15
## ARG : 1 Antigua and Barbuda: 1 Asia, Southeast :13
## (Other):181 (Other) :181 (Other) :84
## year neoMR postneoMR age1_5MR
## Min. :2010 Min. : 1.270 Min. : 0.50 Min. : 0.090
## 1st Qu.:2010 1st Qu.: 4.915 1st Qu.: 2.35 1st Qu.: 0.735
## Median :2010 Median :11.280 Median : 7.44 Median : 2.640
## Mean :2010 Mean :15.502 Mean :13.38 Mean :13.323
## 3rd Qu.:2010 3rd Qu.:25.480 3rd Qu.:20.82 3rd Qu.:15.260
## Max. :2010 Max. :56.030 Max. :63.62 Max. :94.710
##
## under5MR neoDeaths postneoDeaths age1_5Deaths
## Min. : 2.50 Min. : 3 Min. : 1 Min. : 0.0
## 1st Qu.: 8.29 1st Qu.: 194 1st Qu.: 111 1st Qu.: 39.5
## Median : 20.99 Median : 1493 Median : 955 Median : 358.0
## Mean : 40.98 Mean : 16735 Mean : 12083 Mean : 12324.1
## 3rd Qu.: 62.38 3rd Qu.: 9370 3rd Qu.: 8301 3rd Qu.: 5008.0
## Max. :180.13 Max. :916630 Max. :420715 Max. :467326.0
##
## under5Deaths
## Min. : 5.0
## 1st Qu.: 381.5
## Median : 2884.0
## Mean : 41142.2
## 3rd Qu.: 22843.0
## Max. :1707643.0
##
# List the column names of the loaded data frame.
colnames(gdb)
## [1] "iso" "NAME_EN" "gbdRegion" "year"
## [5] "neoMR" "postneoMR" "age1_5MR" "under5MR"
## [9] "neoDeaths" "postneoDeaths" "age1_5Deaths" "under5Deaths"
# Absolute values: number of rows per region.
# useNA = "ifany" adds an NA category whenever a region is missing. The
# former exclude = 'nothing' sentinel only achieved this as a side effect,
# and would have dropped a level that happened to be named "nothing".
absoluteT <- table(gdb$gbdRegion, useNA = "ifany")
absoluteT
##
## Asia Pacific, High Income Asia, Central
## 4 9
## Asia, East Asia, South
## 3 6
## Asia, Southeast Australasia
## 13 2
## Caribbean Europe, Central
## 15 13
## Europe, Eastern Europe, Western
## 7 22
## Latin America, Andean Latin America, Central
## 3 9
## Latin America, Southern Latin America, Tropical
## 3 2
## North Africa / Middle East North America, High Income
## 19 2
## Oceania Sub-Saharan Africa, Central
## 9 6
## Sub-Saharan Africa, East Sub-Saharan Africa, Southern
## 15 6
## Sub-Saharan Africa, West
## 19
# Relative values: each region's count divided by the total (fractions of 1).
prop.table(x = absoluteT)
##
## Asia Pacific, High Income Asia, Central
## 0.02139037 0.04812834
## Asia, East Asia, South
## 0.01604278 0.03208556
## Asia, Southeast Australasia
## 0.06951872 0.01069519
## Caribbean Europe, Central
## 0.08021390 0.06951872
## Europe, Eastern Europe, Western
## 0.03743316 0.11764706
## Latin America, Andean Latin America, Central
## 0.01604278 0.04812834
## Latin America, Southern Latin America, Tropical
## 0.01604278 0.01069519
## North Africa / Middle East North America, High Income
## 0.10160428 0.01069519
## Oceania Sub-Saharan Africa, Central
## 0.04812834 0.03208556
## Sub-Saharan Africa, East Sub-Saharan Africa, Southern
## 0.08021390 0.03208556
## Sub-Saharan Africa, West
## 0.10160428
# The same shares rescaled to percentages (0-100).
ToPlot <- 100 * prop.table(absoluteT)
ToPlot
##
## Asia Pacific, High Income Asia, Central
## 2.139037 4.812834
## Asia, East Asia, South
## 1.604278 3.208556
## Asia, Southeast Australasia
## 6.951872 1.069519
## Caribbean Europe, Central
## 8.021390 6.951872
## Europe, Eastern Europe, Western
## 3.743316 11.764706
## Latin America, Andean Latin America, Central
## 1.604278 4.812834
## Latin America, Southern Latin America, Tropical
## 1.604278 1.069519
## North Africa / Middle East North America, High Income
## 10.160428 1.069519
## Oceania Sub-Saharan Africa, Central
## 4.812834 3.208556
## Sub-Saharan Africa, East Sub-Saharan Africa, Southern
## 8.021390 3.208556
## Sub-Saharan Africa, West
## 10.160428
# Turn the percentage table into a data frame: one row per region, with the
# default column names Var1 (category) and Freq (value).
tableFreq <- as.data.frame(x = ToPlot)
tableFreq
## Var1 Freq
## 1 Asia Pacific, High Income 2.139037
## 2 Asia, Central 4.812834
## 3 Asia, East 1.604278
## 4 Asia, South 3.208556
## 5 Asia, Southeast 6.951872
## 6 Australasia 1.069519
## 7 Caribbean 8.021390
## 8 Europe, Central 6.951872
## 9 Europe, Eastern 3.743316
## 10 Europe, Western 11.764706
## 11 Latin America, Andean 1.604278
## 12 Latin America, Central 4.812834
## 13 Latin America, Southern 1.604278
## 14 Latin America, Tropical 1.069519
## 15 North Africa / Middle East 10.160428
## 16 North America, High Income 1.069519
## 17 Oceania 4.812834
## 18 Sub-Saharan Africa, Central 3.208556
## 19 Sub-Saharan Africa, East 8.021390
## 20 Sub-Saharan Africa, Southern 3.208556
## 21 Sub-Saharan Africa, West 10.160428
# Replace the default column names with descriptive ones.
colnames(tableFreq) <- c("Region", "proportion")
tableFreq
## Region proportion
## 1 Asia Pacific, High Income 2.139037
## 2 Asia, Central 4.812834
## 3 Asia, East 1.604278
## 4 Asia, South 3.208556
## 5 Asia, Southeast 6.951872
## 6 Australasia 1.069519
## 7 Caribbean 8.021390
## 8 Europe, Central 6.951872
## 9 Europe, Eastern 3.743316
## 10 Europe, Western 11.764706
## 11 Latin America, Andean 1.604278
## 12 Latin America, Central 4.812834
## 13 Latin America, Southern 1.604278
## 14 Latin America, Tropical 1.069519
## 15 North Africa / Middle East 10.160428
## 16 North America, High Income 1.069519
## 17 Oceania 4.812834
## 18 Sub-Saharan Africa, Central 3.208556
## 19 Sub-Saharan Africa, East 8.021390
## 20 Sub-Saharan Africa, Southern 3.208556
## 21 Sub-Saharan Africa, West 10.160428
library(ggplot2)
# Base layer: regions on x, their percentage on y, drawn with the classic theme.
base <- ggplot(data = tableFreq, mapping = aes(x = Region, y = proportion)) +
  theme_classic()
# stat = "identity" draws the `proportion` values as given instead of counting
# rows; coord_flip() makes the bars horizontal.
plot1 <- base +
  geom_bar(stat = "identity", fill = "blue") +
  coord_flip()
plot1

# Add a title and a caption; both axis titles are removed (NULL).
# NOTE(review): the counts come from the `gbdRegion` column, so the
# "World Bank" wording in the title/caption may be stale -- confirm.
titleText <- "Distribution of countries by World Bank Regions"
sourceText <- "Source: World Bank"
plot2 <- plot1 +
  labs(title = titleText, caption = sourceText, x = NULL, y = NULL)
plot2

# Dashed, half-transparent reference line at 5.88 on the percentage axis.
# NOTE(review): 5.88 is hard-coded (about 100/17); the table above has 21
# regions, whose uniform share would be 100/21 -- confirm the intended value.
plot3 <- plot2 +
  geom_hline(
    yintercept = 5.88,    # where the line is drawn
    linetype = "dashed",
    size = 1,             # thickness
    alpha = 0.5           # transparency
  )
plot3

library(scales) # for "unit_format""
## Warning: package 'scales' was built under R version 3.6.2
# Customize the percentage axis: range 0-12, a tick every 2 units, and a "%"
# suffix on every tick label.
plot4 <- plot3 +
  scale_y_continuous(
    limits = c(0, 12),
    breaks = seq(from = 0, to = 12, by = 2),
    labels = unit_format(suffix = "%")
  )
plot4

# Centre the title (hjust = 0.5) and right-align the caption (hjust = 1).
plot5 <- plot4 +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 1)
  )
plot5

# Preview the bar labels: percentages rounded to 2 decimals, followed by "%".
paste0(round(tableFreq$proportion, digits = 2), "%")
## [1] "2.14%" "4.81%" "1.6%" "3.21%" "6.95%" "1.07%" "8.02%" "6.95%"
## [9] "3.74%" "11.76%" "1.6%" "4.81%" "1.6%" "1.07%" "10.16%" "1.07%"
## [17] "4.81%" "3.21%" "8.02%" "3.21%" "10.16%"
# One label per row of tableFreq, in the data frame's current row order.
LABELS <- paste0(round(tableFreq$proportion, digits = 2), "%")
# Write each label at its bar's value (y = proportion), vertically centred.
plot6 <- plot5 +
  geom_text(
    mapping = aes(y = proportion, label = LABELS),
    vjust = 0.5,
    size = 3
  )
# The chart is already horizontal: coord_flip() was applied when plot1 was built.
plot6

# Reorder the rows by increasing percentage, then display the result.
tableFreq <- tableFreq[order(tableFreq[["proportion"]]), ]
tableFreq
## Region proportion
## 6 Australasia 1.069519
## 14 Latin America, Tropical 1.069519
## 16 North America, High Income 1.069519
## 3 Asia, East 1.604278
## 11 Latin America, Andean 1.604278
## 13 Latin America, Southern 1.604278
## 1 Asia Pacific, High Income 2.139037
## 4 Asia, South 3.208556
## 18 Sub-Saharan Africa, Central 3.208556
## 20 Sub-Saharan Africa, Southern 3.208556
## 9 Europe, Eastern 3.743316
## 2 Asia, Central 4.812834
## 12 Latin America, Central 4.812834
## 17 Oceania 4.812834
## 5 Asia, Southeast 6.951872
## 8 Europe, Central 6.951872
## 7 Caribbean 8.021390
## 19 Sub-Saharan Africa, East 8.021390
## 15 North Africa / Middle East 10.160428
## 21 Sub-Saharan Africa, West 10.160428
## 10 Europe, Western 11.764706
# Region names sorted by increasing percentage; used to fix the bar order.
regionOrd <- tableFreq[order(tableFreq$proportion), "Region"]
# Labels rebuilt from the current rows of tableFreq so they match its order.
LABELS <- paste0(round(tableFreq$proportion, 2), "%")

# Base plot, defined once. x must be the `Region` column (capital R) that
# tableFreq actually has; scale_x_discrete(limits = ) imposes the order held
# in regionOrd; theme_classic() keeps the same look as the other charts
# built in this script.
base <- ggplot(data = tableFreq, aes(x = Region, y = proportion)) +
  scale_x_discrete(limits = regionOrd) +
  theme_classic()
# stat = "identity" plots the percentages as given; coord_flip() makes the
# bars horizontal.
plot1 <- base +
  geom_bar(fill = "blue", stat = "identity") +
  coord_flip()
plot1

# Title and caption for the ordered chart; axis titles are dropped (NULL).
# NOTE(review): the regions come from the `gbdRegion` column, so the
# "World Bank" wording may be stale -- confirm.
titleText <- "Distribution of countries by World Bank Regions"
sourceText <- "Source: World Bank"
plot2 <- plot1 +
  labs(title = titleText, caption = sourceText, x = NULL, y = NULL)
plot2

# Dashed, half-transparent reference line at 5.88 on the percentage axis.
# NOTE(review): hard-coded value (about 100/17) -- confirm it still matches
# the intended reference for this data.
plot3 <- plot2 +
  geom_hline(
    yintercept = 5.88,    # where the line is drawn
    linetype = "dashed",
    size = 1,             # thickness
    alpha = 0.5           # transparency
  )
plot3

# Percentage axis: ticks every 2 units up to 12 with a "%" suffix; the upper
# limit is 13, one unit past the last tick.
plot4 <- plot3 +
  scale_y_continuous(
    limits = c(0, 13),
    breaks = seq(from = 0, to = 12, by = 2),
    labels = unit_format(suffix = "%")
  )
plot4

# Centre the title (hjust = 0.5) and right-align the caption (hjust = 1).
plot5 <- plot4 +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 1)
  )
plot5

# Value labels anchored at each bar's value: hjust = 0 starts the text at
# that position, vjust = 0.5 centres it across the bar.
plot6 <- plot5 +
  geom_text(
    mapping = aes(y = proportion, label = LABELS),
    hjust = 0,
    vjust = 0.5,
    size = 3
  )
plot6

# Lollipop chart, step 1: the stems.
# Familiar part: mapping, classic theme, and the region order from regionOrd.
base <- ggplot(data = tableFreq, mapping = aes(x = Region, y = proportion)) +
  theme_classic() +
  scale_x_discrete(limits = regionOrd)
# New part: geom_segment draws one line per region, from 0 to its percentage.
lolliplot1 <- base +
  coord_flip() +
  geom_segment(
    mapping = aes(x = Region, xend = Region, y = 0, yend = proportion),
    color = "grey50"
  )
lolliplot1

# Lollipop chart, step 2: add a point layer on top of the stems.
lolliplot2 <- lolliplot1 + geom_point()
lolliplot2

# New column: distance of each region's percentage from the 5.88 reference
# (negative = below the reference, positive = above).
# NOTE(review): 5.88 is hard-coded -- confirm it is the intended reference.
tableFreq[["gap"]] <- tableFreq[["proportion"]] - 5.88
tableFreq
## Region proportion gap
## 6 Australasia 1.069519 -4.810481
## 14 Latin America, Tropical 1.069519 -4.810481
## 16 North America, High Income 1.069519 -4.810481
## 3 Asia, East 1.604278 -4.275722
## 11 Latin America, Andean 1.604278 -4.275722
## 13 Latin America, Southern 1.604278 -4.275722
## 1 Asia Pacific, High Income 2.139037 -3.740963
## 4 Asia, South 3.208556 -2.671444
## 18 Sub-Saharan Africa, Central 3.208556 -2.671444
## 20 Sub-Saharan Africa, Southern 3.208556 -2.671444
## 9 Europe, Eastern 3.743316 -2.136684
## 2 Asia, Central 4.812834 -1.067166
## 12 Latin America, Central 4.812834 -1.067166
## 17 Oceania 4.812834 -1.067166
## 5 Asia, Southeast 6.951872 1.071872
## 8 Europe, Central 6.951872 1.071872
## 7 Caribbean 8.021390 2.141390
## 19 Sub-Saharan Africa, East 8.021390 2.141390
## 15 North Africa / Middle East 10.160428 4.280428
## 21 Sub-Saharan Africa, West 10.160428 4.280428
## 10 Europe, Western 11.764706 5.884706
# Lollipop chart of the gaps: same construction as before, but y is now `gap`,
# so stems run from 0 to each region's gap.
base <- ggplot(data = tableFreq, mapping = aes(x = Region, y = gap)) +
  theme_classic() +
  coord_flip() +
  scale_x_discrete(limits = regionOrd)
lolliplot1 <- base +
  geom_segment(
    mapping = aes(x = Region, xend = Region, y = 0, yend = gap),
    color = "gray"
  )
lolliplot2 <- lolliplot1 + geom_point()
lolliplot2
