Data source: World Bank Open Data (https://data.worldbank.org)
library(dplyr)
library(downloader)
library(stringr)
library(htmlTable)
library(tidyverse)
# Import the World Bank GDP-growth table (one row per country/aggregate,
# one column per year) from the CSV file.
# The file starts with a UTF-8 byte-order mark; reading it with the
# "UTF-8-BOM" encoding strips the mark so the first column is not imported
# with a mangled name such as "ï..Country.Name".
gdp_data <- read.csv(
  "https://raw.githubusercontent.com/baruab/msdsrepo/main/DATA-607/GDP_by_country.csv",
  stringsAsFactors = FALSE,
  fileEncoding = "UTF-8-BOM"
)
head(gdp_data, 5)
## ï..Country.Name Country.Code Indicator.Name
## 1 Aruba ABW GDP growth (annual %)
## 2 Africa Eastern and Southern AFE GDP growth (annual %)
## 3 Afghanistan AFG GDP growth (annual %)
## 4 Africa Western and Central AFW GDP growth (annual %)
## 5 Angola AGO GDP growth (annual %)
## Indicator.Code X1960 X1961 X1962 X1963 X1964 X1965
## 1 NY.GDP.MKTP.KD.ZG NA NA NA NA NA NA
## 2 NY.GDP.MKTP.KD.ZG NA 1.063696 7.453563 5.740520 5.473950 5.594137
## 3 NY.GDP.MKTP.KD.ZG NA NA NA NA NA NA
## 4 NY.GDP.MKTP.KD.ZG NA 1.898596 3.816073 7.040888 5.233236 4.175162
## 5 NY.GDP.MKTP.KD.ZG NA NA NA NA NA NA
## X1966 X1967 X1968 X1969 X1970 X1971 X1972 X1973
## 1 NA NA NA NA NA NA NA NA
## 2 4.058715 5.813018 4.046609 5.178724 4.855967 5.100963 2.203884 4.458723
## 3 NA NA NA NA NA NA NA NA
## 4 -1.796436 -9.401674 1.422819 15.107822 17.618959 10.628708 3.212971 4.100759
## 5 NA NA NA NA NA NA NA NA
## X1974 X1975 X1976 X1977 X1978 X1979 X1980 X1981
## 1 NA NA NA NA NA NA NA NA
## 2 5.839322 1.421272 2.444660 0.7431271 1.647541 3.174390 5.707294 4.181481
## 3 NA NA NA NA NA NA NA NA
## 4 10.533280 -1.908240 8.773445 4.3362697 -2.555202 5.192982 2.278913 -6.635271
## 5 NA NA NA NA NA NA NA -4.400001
## X1982 X1983 X1984 X1985 X1986 X1987 X1988
## 1 NA NA NA NA NA 16.078431 18.648649
## 2 0.2044245 -0.1678771 3.599962 -0.3106704 1.801654 3.626758 4.244825
## 3 NA NA NA NA NA NA NA
## 4 -3.0906407 -6.1737644 0.766286 5.4873474 1.338868 1.205109 4.906503
## 5 0.0000000 4.2000014 6.000002 3.4999995 2.900002 4.082749 6.128890
## X1989 X1990 X1991 X1992 X1993 X1994
## 1 12.12984055 3.96140173 7.96287250 5.882353 7.3076923 8.2039028
## 2 2.64672228 0.05297704 -0.08690589 -2.155483 -0.6660328 2.0872612
## 3 NA NA NA NA NA NA
## 4 2.32247485 6.43720769 1.22080653 2.683972 -1.1609721 -0.2260965
## 5 0.04162146 -3.45009868 0.99135930 -5.838281 -23.9834174 1.3393634
## X1995 X1996 X1997 X1998 X1999 X2000 X2001 X2002
## 1 2.547144 1.185788 7.046874 1.991986 1.238042 7.616588 -2.971257 -3.273646
## 2 4.308948 5.410609 3.433427 1.657682 2.672356 3.407952 3.385073 4.077465
## 3 NA NA NA NA NA NA NA NA
## 4 2.011852 4.596463 3.828704 3.606729 1.403042 3.611657 5.667418 9.930416
## 5 15.000000 13.544370 7.274277 4.691146 2.181490 3.054624 4.205999 13.665687
## X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 1 1.975547 7.911563 1.214349 1.050608 1.800226 -0.09070805 -10.5197485
## 2 3.156648 5.423484 6.312341 6.832111 7.104249 4.79681982 1.0394013
## 3 8.832278 1.414118 11.229715 5.357403 13.826320 3.92498382 21.3905284
## 4 5.873041 8.017312 6.005428 5.257805 5.588151 6.17543276 6.1454385
## 5 2.989850 10.952862 15.028915 11.547683 14.010018 11.16613833 0.8587126
## X2010 X2011 X2012 X2013 X2014 X2015 X2016
## 1 -3.685029 3.4460548 -1.369863 4.198232 0.300000 5.7000009 2.099999586
## 2 4.809783 4.2019919 3.240976 4.470306 4.090918 2.7632428 2.004538890
## 3 14.362441 0.4263548 12.752287 5.600745 2.724543 1.4513147 2.260314201
## 4 6.641963 5.0048745 5.272612 5.831383 5.833539 2.7351660 -0.001994532
## 5 4.403933 3.4719763 8.542188 4.954545 4.822628 0.9435716 -2.580049644
## X2017 X2018 X2019 X2020 X
## 1 1.9999991 NA NA NA NA
## 2 2.8322876 2.385829 1.6729245 -3.5751725 NA
## 3 2.6470032 1.189228 3.9116034 -1.9347782 NA
## 4 2.1637475 2.831539 3.1474817 -0.9789222 NA
## 5 -0.1472129 -2.003630 -0.6246443 -4.0405100 NA
# Tidy the GDP table for joining:
#   1. round every numeric column to two decimals,
#   2. drop the two indicator columns,
#   3. turn the year columns "X1960", "X1961", ... into "1960", "1961", ...,
#   4. name the first column "country" so it can serve as the join key.
gdp_data2 <- gdp_data %>%
  mutate(across(where(is.numeric), ~ round(.x, digits = 2)))
subset_gdp_data <- gdp_data2 %>%
  select(-Indicator.Name, -Indicator.Code)
# Only strip an "X" that read.csv prefixed to a four-digit year. An unanchored
# gsub("X", "") would remove every "X" from every column name and would turn
# the trailing "X" column into an empty name, which downstream verbs may reject.
names(subset_gdp_data) <- sub("^X([0-9]{4})$", "\\1", names(subset_gdp_data))
# Rename by position: the imported name of the first column may be mangled.
names(subset_gdp_data)[1] <- "country"
# Load the latitude/longitude lookup table from its CSV file and preview
# the first rows.
country_latlng_data <- read.csv(
  file = "https://raw.githubusercontent.com/baruab/msdsrepo/main/DATA-607/country_latitude_longitude.csv"
)
head(country_latlng_data, n = 6L)
## country_code latitude longitude country usa_state_code
## 1 AD 42.54624 1.601554 Andorra AK
## 2 AE 23.42408 53.847818 United Arab Emirates AL
## 3 AF 33.93911 67.709953 Afghanistan AR
## 4 AG 17.06082 -61.796428 Antigua and Barbuda AZ
## 5 AI 18.22055 -63.068615 Anguilla CA
## 6 AL 41.15333 20.168331 Albania CO
## usa_state_latitude usa_state_longitude usa_state
## 1 63.58875 -154.49306 Alaska
## 2 32.31823 -86.90230 Alabama
## 3 35.20105 -91.83183 Arkansas
## 4 34.04893 -111.09373 Arizona
## 5 36.77826 -119.41793 California
## 6 39.55005 -105.78207 Colorado
# Keep only the coordinates and the country name, put the join key first,
# then attach the GDP columns to every country in the coordinates table.
subset_latlng_data <- country_latlng_data[, c("latitude", "longitude", "country")]
col_order <- c("country", "latitude", "longitude")
new_latlng_data <- subset_latlng_data[col_order]
joined_df <- new_latlng_data %>%
  left_join(subset_gdp_data, by = "country")
### Keep the 2016-2020 growth figures plus coordinates and rank the countries.
# `total` is the plain sum of the five annual growth percentages (not a
# compounded growth rate). Rows with a missing value in any of the five
# years yield an NA total and are dropped.
# The sum is computed from the piped columns rather than from `joined_df$...`
# so it stays correct if rows are ever filtered or reordered upstream.
sel_df <- joined_df %>%
  select(country, `2016`, `2017`, `2018`, `2019`, `2020`, latitude, longitude) %>%
  mutate(total = `2016` + `2017` + `2018` + `2019` + `2020`) %>%
  filter(!is.na(total))
# Ten countries with the largest summed growth, in descending order.
top_10_countries <- sel_df %>%
  arrange(desc(total)) %>%
  head(10)
knitr::kable(top_10_countries, "html")
| country | 2016 | 2017 | 2018 | 2019 | 2020 | latitude | longitude | total |
|---|---|---|---|---|---|---|---|---|
| Guyana | 3.81 | 3.73 | 4.44 | 5.35 | 43.48 | 4.860416 | -58.930180 | 60.81 |
| Ethiopia | 9.43 | 9.56 | 6.82 | 8.36 | 6.06 | 9.145000 | 40.489673 | 40.23 |
| Guinea | 10.82 | 10.30 | 6.36 | 5.65 | 6.99 | 9.945587 | -9.696645 | 40.12 |
| Tajikistan | 6.90 | 7.10 | 7.60 | 7.40 | 4.50 | 38.861034 | 71.276093 | 33.50 |
| Bangladesh | 7.11 | 7.28 | 7.86 | 8.15 | 2.38 | 23.684994 | 90.356331 | 32.78 |
| Vietnam | 6.21 | 6.81 | 7.08 | 7.02 | 2.91 | 14.058324 | 108.277199 | 30.03 |
| China | 6.85 | 6.95 | 6.75 | 5.95 | 2.30 | 35.861660 | 104.195397 | 28.80 |
| Djibouti | 6.65 | 5.40 | 8.41 | 7.77 | 0.50 | 11.825138 | 42.590275 | 28.73 |
| Ireland | 1.99 | 9.13 | 8.52 | 5.57 | 3.42 | 53.412910 | -8.243890 | 28.63 |
| Tanzania | 6.87 | 6.79 | 5.44 | 5.79 | 2.00 | -6.369028 | 34.888822 | 26.89 |
# Bar chart (one bar per country, so not a histogram) of the summed
# 2016-2020 GDP growth for the ten top-ranked countries.
# geom_col() draws bar heights directly from `total`; the country labels are
# rotated 90 degrees so they do not overlap.
ggplot(top_10_countries, aes(x = country, y = total)) +
  geom_col() +
  scale_x_discrete(guide = guide_axis(angle = 90))
library("rnaturalearth")
library("rnaturalearthdata")
# World map marking the ten countries with the largest summed GDP growth.
# Each country is drawn as a red point at its latitude/longitude with a
# bold label; axes, grid and legend are hidden.
world <- ne_countries(scale = "medium", returnclass = "sf")
ggplot(data = world) +
  geom_sf() +
  geom_point(
    data = top_10_countries,
    aes(x = longitude, y = latitude),
    color = "#e60000"
  ) +
  geom_text(
    data = top_10_countries,
    aes(x = longitude, y = latitude, label = country),
    color = "darkblue", fontface = "bold", size = 3, check_overlap = TRUE
  ) +
  # The plotted measure is GDP growth, not GDP level, so the title says so.
  labs(
    title = "Highest GDP growth",
    subtitle = "Top 10 countries by summed annual GDP growth (%), 2016-2020"
  ) +
  theme(
    # Dark text: white text on the near-white background was unreadable.
    text = element_text(family = "Arial", color = "#333333"),
    panel.background = element_rect(fill = "#FFFFF4"),
    plot.background = element_rect(fill = "#FFFFF4"),
    panel.grid = element_blank(),
    plot.title = element_text(size = 30),
    plot.subtitle = element_text(size = 10),
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    legend.position = "none"
  )