This R notebook explores the World Data 2023 dataset downloaded from Kaggle.com.
Global Country Information Dataset 2023 (kaggle.com) https://www.kaggle.com/datasets/nelgiriyewithana/countries-of-the-world-2023
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stringr)
library(ggplot2)
library(tmap)
## Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
## remotes::install_github('r-tmap/tmap')
library(tmaptools)
library(sf)
## Linking to GEOS 3.11.2, GDAL 3.7.2, PROJ 9.3.0; sf_use_s2() is TRUE
library(geojsonsf)
library(rjson)
library(sp)
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
# Session-wide display settings: a large `scipen` penalty keeps printed
# numbers in fixed notation, and the repr.* options set the plot size
# (presumably consumed by the Jupyter/IRkernel front end -- TODO confirm).
options(scipen = 999, repr.plot.width = 12, repr.plot.height = 6)

# Load the country indicators; the first row of the CSV holds the column
# names (read.csv's default), then preview the first 20 records.
world.data <- read.csv("world-data-2023.csv")
head(world.data, n = 20)
## Country Density.P.Km2 Abbreviation Agricultural.Land.Pct
## 1 Afghanistan 60 AF 58.10%
## 2 Albania 105 AL 43.10%
## 3 Algeria 18 DZ 17.40%
## 4 Andorra 164 AD 40.00%
## 5 Angola 26 AO 47.50%
## 6 Antigua and Barbuda 223 AG 20.50%
## 7 Argentina 17 AR 54.30%
## 8 Armenia 104 AM 58.90%
## 9 Australia 3 AU 48.20%
## 10 Austria 109 AT 32.40%
## 11 Azerbaijan 123 AZ 57.70%
## 12 The Bahamas 39 BS 1.40%
## 13 Bahrain 2,239 BH 11.10%
## 14 Bangladesh 1,265 BD 70.60%
## 15 Barbados 668 BB 23.30%
## 16 Belarus 47 BY 42.00%
## 17 Belgium 383 BE 44.60%
## 18 Belize 17 BZ 7.00%
## 19 Benin 108 BJ 33.30%
## 20 Bhutan 20 BT 13.60%
## Land.Area.Km2 Armed.Forces.Size Birth.Rate Calling.Code
## 1 652,230 323,000 32.49 93
## 2 28,748 9,000 11.78 355
## 3 2,381,741 317,000 24.28 213
## 4 468 7.20 376
## 5 1,246,700 117,000 40.73 244
## 6 443 0 15.33 1
## 7 2,780,400 105,000 17.02 54
## 8 29,743 49,000 13.99 374
## 9 7,741,220 58,000 12.60 61
## 10 83,871 21,000 9.70 43
## 11 86,600 82,000 14.00 994
## 12 13,880 1,000 13.97 1
## 13 765 19,000 13.99 973
## 14 148,460 221,000 18.18 880
## 15 430 1,000 10.65 1
## 16 207,600 155,000 9.90 375
## 17 30,528 32,000 10.30 32
## 18 22,966 2,000 20.79 501
## 19 112,622 12,000 36.22 229
## 20 38,394 6,000 17.26 975
## Capital.MajorCity Co2.Emissions CPI CPI.Change.Pct Currency.Code
## 1 Kabul 8,672 149.9 2.30% AFN
## 2 Tirana 4,536 119.05 1.40% ALL
## 3 Algiers 150,006 151.36 2.00% DZD
## 4 Andorra la Vella 469 EUR
## 5 Luanda 34,693 261.73 17.10% AOA
## 6 St. John's, Saint John 557 113.81 1.20% XCD
## 7 Buenos Aires 201,348 232.75 53.50% ARS
## 8 Yerevan 5,156 129.18 1.40% AMD
## 9 Canberra 375,908 119.8 1.60% AUD
## 10 Vienna 61,448 118.06 1.50% EUR
## 11 Baku 37,620 156.32 2.60% AZN
## 12 Nassau, Bahamas 1,786 116.22 2.50%
## 13 Manama 31,694 117.59 2.10% BHD
## 14 Dhaka 84,246 179.68 5.60% BDT
## 15 Bridgetown 1,276 134.09 4.10% BBD
## 16 Minsk 58,280 5.60% BYN
## 17 City of Brussels 96,889 117.11 1.40% EUR
## 18 Belmopan 568 105.68 -0.90% BZD
## 19 Porto-Novo 6,476 110.71 -0.90% XOF
## 20 Thimphu 1,261 167.18 2.70%
## Fertility.Rate Forested.Area.Pct Gasoline.Price GDP
## 1 4.47 2.10% $0.70 $19,101,353,833
## 2 1.62 28.10% $1.36 $15,278,077,447
## 3 3.02 0.80% $0.28 $169,988,236,398
## 4 1.27 34.00% $1.51 $3,154,057,987
## 5 5.52 46.30% $0.97 $94,635,415,870
## 6 1.99 22.30% $0.99 $1,727,759,259
## 7 2.26 9.80% $1.10 $449,663,446,954
## 8 1.76 11.70% $0.77 $13,672,802,158
## 9 1.74 16.30% $0.93 $1,392,680,589,329
## 10 1.47 46.90% $1.20 $446,314,739,528
## 11 1.73 14.10% $0.56 $39,207,000,000
## 12 1.75 51.40% $0.92 $12,827,000,000
## 13 1.99 0.80% $0.43 $38,574,069,149
## 14 2.04 11.00% $1.12 $302,571,254,131
## 15 1.62 14.70% $1.81 $5,209,000,000
## 16 1.45 42.60% $0.60 $63,080,457,023
## 17 1.62 22.60% $1.43 $529,606,710,418
## 18 2.31 59.70% $1.13 $1,879,613,600
## 19 4.84 37.80% $0.72 $14,390,709,095
## 20 1.98 72.50% $0.98 $2,446,674,101
## Gross.primary.education.enrollment.pct
## 1 104.00%
## 2 107.00%
## 3 109.90%
## 4 106.40%
## 5 113.50%
## 6 105.00%
## 7 109.70%
## 8 92.70%
## 9 100.30%
## 10 103.10%
## 11 99.70%
## 12 81.40%
## 13 99.40%
## 14 116.50%
## 15 99.40%
## 16 100.50%
## 17 103.90%
## 18 111.70%
## 19 122.00%
## 20 100.10%
## Gross.tertiary.education.enrollment.pct Infant.mortality
## 1 9.70% 47.9
## 2 55.00% 7.8
## 3 51.40% 20.1
## 4 2.7
## 5 9.30% 51.6
## 6 24.80% 5.0
## 7 90.00% 8.8
## 8 54.60% 11.0
## 9 113.10% 3.1
## 10 85.10% 2.9
## 11 27.70% 19.2
## 12 15.10% 8.3
## 13 50.50% 6.1
## 14 20.60% 25.1
## 15 65.40% 11.3
## 16 87.40% 2.6
## 17 79.70% 2.9
## 18 24.70% 11.2
## 19 12.30% 60.5
## 20 15.60% 24.8
## Largest.city Life.expectancy Maternal.mortality.ratio Minimum.wage
## 1 Kabul 64.5 638 $0.43
## 2 Tirana 78.5 15 $1.12
## 3 Algiers 76.7 112 $0.95
## 4 Andorra la Vella NA NA $6.63
## 5 Luanda 60.8 241 $0.71
## 6 St. John's, Saint John 76.9 42 $3.04
## 7 Buenos Aires 76.5 39 $3.35
## 8 Yerevan 74.9 26 $0.66
## 9 Sydney 82.7 6 $13.59
## 10 Vienna 81.6 5
## 11 Baku 72.9 26 $0.47
## 12 Nassau, Bahamas 73.8 70 $5.25
## 13 Riffa 77.2 14
## 14 Dhaka 72.3 173 $0.51
## 15 Bridgetown 79.1 27 $3.13
## 16 Minsk 74.2 2 $1.49
## 17 Brussels 81.6 5 $10.31
## 18 Belize City 74.5 36 $1.65
## 19 Cotonou 61.5 397 $0.39
## 20 Thimphu 71.5 183 $0.32
## Official.language Out.of.pocket.health.expenditure
## 1 Pashto 78.40%
## 2 Albanian 56.90%
## 3 Arabic 28.10%
## 4 Catalan 36.40%
## 5 Portuguese 33.40%
## 6 English 24.30%
## 7 Spanish 17.60%
## 8 Armenian 81.60%
## 9 None 19.60%
## 10 German 17.90%
## 11 Azerbaijani language 78.60%
## 12 English 27.80%
## 13 Arabic 25.10%
## 14 Bengali 71.80%
## 15 English 45.20%
## 16 Russian 34.50%
## 17 French 17.60%
## 18 English 22.70%
## 19 French 40.50%
## 20 Dzongkha 19.80%
## Physicians.per.thousand Population
## 1 0.28 38,041,754
## 2 1.20 2,854,191
## 3 1.72 43,053,054
## 4 3.33 77,142
## 5 0.21 31,825,295
## 6 2.76 97,118
## 7 3.96 44,938,712
## 8 4.40 2,957,731
## 9 3.68 25,766,605
## 10 5.17 8,877,067
## 11 3.45 10,023,318
## 12 1.94 389,482
## 13 0.93 1,501,635
## 14 0.58 167,310,838
## 15 2.48 287,025
## 16 5.19 9,466,856
## 17 3.07 11,484,055
## 18 1.12 390,353
## 19 0.08 11,801,151
## 20 0.42 727,145
## Population..Labor.force.participation.... Tax.revenue.... Total.tax.rate
## 1 48.90% 9.30% 71.40%
## 2 55.70% 18.60% 36.60%
## 3 41.20% 37.20% 66.10%
## 4
## 5 77.50% 9.20% 49.10%
## 6 16.50% 43.00%
## 7 61.30% 10.10% 106.30%
## 8 55.60% 20.90% 22.60%
## 9 65.50% 23.00% 47.40%
## 10 60.70% 25.40% 51.40%
## 11 66.50% 13.00% 40.70%
## 12 74.60% 14.80% 33.80%
## 13 73.40% 4.20% 13.80%
## 14 59.00% 8.80% 33.40%
## 15 65.20% 27.50% 35.60%
## 16 64.10% 14.70% 53.30%
## 17 53.60% 24.00% 55.40%
## 18 65.10% 26.30% 31.10%
## 19 70.90% 10.80% 48.90%
## 20 66.70% 16.00% 35.30%
## Unemployment.rate Urban_population Latitude Longitude
## 1 11.12% 9,797,273 33.93911 67.709953
## 2 12.33% 1,747,593 41.15333 20.168331
## 3 11.70% 31,510,100 28.03389 1.659626
## 4 67,873 42.50628 1.521801
## 5 6.89% 21,061,025 -11.20269 17.873887
## 6 23,800 17.06082 -61.796428
## 7 9.79% 41,339,571 -38.41610 -63.616672
## 8 16.99% 1,869,848 40.06910 45.038189
## 9 5.27% 21,844,756 -25.27440 133.775136
## 10 4.67% 5,194,416 47.51623 14.550072
## 11 5.51% 5,616,165 40.14310 47.576927
## 12 10.36% 323,784 25.03428 -77.396280
## 13 0.71% 1,467,109 26.06670 50.557700
## 14 4.19% 60,987,417 23.68499 90.356331
## 15 10.33% 89,431 13.19389 -59.543198
## 16 4.59% 7,482,982 53.70981 27.953389
## 17 5.59% 11,259,082 50.50389 4.469936
## 18 6.41% 179,039 17.18988 -88.497650
## 19 2.23% 5,648,149 9.30769 2.315834
## 20 2.34% 317,538 27.51416 90.433601
# Read the country boundaries: parse the GeoJSON file with rjson,
# re-serialise it to a JSON string, and convert that string to an sf object.
# Rows with invalid geometries and the Antarctica feature are dropped.
countries <- rjson::fromJSON(file = "world-ash-ms.geojson") %>%
  rjson::toJSON() %>%
  geojson_sf() %>%
  subset(st_is_valid(geometry) & name != "Antarctica")

# Shorten the verbose open-ocean continent label.
countries[countries$continent == "Seven seas (open ocean)", "continent"] <-
  "Seven Seas"

# Preview the attributes used by the charts and maps.
head(countries[, c("continent", "admin", "iso_a2", "geometry")])
## Simple feature collection with 6 features and 3 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: -87.67017 ymin: 8.070654 xmax: -63.00942 ymax: 20.09365
## Geodetic CRS: WGS 84
## continent admin iso_a2 geometry
## 1 North America Costa Rica CR POLYGON ((-82.56357 9.57666...
## 2 North America Nicaragua NI POLYGON ((-83.15752 14.9930...
## 3 North America Saint Martin MF POLYGON ((-63.01118 18.0689...
## 4 North America Sint Maarten SX POLYGON ((-63.12305 18.0689...
## 5 North America Haiti HT MULTIPOLYGON (((-71.77925 1...
## 6 North America Dominican Republic DO POLYGON ((-71.76831 18.0391...
# Convert the two text columns to numbers in place:
#   * Agricultural.Land.Pct: strip the "%" sign        -> percentage (0-100)
#   * Land.Area.Km2: strip thousands separators, then
#     rescale by 100,000                                -> units of 100K km2
world.data <- world.data %>%
  mutate(
    Agricultural.Land.Pct =
      as.numeric(str_replace(Agricultural.Land.Pct, "%", "")),
    Land.Area.Km2 =
      as.numeric(str_replace_all(Land.Area.Km2, ",", "")) / 100000
  )

# Keep only the two converted columns for the first charts and preview them.
chart.data <- world.data[, c("Agricultural.Land.Pct", "Land.Area.Km2")]
head(chart.data, n = 10)
## Agricultural.Land.Pct Land.Area.Km2
## 1 58.1 6.52230
## 2 43.1 0.28748
## 3 17.4 23.81741
## 4 40.0 0.00468
## 5 47.5 12.46700
## 6 20.5 0.00443
## 7 54.3 27.80400
## 8 58.9 0.29743
## 9 48.2 77.41220
## 10 32.4 0.83871
Plot a histogram of the agricultural land percentage across all countries.
# Distribution of the agricultural land share across all countries.
# The x-axis is pinned to 0-100 because the variable is a percentage.
# (Title typo "Agrcultural" corrected.)
hist(chart.data$Agricultural.Land.Pct,
     col = "royalblue",
     xlim = c(0, 100),
     main = "Histogram of Agricultural Land Percentage",
     xlab = "Percentage")
Compare each country's total land area with the percentage of that land used for agriculture.
# Scatter plot: agricultural land share (y) against total land area (x).
# The formula interface is used so that rows with a missing value in either
# column are dropped before plotting.
plot(Agricultural.Land.Pct ~ Land.Area.Km2,
     data = chart.data,
     main = "Agricultural Land / Area Correlation",
     xlab = "Land Area (100K km2)",
     ylab = "Agricultural Land %",
     pch = 19,
     col = "royalblue")

# Overlay grid lines aligned with the axis tick marks (grid()'s default).
grid()
# Attach each country's continent (from the spatial layer) to its
# agricultural land percentage; rows are matched on the ISO alpha-2 code.
chart.data <- merge(
  countries[, c("iso_a2", "admin", "continent")],
  world.data[, c("Abbreviation", "Agricultural.Land.Pct")],
  by.x = "iso_a2",
  by.y = "Abbreviation"
)

# Plotting a numeric variable against a factor draws one box per continent.
plot(Agricultural.Land.Pct ~ as.factor(continent),
     data = chart.data,
     main = "Agricultural Land Percentage by Continent",
     xlab = "",
     ylab = "Percentage",
     ylim = c(0, 100),
     col = "royalblue",
     cex.axis = 0.8)
# Agricultural land AREA per country, grouped by continent.
#
# Agricultural.Land.Pct is a percentage (0-100), so it must be divided by 100
# before being multiplied by the land area. The previous version multiplied
# the raw percentage, inflating every value by a factor of 100 relative to
# the "100K (km2)" axis label used below.
chart.data <- world.data[, c("Abbreviation",
                             "Agricultural.Land.Pct",
                             "Land.Area.Km2")] %>%
  mutate(Ag.Land.Km2 = Land.Area.Km2 * (Agricultural.Land.Pct / 100)) %>%
  # Bring in the continent from the spatial layer (matched on ISO alpha-2).
  merge(countries[, c("iso_a2", "admin", "continent")], .,
        by.x = "iso_a2", by.y = "Abbreviation") %>%
  # Keep only countries with a positive agricultural area; subset() also
  # drops rows where the area is NA.
  subset(., Ag.Land.Km2 > 0)

# One box per continent.
plot(Ag.Land.Km2 ~ as.factor(continent),
     data = chart.data,
     col = "darkgreen",
     ylab = "100K (km2)",
     xlab = "",
     main = "Agricultural Land Area by Continent",
     cex.axis = 0.8)
# Build the data for a stacked bar chart of land area (Agricultural vs Other).
# Split each country's land area into its agricultural part (rounded to
# 5 decimals) and the remainder, then keep the 20 countries with the largest
# agricultural area.
chart.data <-
  world.data[, c("Country", "Land.Area.Km2", "Agricultural.Land.Pct")]
chart.data <- mutate(
  chart.data,
  Agricultural = round(Land.Area.Km2 * (Agricultural.Land.Pct / 100), 5),
  Other = Land.Area.Km2 - Agricultural
)
chart.data <- head(arrange(chart.data, desc(Agricultural)), 20)

# barplot() stacks the rows of a matrix within each column, so arrange the
# data with one row per land category and one column per country.
chart.matrix <- t(as.matrix(chart.data[, c("Agricultural", "Other")]))
colnames(chart.matrix) <- chart.data$Country
rownames(chart.matrix) <- c("Agricultural", "Other")
as.data.frame(chart.matrix)
## China United States Australia Brazil Russia Kazakhstan
## Agricultural 53.93492 43.66082 37.31268 28.86846 22.74066 21.9082
## Other 42.03468 54.67435 40.09952 56.28924 148.24174 5.3408
## India Saudi Arabia Argentina Mongolia Mexico South Africa
## Agricultural 19.85507 17.3695 15.09757 11.18343 10.72549 9.72834
## Other 13.01756 4.1274 12.70643 4.45773 8.91826 2.46256
## Nigeria Canada Indonesia Angola Sudan Chad Mozambique
## Agricultural 7.17768 6.88942 5.99939 5.92182 5.34246 5.09748 5.07606
## Other 2.06000 92.95728 13.04630 6.54518 13.27238 7.74252 2.91774
## Iran
## Agricultural 4.64791
## Other 11.83404
# Stacked bar chart of land use for the countries held in chart.matrix
# (rows = land category, columns = country).
# One fill colour per matrix row, in row order.
colors <- c("darkgreen", "brown")
barplot(chart.matrix,
        col = colors,
        cex.names = 0.75,
        # Label each bar with its country; the matrix column names are used
        # directly instead of going through as.data.frame().
        names.arg = colnames(chart.matrix),
        las = 2,                 # vertical axis labels so country names fit
        ylim = c(0, 200),
        ylab = "100K (km2)",
        axes = TRUE)
# The countries shown are the top 20 by agricultural land AREA, not by
# percentage, so the subtitle now says so.
title("Land Use by Category\nTop 20 Countries by Agricultural Land Area")
legend(x = 19, y = 190,
       legend = rownames(chart.matrix),
       col = colors, lwd = 4, cex = 0.8)
grid(nx = NULL, ny = NULL)
Map the indicators by joining the world data to the countries spatial data frame.
# Choropleth of the agricultural land percentage.
# Join the indicator onto the country polygons by ISO alpha-2 code.
map.data <- merge(
  countries[, c("iso_a2", "admin")],
  world.data[, c("Abbreviation", "Agricultural.Land.Pct")],
  by.x = "iso_a2",
  by.y = "Abbreviation"
)

# Base layer: every country in gray, so countries absent from map.data
# remain visible. Top layer: matched countries coloured by the indicator.
tm_shape(countries) +
  tm_fill(col = "darkgray") +
  tm_shape(map.data) +
  tm_fill(col = "Agricultural.Land.Pct",
          palette = "Spectral",
          title = "Ag Land %") +
  tm_layout(main.title = "Agricultural Land Percentage by Country",
            main.title.size = 1.2,
            main.title.position = "center",
            legend.title.size = 0.9,
            legend.position = c("left", "bottom"))
# Choropleth of total land area.
# Join the land area onto the country polygons by ISO alpha-2 code.
map.data <- merge(
  countries[, c("iso_a2", "admin")],
  world.data[, c("Abbreviation", "Land.Area.Km2")],
  by.x = "iso_a2",
  by.y = "Abbreviation"
)

# Base layer: every country in gray, so countries absent from map.data
# remain visible. Top layer: matched countries coloured by land area.
tm_shape(countries) +
  tm_fill(col = "darkgray") +
  tm_shape(map.data) +
  tm_fill(col = "Land.Area.Km2",
          palette = "Spectral",
          title = "100K km2") +
  tm_layout(main.title = "Land Area by Country",
            main.title.size = 1.2,
            main.title.position = "center",
            legend.title.size = 0.9,
            legend.position = c("left", "bottom"))
# Choropleth of agricultural land area.
# Join land area and agricultural share onto the country polygons by
# ISO alpha-2 code.
map.data <- merge(
  countries[, c("iso_a2", "admin")],
  world.data[, c("Abbreviation", "Land.Area.Km2", "Agricultural.Land.Pct")],
  by.x = "iso_a2",
  by.y = "Abbreviation"
)

# Agricultural area = land area x agricultural share (percentage / 100).
map.data <- mutate(
  map.data,
  Agricultural.Land.Km2 = Land.Area.Km2 * (Agricultural.Land.Pct / 100)
)

# Base layer: every country in gray, so countries absent from map.data
# remain visible. Top layer: matched countries coloured by agricultural area.
tm_shape(countries) +
  tm_fill(col = "darkgray") +
  tm_shape(map.data) +
  tm_fill(col = "Agricultural.Land.Km2",
          palette = "Spectral",
          title = "100K km2") +
  tm_layout(main.title = "Agricultural Land Area by Country",
            main.title.size = 1.2,
            main.title.position = "center",
            legend.title.size = 0.9,
            legend.position = c("left", "bottom"))