World Data 2023 Agricultural Analytics

This R notebook explores the World Data 2023 dataset downloaded from Kaggle.com.

Global Country Information Dataset 2023 (kaggle.com) https://www.kaggle.com/datasets/nelgiriyewithana/countries-of-the-world-2023


library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stringr)
library(ggplot2)
library(tmap)
## Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
## remotes::install_github('r-tmap/tmap')
library(tmaptools)
library(sf)
## Linking to GEOS 3.11.2, GDAL 3.7.2, PROJ 9.3.0; sf_use_s2() is TRUE
library(geojsonsf)
library(rjson)
library(sp)
library(reshape2)
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths
# Display preferences: a large scipen penalty makes R favour fixed notation
# over scientific notation, and the repr.* options set the plot canvas size.
options(scipen = 999, repr.plot.width = 12, repr.plot.height = 6)

# Load the country-level dataset and preview its first 20 rows.
world.data <- read.csv("world-data-2023.csv", header = TRUE)
head(world.data, n = 20)
##                Country Density.P.Km2 Abbreviation Agricultural.Land.Pct
## 1          Afghanistan            60           AF                58.10%
## 2              Albania           105           AL                43.10%
## 3              Algeria            18           DZ                17.40%
## 4              Andorra           164           AD                40.00%
## 5               Angola            26           AO                47.50%
## 6  Antigua and Barbuda           223           AG                20.50%
## 7            Argentina            17           AR                54.30%
## 8              Armenia           104           AM                58.90%
## 9            Australia             3           AU                48.20%
## 10             Austria           109           AT                32.40%
## 11          Azerbaijan           123           AZ                57.70%
## 12         The Bahamas            39           BS                 1.40%
## 13             Bahrain         2,239           BH                11.10%
## 14          Bangladesh         1,265           BD                70.60%
## 15            Barbados           668           BB                23.30%
## 16             Belarus            47           BY                42.00%
## 17             Belgium           383           BE                44.60%
## 18              Belize            17           BZ                 7.00%
## 19               Benin           108           BJ                33.30%
## 20              Bhutan            20           BT                13.60%
##    Land.Area.Km2 Armed.Forces.Size Birth.Rate Calling.Code
## 1        652,230           323,000      32.49           93
## 2         28,748             9,000      11.78          355
## 3      2,381,741           317,000      24.28          213
## 4            468                         7.20          376
## 5      1,246,700           117,000      40.73          244
## 6            443                 0      15.33            1
## 7      2,780,400           105,000      17.02           54
## 8         29,743            49,000      13.99          374
## 9      7,741,220            58,000      12.60           61
## 10        83,871            21,000       9.70           43
## 11        86,600            82,000      14.00          994
## 12        13,880             1,000      13.97            1
## 13           765            19,000      13.99          973
## 14       148,460           221,000      18.18          880
## 15           430             1,000      10.65            1
## 16       207,600           155,000       9.90          375
## 17        30,528            32,000      10.30           32
## 18        22,966             2,000      20.79          501
## 19       112,622            12,000      36.22          229
## 20        38,394             6,000      17.26          975
##         Capital.MajorCity Co2.Emissions    CPI CPI.Change.Pct Currency.Code
## 1                   Kabul         8,672  149.9          2.30%           AFN
## 2                  Tirana         4,536 119.05          1.40%           ALL
## 3                 Algiers       150,006 151.36          2.00%           DZD
## 4        Andorra la Vella           469                                 EUR
## 5                  Luanda        34,693 261.73         17.10%           AOA
## 6  St. John's, Saint John           557 113.81          1.20%           XCD
## 7            Buenos Aires       201,348 232.75         53.50%           ARS
## 8                 Yerevan         5,156 129.18          1.40%           AMD
## 9                Canberra       375,908  119.8          1.60%           AUD
## 10                 Vienna        61,448 118.06          1.50%           EUR
## 11                   Baku        37,620 156.32          2.60%           AZN
## 12        Nassau, Bahamas         1,786 116.22          2.50%              
## 13                 Manama        31,694 117.59          2.10%           BHD
## 14                  Dhaka        84,246 179.68          5.60%           BDT
## 15             Bridgetown         1,276 134.09          4.10%           BBD
## 16                  Minsk        58,280                 5.60%           BYN
## 17       City of Brussels        96,889 117.11          1.40%           EUR
## 18               Belmopan           568 105.68         -0.90%           BZD
## 19             Porto-Novo         6,476 110.71         -0.90%           XOF
## 20                Thimphu         1,261 167.18          2.70%              
##    Fertility.Rate Forested.Area.Pct Gasoline.Price                 GDP
## 1            4.47             2.10%         $0.70     $19,101,353,833 
## 2            1.62            28.10%         $1.36     $15,278,077,447 
## 3            3.02             0.80%         $0.28    $169,988,236,398 
## 4            1.27            34.00%         $1.51      $3,154,057,987 
## 5            5.52            46.30%         $0.97     $94,635,415,870 
## 6            1.99            22.30%         $0.99      $1,727,759,259 
## 7            2.26             9.80%         $1.10    $449,663,446,954 
## 8            1.76            11.70%         $0.77     $13,672,802,158 
## 9            1.74            16.30%         $0.93  $1,392,680,589,329 
## 10           1.47            46.90%         $1.20    $446,314,739,528 
## 11           1.73            14.10%         $0.56     $39,207,000,000 
## 12           1.75            51.40%         $0.92     $12,827,000,000 
## 13           1.99             0.80%         $0.43     $38,574,069,149 
## 14           2.04            11.00%         $1.12    $302,571,254,131 
## 15           1.62            14.70%         $1.81      $5,209,000,000 
## 16           1.45            42.60%         $0.60     $63,080,457,023 
## 17           1.62            22.60%         $1.43    $529,606,710,418 
## 18           2.31            59.70%         $1.13      $1,879,613,600 
## 19           4.84            37.80%         $0.72     $14,390,709,095 
## 20           1.98            72.50%         $0.98      $2,446,674,101 
##    Gross.primary.education.enrollment.pct
## 1                                 104.00%
## 2                                 107.00%
## 3                                 109.90%
## 4                                 106.40%
## 5                                 113.50%
## 6                                 105.00%
## 7                                 109.70%
## 8                                  92.70%
## 9                                 100.30%
## 10                                103.10%
## 11                                 99.70%
## 12                                 81.40%
## 13                                 99.40%
## 14                                116.50%
## 15                                 99.40%
## 16                                100.50%
## 17                                103.90%
## 18                                111.70%
## 19                                122.00%
## 20                                100.10%
##    Gross.tertiary.education.enrollment.pct Infant.mortality
## 1                                    9.70%             47.9
## 2                                   55.00%              7.8
## 3                                   51.40%             20.1
## 4                                                       2.7
## 5                                    9.30%             51.6
## 6                                   24.80%              5.0
## 7                                   90.00%              8.8
## 8                                   54.60%             11.0
## 9                                  113.10%              3.1
## 10                                  85.10%              2.9
## 11                                  27.70%             19.2
## 12                                  15.10%              8.3
## 13                                  50.50%              6.1
## 14                                  20.60%             25.1
## 15                                  65.40%             11.3
## 16                                  87.40%              2.6
## 17                                  79.70%              2.9
## 18                                  24.70%             11.2
## 19                                  12.30%             60.5
## 20                                  15.60%             24.8
##              Largest.city Life.expectancy Maternal.mortality.ratio Minimum.wage
## 1                   Kabul            64.5                      638       $0.43 
## 2                  Tirana            78.5                       15       $1.12 
## 3                 Algiers            76.7                      112       $0.95 
## 4        Andorra la Vella              NA                       NA       $6.63 
## 5                  Luanda            60.8                      241       $0.71 
## 6  St. John's, Saint John            76.9                       42       $3.04 
## 7            Buenos Aires            76.5                       39       $3.35 
## 8                 Yerevan            74.9                       26       $0.66 
## 9                  Sydney            82.7                        6      $13.59 
## 10                 Vienna            81.6                        5             
## 11                   Baku            72.9                       26       $0.47 
## 12        Nassau, Bahamas            73.8                       70       $5.25 
## 13                  Riffa            77.2                       14             
## 14                  Dhaka            72.3                      173       $0.51 
## 15             Bridgetown            79.1                       27       $3.13 
## 16                  Minsk            74.2                        2       $1.49 
## 17               Brussels            81.6                        5      $10.31 
## 18            Belize City            74.5                       36       $1.65 
## 19                Cotonou            61.5                      397       $0.39 
## 20                Thimphu            71.5                      183       $0.32 
##       Official.language Out.of.pocket.health.expenditure
## 1                Pashto                           78.40%
## 2              Albanian                           56.90%
## 3                Arabic                           28.10%
## 4               Catalan                           36.40%
## 5            Portuguese                           33.40%
## 6               English                           24.30%
## 7               Spanish                           17.60%
## 8              Armenian                           81.60%
## 9                  None                           19.60%
## 10               German                           17.90%
## 11 Azerbaijani language                           78.60%
## 12              English                           27.80%
## 13               Arabic                           25.10%
## 14              Bengali                           71.80%
## 15              English                           45.20%
## 16              Russian                           34.50%
## 17               French                           17.60%
## 18              English                           22.70%
## 19               French                           40.50%
## 20             Dzongkha                           19.80%
##    Physicians.per.thousand  Population
## 1                     0.28  38,041,754
## 2                     1.20   2,854,191
## 3                     1.72  43,053,054
## 4                     3.33      77,142
## 5                     0.21  31,825,295
## 6                     2.76      97,118
## 7                     3.96  44,938,712
## 8                     4.40   2,957,731
## 9                     3.68  25,766,605
## 10                    5.17   8,877,067
## 11                    3.45  10,023,318
## 12                    1.94     389,482
## 13                    0.93   1,501,635
## 14                    0.58 167,310,838
## 15                    2.48     287,025
## 16                    5.19   9,466,856
## 17                    3.07  11,484,055
## 18                    1.12     390,353
## 19                    0.08  11,801,151
## 20                    0.42     727,145
##    Population..Labor.force.participation.... Tax.revenue.... Total.tax.rate
## 1                                     48.90%           9.30%         71.40%
## 2                                     55.70%          18.60%         36.60%
## 3                                     41.20%          37.20%         66.10%
## 4                                                                          
## 5                                     77.50%           9.20%         49.10%
## 6                                                     16.50%         43.00%
## 7                                     61.30%          10.10%        106.30%
## 8                                     55.60%          20.90%         22.60%
## 9                                     65.50%          23.00%         47.40%
## 10                                    60.70%          25.40%         51.40%
## 11                                    66.50%          13.00%         40.70%
## 12                                    74.60%          14.80%         33.80%
## 13                                    73.40%           4.20%         13.80%
## 14                                    59.00%           8.80%         33.40%
## 15                                    65.20%          27.50%         35.60%
## 16                                    64.10%          14.70%         53.30%
## 17                                    53.60%          24.00%         55.40%
## 18                                    65.10%          26.30%         31.10%
## 19                                    70.90%          10.80%         48.90%
## 20                                    66.70%          16.00%         35.30%
##    Unemployment.rate Urban_population  Latitude  Longitude
## 1             11.12%        9,797,273  33.93911  67.709953
## 2             12.33%        1,747,593  41.15333  20.168331
## 3             11.70%       31,510,100  28.03389   1.659626
## 4                              67,873  42.50628   1.521801
## 5              6.89%       21,061,025 -11.20269  17.873887
## 6                              23,800  17.06082 -61.796428
## 7              9.79%       41,339,571 -38.41610 -63.616672
## 8             16.99%        1,869,848  40.06910  45.038189
## 9              5.27%       21,844,756 -25.27440 133.775136
## 10             4.67%        5,194,416  47.51623  14.550072
## 11             5.51%        5,616,165  40.14310  47.576927
## 12            10.36%          323,784  25.03428 -77.396280
## 13             0.71%        1,467,109  26.06670  50.557700
## 14             4.19%       60,987,417  23.68499  90.356331
## 15            10.33%           89,431  13.19389 -59.543198
## 16             4.59%        7,482,982  53.70981  27.953389
## 17             5.59%       11,259,082  50.50389   4.469936
## 18             6.41%          179,039  17.18988 -88.497650
## 19             2.23%        5,648,149   9.30769   2.315834
## 20             2.34%          317,538  27.51416  90.433601
# Read the country boundaries: parse the GeoJSON file with rjson, serialise it
# back to a JSON string, and convert that string into an sf object.
# NOTE(review): geojson_sf() may be able to read the file path directly, which
# would avoid the round trip -- confirm before changing.
boundaries.json <- toJSON(fromJSON(file = "world-ash-ms.geojson"))
countries <- geojson_sf(boundaries.json)

# Keep only features whose geometry is valid, and drop Antarctica.
# Rows where the test is NA are excluded as well.
is.kept <- st_is_valid(countries$geometry) & countries$name != "Antarctica"
countries <- countries[is.kept & !is.na(is.kept), ]

# Shorten the open-ocean continent label.
countries$continent[countries$continent == "Seven seas (open ocean)"] <-
    "Seven Seas"

head(countries[, c("continent", "admin", "iso_a2", "geometry")])
## Simple feature collection with 6 features and 3 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -87.67017 ymin: 8.070654 xmax: -63.00942 ymax: 20.09365
## Geodetic CRS:  WGS 84
##       continent              admin iso_a2                       geometry
## 1 North America         Costa Rica     CR POLYGON ((-82.56357 9.57666...
## 2 North America          Nicaragua     NI POLYGON ((-83.15752 14.9930...
## 3 North America       Saint Martin     MF POLYGON ((-63.01118 18.0689...
## 4 North America       Sint Maarten     SX POLYGON ((-63.12305 18.0689...
## 5 North America              Haiti     HT MULTIPOLYGON (((-71.77925 1...
## 6 North America Dominican Republic     DO POLYGON ((-71.76831 18.0391...
# Convert the text columns used by the charts below into numbers.
# Percentages arrive as strings such as "58.10%": remove the first "%" sign.
agricultural.pct <- sub("%", "", world.data$Agricultural.Land.Pct, fixed = TRUE)
world.data$Agricultural.Land.Pct <- as.numeric(agricultural.pct)

# Land area arrives with thousands separators ("652,230"). After stripping
# them the value is rescaled, so Land.Area.Km2 holds units of 100,000 km2
# from this point on.
land.area <- gsub(",", "", world.data$Land.Area.Km2, fixed = TRUE)
world.data$Land.Area.Km2 <- as.numeric(land.area) / 100000

chart.data <- world.data[, c("Agricultural.Land.Pct", "Land.Area.Km2")]
head(chart.data, 10)
##    Agricultural.Land.Pct Land.Area.Km2
## 1                   58.1       6.52230
## 2                   43.1       0.28748
## 3                   17.4      23.81741
## 4                   40.0       0.00468
## 5                   47.5      12.46700
## 6                   20.5       0.00443
## 7                   54.3      27.80400
## 8                   58.9       0.29743
## 9                   48.2      77.41220
## 10                  32.4       0.83871

Distribution Analysis

Plot a histogram of the agricultural land percentage across all countries.

# Histogram of the share of each country's land used for agriculture.
# The x axis is pinned to the full 0-100 percentage range.
hist(chart.data$Agricultural.Land.Pct,
     col = "royalblue",
     xlim = c(0, 100),
     main = "Histogram of Agricultural Land Percentage",
     xlab = "Percentage")

Comparative Variable Analysis

Compare each country's total land area with the percentage of that land used for agriculture.

# Scatter plot of agricultural land share (y) against total land area (x).
# Land.Area.Km2 was rescaled to units of 100,000 km2, hence the x-axis label.
plot(Agricultural.Land.Pct ~ Land.Area.Km2,
     data = chart.data,
     main = "Agricultural Land / Area Correlation",
     xlab = "Land Area (100K km2)",
     ylab = "Agricultural Land %",
     pch = 19,
     col = "royalblue")
# Overlay grid lines aligned with the tick marks of both axes.
grid()

# Attach each country's continent (from the boundaries layer) to its
# agricultural land percentage, matching on the two-letter country code.
continent.lookup <- countries[, c("iso_a2", "admin", "continent")]
land.pct <- world.data[, c("Abbreviation", "Agricultural.Land.Pct")]
chart.data <- merge(continent.lookup, land.pct,
                    by.x = "iso_a2", by.y = "Abbreviation")

# Plotting a numeric variable against a factor draws one box plot per level,
# here one per continent.
plot(Agricultural.Land.Pct ~ as.factor(continent),
     data = chart.data,
     main = "Agricultural Land Percentage by Continent",
     xlab = "",
     ylab = "Percentage",
     ylim = c(0, 100),
     col = "royalblue",
     cex.axis = 0.8)

# Agricultural land area per country, summarised by continent.
# Land.Area.Km2 is held in units of 100,000 km2 and Agricultural.Land.Pct is a
# 0-100 percentage, so the percentage is divided by 100 to keep the product in
# the "100K (km2)" units shown on the y axis.
chart.data <- world.data[, c("Abbreviation", 
                             "Agricultural.Land.Pct",
                             "Land.Area.Km2")] %>%
    mutate(Ag.Land.Km2 = Land.Area.Km2 * (Agricultural.Land.Pct / 100)) %>%
    merge(countries[, c("iso_a2", "admin", "continent")], ., 
          by.x = "iso_a2", by.y = "Abbreviation") %>%
    # Drop countries with zero (or missing) agricultural land area.
    subset(., Ag.Land.Km2 > 0)

# One box plot of agricultural land area per continent.
plot(Ag.Land.Km2 ~ as.factor(continent),
     data = chart.data,
     col = "darkgreen",
     ylab = "100K (km2)",
     xlab = "",
     main = "Agricultural Land Area by Continent",
     cex.axis = 0.8)

# Build the data for a stacked bar chart of land area (agricultural vs other).

# Split each country's land area into its agricultural part and the remainder,
# then keep the 20 countries with the largest agricultural area.
chart.data <-
    world.data[, c("Country", "Land.Area.Km2", "Agricultural.Land.Pct")] %>%
    mutate(Agricultural =
               round(Land.Area.Km2 * (Agricultural.Land.Pct / 100), 5)) %>%
    mutate(Other = Land.Area.Km2 - Agricultural) %>%
    arrange(desc(Agricultural)) %>%
    head(20)

# barplot() stacks the rows of a matrix within each column, so transpose the
# two area columns: one row per land category, one column per country.
chart.matrix <- t(as.matrix(chart.data[, c("Agricultural", "Other")]))
dimnames(chart.matrix) <- list(c("Agricultural", "Other"), chart.data$Country)

as.data.frame(chart.matrix)
##                 China United States Australia   Brazil    Russia Kazakhstan
## Agricultural 53.93492      43.66082  37.31268 28.86846  22.74066    21.9082
## Other        42.03468      54.67435  40.09952 56.28924 148.24174     5.3408
##                 India Saudi Arabia Argentina Mongolia   Mexico South Africa
## Agricultural 19.85507      17.3695  15.09757 11.18343 10.72549      9.72834
## Other        13.01756       4.1274  12.70643  4.45773  8.91826      2.46256
##              Nigeria   Canada Indonesia  Angola    Sudan    Chad Mozambique
## Agricultural 7.17768  6.88942   5.99939 5.92182  5.34246 5.09748    5.07606
## Other        2.06000 92.95728  13.04630 6.54518 13.27238 7.74252    2.91774
##                  Iran
## Agricultural  4.64791
## Other        11.83404
# Stacked bar chart: one bar per country, agricultural land at the bottom and
# other land stacked on top. Colours follow the row order of chart.matrix.
colors <- c("darkgreen", "brown")

barplot(chart.matrix, 
        col = colors,
        cex.names = 0.75,
        names.arg = colnames(chart.matrix),
        las = 2,  # draw the country names perpendicular to the axis
        ylim = c(0, 200),
        ylab = "100K (km2)",
        axes = TRUE)
# The countries were ranked by agricultural land area, not by percentage,
# so the subtitle states the ranking that was actually applied.
title("Land Use by Category\nTop 20 Countries by Agricultural Land Area")
legend(x = 19, y = 190, 
       legend = rownames(chart.matrix),
       col = colors, lwd = 4, cex = 0.8)
grid(nx = NULL, ny = NULL)

Spatial Analytics

Perform some spatial analysis by joining the world data to the countries spatial data frame.

# Choropleth of agricultural land percentage.
# Join the percentage onto the country polygons by two-letter country code;
# merge() keeps only the countries present in both tables.
country.shapes <- countries[, c("iso_a2", "admin")]
land.pct <- world.data[, c("Abbreviation", "Agricultural.Land.Pct")]
map.data <- merge(country.shapes, land.pct,
                  by.x = "iso_a2", by.y = "Abbreviation")

# Grey base layer of every country, drawn underneath the data layer.
base.layer <- tm_shape(countries) +
    tm_fill(col = "darkgray")

# Joined countries coloured by their agricultural land percentage.
data.layer <- tm_shape(map.data) +
    tm_fill(col = "Agricultural.Land.Pct",
            palette = "Spectral",
            title = "Ag Land %")

base.layer +
    data.layer +
    tm_layout(main.title = "Agricultural Land Percentage by Country",
              main.title.size = 1.2,
              main.title.position = "center",
              legend.title.size = 0.9,
              legend.position = c("left", "bottom"))

# Choropleth of total land area.
# Join the land area onto the country polygons by two-letter country code;
# merge() keeps only the countries present in both tables.
country.shapes <- countries[, c("iso_a2", "admin")]
land.area <- world.data[, c("Abbreviation", "Land.Area.Km2")]
map.data <- merge(country.shapes, land.area,
                  by.x = "iso_a2", by.y = "Abbreviation")

# Grey base layer of every country, drawn underneath the data layer.
base.layer <- tm_shape(countries) +
    tm_fill(col = "darkgray")

# Joined countries coloured by land area (in units of 100,000 km2).
data.layer <- tm_shape(map.data) +
    tm_fill(col = "Land.Area.Km2",
            palette = "Spectral",
            title = "100K km2")

base.layer +
    data.layer +
    tm_layout(main.title = "Land Area by Country",
              main.title.size = 1.2,
              main.title.position = "center",
              legend.title.size = 0.9,
              legend.position = c("left", "bottom"))

# Choropleth of agricultural land area.
# Join land area and agricultural percentage onto the country polygons by
# two-letter country code; merge() keeps only countries in both tables.
country.shapes <- countries[, c("iso_a2", "admin")]
land.stats <- world.data[, c("Abbreviation",
                             "Land.Area.Km2",
                             "Agricultural.Land.Pct")]
map.data <- merge(country.shapes, land.stats,
                  by.x = "iso_a2", by.y = "Abbreviation")

# Agricultural area = total area (100,000 km2 units) x percentage / 100.
map.data <- mutate(map.data,
                   Agricultural.Land.Km2 =
                       Land.Area.Km2 * (Agricultural.Land.Pct / 100))

# Grey base layer of every country, drawn underneath the data layer.
base.layer <- tm_shape(countries) +
    tm_fill(col = "darkgray")

# Joined countries coloured by their agricultural land area.
data.layer <- tm_shape(map.data) +
    tm_fill(col = "Agricultural.Land.Km2",
            palette = "Spectral",
            title = "100K km2")

base.layer +
    data.layer +
    tm_layout(main.title = "Agricultural Land Area by Country",
              main.title.size = 1.2,
              main.title.position = "center",
              legend.title.size = 0.9,
              legend.position = c("left", "bottom"))