Converting country code/name using countrycode package

Add ISO-3 country code to the list of countries used in analysis

library(here)
library(tidyverse)
library(data.table)
library(states)
library(countrycode)

The list of countries used in the analysis is included in the country_included.csv file in the raw data folder.

# Read the list of analysis countries (a single country_name column)
countries <- here("raw data", "country_included.csv") |>
  read.csv()
# Preview the first 20 rows
head(countries, n = 20)
          country_name
1          Afghanistan
2              Albania
3              Algeria
4              Andorra
5               Angola
6  Antigua and Barbuda
7            Argentina
8              Armenia
9            Australia
10             Austria
11          Azerbaijan
12             Bahrain
13          Bangladesh
14            Barbados
15             Belarus
16             Belgium
17              Belize
18               Benin
19              Bhutan
20             Bolivia
# Preview the last 20 rows
tail(countries, n = 20)
            country_name
167          Timor-Leste
168                 Togo
169                Tonga
170  Trinidad and Tobago
171              Tunisia
172               Turkey
173         Turkmenistan
174               Uganda
175              Ukraine
176 United Arab Emirates
177       United Kingdom
178        United States
179              Uruguay
180           Uzbekistan
181              Vanuatu
182            Venezuela
183              Vietnam
184                Yemen
185               Zambia
186             Zimbabwe
  • The country names are in character.

  • We want to add ISO-3 country code in character as a new variable in this data set

  • This will make merging and restricting data sets from different sources easier

  • To do that, we will use the countrycode package in CRAN

  • Run the chunk below if you need to install the package

# Install the countrycode package only when it is not already available,
# so re-running this chunk does not re-download the package every time
if (!requireNamespace("countrycode", quietly = TRUE)) {
  install.packages("countrycode")
}
# then load the package
library(countrycode)
  • Then proceed to add the ISO-3 country code as a new variable
# countrycode() arguments used here:
#   sourcevar   - the column holding the country names
#   origin      - the format the country names are currently in
#   destination - the format the names should be converted into
countries <- countries |>
  mutate(
    ISO = countrycode(
      sourcevar = country_name,
      origin = "country.name",
      destination = "iso3c"
    )
  )
# Preview the first 20 rows, now with the ISO column
head(countries, n = 20)
          country_name ISO
1          Afghanistan AFG
2              Albania ALB
3              Algeria DZA
4              Andorra AND
5               Angola AGO
6  Antigua and Barbuda ATG
7            Argentina ARG
8              Armenia ARM
9            Australia AUS
10             Austria AUT
11          Azerbaijan AZE
12             Bahrain BHR
13          Bangladesh BGD
14            Barbados BRB
15             Belarus BLR
16             Belgium BEL
17              Belize BLZ
18               Benin BEN
19              Bhutan BTN
20             Bolivia BOL
# Preview the last 20 rows, now with the ISO column
tail(countries, n = 20)
            country_name ISO
167          Timor-Leste TLS
168                 Togo TGO
169                Tonga TON
170  Trinidad and Tobago TTO
171              Tunisia TUN
172               Turkey TUR
173         Turkmenistan TKM
174               Uganda UGA
175              Ukraine UKR
176 United Arab Emirates ARE
177       United Kingdom GBR
178        United States USA
179              Uruguay URY
180           Uzbekistan UZB
181              Vanuatu VUT
182            Venezuela VEN
183              Vietnam VNM
184                Yemen YEM
185               Zambia ZMB
186             Zimbabwe ZWE
  • Make sure all of the country names were converted
# Print every converted code for a visual check
countries$ISO
# countrycode() yields NA for a name it cannot match; a visual scan of the
# printed codes can miss one, so stop here if any conversion failed
stopifnot(
  "Some country names were not converted to an ISO-3 code" =
    !anyNA(countries$ISO)
)
  [1] "AFG" "ALB" "DZA" "AND" "AGO" "ATG" "ARG" "ARM" "AUS" "AUT" "AZE" "BHR"
 [13] "BGD" "BRB" "BLR" "BEL" "BLZ" "BEN" "BTN" "BOL" "BIH" "BWA" "BRA" "BRN"
 [25] "BGR" "BFA" "BDI" "KHM" "CMR" "CAN" "CPV" "CAF" "TCD" "CHL" "CHN" "COL"
 [37] "COM" "COG" "CRI" "CIV" "HRV" "CUB" "CYP" "CZE" "COD" "DNK" "DJI" "DMA"
 [49] "DOM" "ECU" "EGY" "SLV" "GNQ" "ERI" "EST" "ETH" "FSM" "FJI" "FIN" "FRA"
 [61] "GAB" "GEO" "DEU" "GHA" "GRC" "GRD" "GTM" "GIN" "GNB" "GUY" "HTI" "HND"
 [73] "HUN" "ISL" "IND" "IDN" "IRN" "IRQ" "IRL" "ITA" "JAM" "JPN" "JOR" "KAZ"
 [85] "KEN" "KIR" "KWT" "KGZ" "LAO" "LVA" "LBN" "LSO" "LBR" "LBY" "LTU" "LUX"
 [97] "MKD" "MDG" "MWI" "MYS" "MDV" "MLI" "MLT" "MHL" "MRT" "MUS" "MEX" "MDA"
[109] "MNG" "MNE" "MAR" "MOZ" "MMR" "NAM" "NPL" "NLD" "NZL" "NIC" "NER" "NGA"
[121] "PRK" "NOR" "OMN" "PAK" "PAN" "PNG" "PRY" "PER" "PHL" "POL" "PRT" "PRI"
[133] "QAT" "ROU" "RUS" "RWA" "LCA" "VCT" "WSM" "STP" "SAU" "SEN" "SRB" "SYC"
[145] "SLE" "SGP" "SVK" "SVN" "SLB" "SOM" "ZAF" "KOR" "SSD" "ESP" "LKA" "SDN"
[157] "SUR" "SWZ" "SWE" "CHE" "SYR" "TJK" "TZA" "THA" "BHS" "GMB" "TLS" "TGO"
[169] "TON" "TTO" "TUN" "TUR" "TKM" "UGA" "UKR" "ARE" "GBR" "USA" "URY" "UZB"
[181] "VUT" "VEN" "VNM" "YEM" "ZMB" "ZWE"

Looks good!

  • Each data source uses a different country ID

    • IHME uses its own location_id and location_name that are not available in countrycode options. Unfortunately, locations are not always countries. For example, the data contains observations from Georgia (country) and Georgia (US state) and they are both “Georgia” under location_name.

    • UCDP uses gwn (Gleditsch & Ward numeric): use origin = "gwn" to convert

    • IDD uses ISO-3!

    • EDI uses COW (Correlates of War numeric), but some countries are unavailable. It also has the character variable country_name which is more convenient in this case: use origin = "country.name" to convert

    • HIEF uses a character variable Country but some countries need to be recoded: use code below

# Load the raw HIEF data
hief <- here("raw data", "HIEF", "HIEF_data.csv") |>
  read.csv()

# Recode historical state names to a present-day country name, then convert
# the recoded name to its ISO-3 code in the same step.
# NOTE(review): several old names map onto one country (both Yemen entries
# become "Yemen"; "German Democratic Republic" becomes "Germany"), which may
# produce duplicate country-year rows -- confirm this is intended.
hiefnew <- hief |>
  mutate(
    Country = recode(
      Country,
      "Czechoslovakia" = "Czech Republic",
      "German Democratic Republic" = "Germany",
      "Republic of Vietnam" = "Vietnam",
      "Yemen Arab Republic" = "Yemen",
      "Yemen PDR" = "Yemen",
      "Yugoslavia" = "Serbia"
    ),
    ISO = countrycode(
      sourcevar = Country,
      origin = "country.name",
      destination = "iso3c"
    )
  )
    • WB data also uses a character variable Country: use origin = "country.name" to convert

Add region, sub-region, and intermediate region

  • Use this GitHub repo to add region, sub-region and intermediate region to the ISO data

    • The csv file was downloaded
  • Need to use the readr package

# Load the ISO 3166 lookup table that carries region and sub-region labels
regions <- here("raw data", "regions.csv") |>
  read.csv()
# NOTE(review): the second row's name prints as raw bytes instead of
# "Åland Islands", so the file encoding may need to be declared when
# reading (e.g. via fileEncoding) -- confirm the encoding of regions.csv
head(regions, n = 6)
                  name alpha_2 alpha_3 country_code    iso_3166_2  region
1          Afghanistan      AF     AFG            4 ISO 3166-2:AF    Asia
2 \xfd\xfdland Islands      AX     ALA          248 ISO 3166-2:AX  Europe
3              Albania      AL     ALB            8 ISO 3166-2:AL  Europe
4              Algeria      DZ     DZA           12 ISO 3166-2:DZ  Africa
5       American Samoa      AS     ASM           16 ISO 3166-2:AS Oceania
6              Andorra      AD     AND           20 ISO 3166-2:AD  Europe
       sub_region intermediate_region region_code sub_region_code
1   Southern Asia                             142              34
2 Northern Europe                             150             154
3 Southern Europe                             150              39
4 Northern Africa                               2              15
5       Polynesia                               9              61
6 Southern Europe                             150              39
  intermediate_region_code
1                       NA
2                       NA
3                       NA
4                       NA
5                       NA
6                       NA
# Count the entries in each region (blank region labels get their own cell)
table(regions[["region"]])

           Africa Americas     Asia   Europe  Oceania 
       1       60       57       51       51       29 
# Cross-tabulate region against intermediate region
table(regions[["region"]], regions[["intermediate_region"]])
          
              Caribbean Central America Channel Islands Eastern Africa
            1         0               0               0              0
  Africa    7         0               0               0             22
  Americas  5        28               8               0              0
  Asia     51         0               0               0              0
  Europe   49         0               0               2              0
  Oceania  29         0               0               0              0
          
           Middle Africa South America Southern Africa Western Africa
                       0             0               0              0
  Africa               9             0               5             17
  Americas             0            16               0              0
  Asia                 0             0               0              0
  Europe               0             0               0              0
  Oceania              0             0               0              0
# Cross-tabulate region against sub-region
table(regions[["region"]], regions[["sub_region"]])
          
              Australia and New Zealand Central Asia Eastern Asia
            1                         0            0            0
  Africa    0                         0            0            0
  Americas  0                         0            0            0
  Asia      0                         0            5            8
  Europe    0                         0            0            0
  Oceania   0                         6            0            0
          
           Eastern Europe Latin America and the Caribbean Melanesia Micronesia
                        0                               0         0          0
  Africa                0                               0         0          0
  Americas              0                              52         0          0
  Asia                  0                               0         0          0
  Europe               10                               0         0          0
  Oceania               0                               0         5          8
          
           Northern Africa Northern America Northern Europe Polynesia
                         0                0               0         0
  Africa                 7                0               0         0
  Americas               0                5               0         0
  Asia                   0                0               0         0
  Europe                 0                0              16         0
  Oceania                0                0               0        10
          
           South-eastern Asia Southern Asia Southern Europe Sub-Saharan Africa
                            0             0               0                  0
  Africa                    0             0               0                 53
  Americas                  0             0               0                  0
  Asia                     11             9               0                  0
  Europe                    0             0              16                  0
  Oceania                   0             0               0                  0
          
           Western Asia Western Europe
                      0              0
  Africa              0              0
  Americas            0              0
  Asia               18              0
  Europe              0              9
  Oceania             0              0
  • I want to add sub_region to the ISO data (it is renamed to region in the code below)
# Keep only the ISO-3 code and the sub-region, renaming them so the key
# matches the ISO column in `countries` and the label is called `region`
regions <- regions |>
  dplyr::select(ISO = alpha_3, region = sub_region)
# left_join() keeps every analysis country; inner_join() would silently drop
# any country whose ISO code has no match in the regions table
countries <- countries |>
  left_join(regions, by = "ISO")
# Surface unmatched countries instead of losing them without notice
if (anyNA(countries$region)) {
  warning(
    "No sub-region found for: ",
    paste(countries$country_name[is.na(countries$region)], collapse = ", "),
    call. = FALSE
  )
}
  • Save the new data as a csv file
# Write the result to disk; row.names = FALSE keeps the row numbers from
# being stored as an extra column in the csv
countries |>
  write.csv(
    file = here("raw data", "countries_iso3.csv"),
    row.names = FALSE
  )