library(here)
library(tidyverse)
library(data.table)
library(states)
library(countrycode)
Converting country code/name using countrycode package
Add ISO-3 country code to the list of countries used in analysis
The list of countries used in the analysis is included in the country_included.csv file in the raw data folder.
# Load the list of countries used in the analysis from the raw data folder.
countries <- read.csv(
  file = here("raw data", "country_included.csv"),
  header = TRUE
)
head(countries, 20)
country_name
1 Afghanistan
2 Albania
3 Algeria
4 Andorra
5 Angola
6 Antigua and Barbuda
7 Argentina
8 Armenia
9 Australia
10 Austria
11 Azerbaijan
12 Bahrain
13 Bangladesh
14 Barbados
15 Belarus
16 Belgium
17 Belize
18 Benin
19 Bhutan
20 Bolivia
tail(countries, 20)
country_name
167 Timor-Leste
168 Togo
169 Tonga
170 Trinidad and Tobago
171 Tunisia
172 Turkey
173 Turkmenistan
174 Uganda
175 Ukraine
176 United Arab Emirates
177 United Kingdom
178 United States
179 Uruguay
180 Uzbekistan
181 Vanuatu
182 Venezuela
183 Vietnam
184 Yemen
185 Zambia
186 Zimbabwe
The country names are stored as character strings.
We want to add the ISO-3 country code, also as a character string, as a new variable in this data set.
This will make merging and restricting data sets from different sources easier.
To do that, we will use the `countrycode` package on CRAN
- Also available from this GitHub repo
Run the chunk below if you need to install the package
# Install the countrycode package only when it is missing, so that re-running
# this chunk does not download and reinstall it every time.
if (!requireNamespace("countrycode", quietly = TRUE)) {
  install.packages("countrycode")
}
# then load the package
library(countrycode)
- Then proceed to add the ISO-3 country code as a new variable
# Derive the ISO-3 code for every country name.
#   sourcevar   - the vector holding the country names
#   origin      - the coding scheme the names are currently in
#   destination - the coding scheme to convert them into
countries[["ISO"]] <- countrycode(
  sourcevar = countries[["country_name"]],
  origin = "country.name",
  destination = "iso3c"
)
head(countries, 20)
country_name ISO
1 Afghanistan AFG
2 Albania ALB
3 Algeria DZA
4 Andorra AND
5 Angola AGO
6 Antigua and Barbuda ATG
7 Argentina ARG
8 Armenia ARM
9 Australia AUS
10 Austria AUT
11 Azerbaijan AZE
12 Bahrain BHR
13 Bangladesh BGD
14 Barbados BRB
15 Belarus BLR
16 Belgium BEL
17 Belize BLZ
18 Benin BEN
19 Bhutan BTN
20 Bolivia BOL
tail(countries, 20)
country_name ISO
167 Timor-Leste TLS
168 Togo TGO
169 Tonga TON
170 Trinidad and Tobago TTO
171 Tunisia TUN
172 Turkey TUR
173 Turkmenistan TKM
174 Uganda UGA
175 Ukraine UKR
176 United Arab Emirates ARE
177 United Kingdom GBR
178 United States USA
179 Uruguay URY
180 Uzbekistan UZB
181 Vanuatu VUT
182 Venezuela VEN
183 Vietnam VNM
184 Yemen YEM
185 Zambia ZMB
186 Zimbabwe ZWE
- Make sure all of the country names were converted
countries$ISO
[1] "AFG" "ALB" "DZA" "AND" "AGO" "ATG" "ARG" "ARM" "AUS" "AUT" "AZE" "BHR"
[13] "BGD" "BRB" "BLR" "BEL" "BLZ" "BEN" "BTN" "BOL" "BIH" "BWA" "BRA" "BRN"
[25] "BGR" "BFA" "BDI" "KHM" "CMR" "CAN" "CPV" "CAF" "TCD" "CHL" "CHN" "COL"
[37] "COM" "COG" "CRI" "CIV" "HRV" "CUB" "CYP" "CZE" "COD" "DNK" "DJI" "DMA"
[49] "DOM" "ECU" "EGY" "SLV" "GNQ" "ERI" "EST" "ETH" "FSM" "FJI" "FIN" "FRA"
[61] "GAB" "GEO" "DEU" "GHA" "GRC" "GRD" "GTM" "GIN" "GNB" "GUY" "HTI" "HND"
[73] "HUN" "ISL" "IND" "IDN" "IRN" "IRQ" "IRL" "ITA" "JAM" "JPN" "JOR" "KAZ"
[85] "KEN" "KIR" "KWT" "KGZ" "LAO" "LVA" "LBN" "LSO" "LBR" "LBY" "LTU" "LUX"
[97] "MKD" "MDG" "MWI" "MYS" "MDV" "MLI" "MLT" "MHL" "MRT" "MUS" "MEX" "MDA"
[109] "MNG" "MNE" "MAR" "MOZ" "MMR" "NAM" "NPL" "NLD" "NZL" "NIC" "NER" "NGA"
[121] "PRK" "NOR" "OMN" "PAK" "PAN" "PNG" "PRY" "PER" "PHL" "POL" "PRT" "PRI"
[133] "QAT" "ROU" "RUS" "RWA" "LCA" "VCT" "WSM" "STP" "SAU" "SEN" "SRB" "SYC"
[145] "SLE" "SGP" "SVK" "SVN" "SLB" "SOM" "ZAF" "KOR" "SSD" "ESP" "LKA" "SDN"
[157] "SUR" "SWZ" "SWE" "CHE" "SYR" "TJK" "TZA" "THA" "BHS" "GMB" "TLS" "TGO"
[169] "TON" "TTO" "TUN" "TUR" "TKM" "UGA" "UKR" "ARE" "GBR" "USA" "URY" "UZB"
[181] "VUT" "VEN" "VNM" "YEM" "ZMB" "ZWE"
Looks good!
Each data source uses a different country ID
- IHME uses its own `location_id` and `location_name` that are not available in `countrycode` options. Unfortunately, locations are not always countries. For example, the data contains observations from Georgia (country) and Georgia (US state) and they are both “Georgia” under `location_name`.
- UCDP uses gwn (Gleditsch & Ward numeric): use `origin = "gwn"` to convert
- IDD uses ISO-3!
- EDI uses COW (Correlates of War numeric), but some countries are unavailable. It also has the character variable `country_name` which is more convenient in this case: use `origin = "country.name"` to convert
- HIEF uses a character variable `Country` but some countries need to be recoded: use code below
# Load the HIEF data and replace historical country names with present-day
# names before the names are converted to ISO-3 codes.
# NOTE(review): "Yemen Arab Republic" and "Yemen PDR" both become "Yemen", and
# "German Democratic Republic" becomes "Germany"; if the source also has rows
# under the target name for the same period, this produces duplicate rows for
# one country - confirm before merging.
hief <- read.csv(
  file = here("raw data", "HIEF", "HIEF_data.csv"),
  header = TRUE
)
hiefnew <- mutate(
  hief,
  Country = recode(
    Country,
    "Czechoslovakia" = "Czech Republic",
    "German Democratic Republic" = "Germany",
    "Republic of Vietnam" = "Vietnam",
    "Yemen Arab Republic" = "Yemen",
    "Yemen PDR" = "Yemen",
    "Yugoslavia" = "Serbia"
  )
)
hiefnew$ISO <- countrycode(hiefnew$Country,
origin = "country.name",
destination = "iso3c")
- WB data also uses a character variable `Country`: use `origin = "country.name"` to convert
Add region, sub-region, and intermediate region
Use this GitHub repo to add region, sub-region and intermediate region to the ISO data
- The csv file was downloaded
Need to use the `readr` package
# Read the region lookup table (ISO codes plus region and sub-region columns).
# NOTE(review): the printed `name` column shows undecoded bytes
# ("\xfd\xfdland Islands"), so the file's text encoding is probably not being
# interpreted correctly - confirm the encoding and pass `fileEncoding` if needed.
regions <- read.csv(here("raw data", "regions.csv"), header = TRUE)
head(regions)
name alpha_2 alpha_3 country_code iso_3166_2 region
1 Afghanistan AF AFG 4 ISO 3166-2:AF Asia
2 \xfd\xfdland Islands AX ALA 248 ISO 3166-2:AX Europe
3 Albania AL ALB 8 ISO 3166-2:AL Europe
4 Algeria DZ DZA 12 ISO 3166-2:DZ Africa
5 American Samoa AS ASM 16 ISO 3166-2:AS Oceania
6 Andorra AD AND 20 ISO 3166-2:AD Europe
sub_region intermediate_region region_code sub_region_code
1 Southern Asia 142 34
2 Northern Europe 150 154
3 Southern Europe 150 39
4 Northern Africa 2 15
5 Polynesia 9 61
6 Southern Europe 150 39
intermediate_region_code
1 NA
2 NA
3 NA
4 NA
5 NA
6 NA
table(regions$region)
Africa Americas Asia Europe Oceania
1 60 57 51 51 29
table(regions$region, regions$intermediate_region)
Caribbean Central America Channel Islands Eastern Africa
1 0 0 0 0
Africa 7 0 0 0 22
Americas 5 28 8 0 0
Asia 51 0 0 0 0
Europe 49 0 0 2 0
Oceania 29 0 0 0 0
Middle Africa South America Southern Africa Western Africa
0 0 0 0
Africa 9 0 5 17
Americas 0 16 0 0
Asia 0 0 0 0
Europe 0 0 0 0
Oceania 0 0 0 0
table(regions$region, regions$sub_region)
Australia and New Zealand Central Asia Eastern Asia
1 0 0 0
Africa 0 0 0 0
Americas 0 0 0 0
Asia 0 0 5 8
Europe 0 0 0 0
Oceania 0 6 0 0
Eastern Europe Latin America and the Caribbean Melanesia Micronesia
0 0 0 0
Africa 0 0 0 0
Americas 0 52 0 0
Asia 0 0 0 0
Europe 10 0 0 0
Oceania 0 0 5 8
Northern Africa Northern America Northern Europe Polynesia
0 0 0 0
Africa 7 0 0 0
Americas 0 5 0 0
Asia 0 0 0 0
Europe 0 0 16 0
Oceania 0 0 0 10
South-eastern Asia Southern Asia Southern Europe Sub-Saharan Africa
0 0 0 0
Africa 0 0 0 53
Americas 0 0 0 0
Asia 11 9 0 0
Europe 0 0 16 0
Oceania 0 0 0 0
Western Asia Western Europe
0 0
Africa 0 0
Americas 0 0
Asia 18 0
Europe 0 9
Oceania 0 0
- I want to add `sub_region` to the ISO data
# Keep only the ISO-3 code and the sub-region, renaming them on the way:
# alpha_3 becomes ISO (the join key) and sub_region becomes region.
regions <- dplyr::select(regions, ISO = alpha_3, region = sub_region)
countries <- countries |>
inner_join(regions, by = "ISO")
- Save the new data as a csv file
# Export the merged country table; row.names = FALSE stops R from writing its
# row numbers as an extra column in the file.
write.csv(
  x = countries,
  file = here("raw data", "countries_iso3.csv"),
  row.names = FALSE
)