This is the cleaning process for the Macroeconomics risk predictions for Global indicators project
This project is fully documented on https://rpubs.com/ntuli/1384909
library(tidyverse)
library(rstatix)
library(lubridate)
library(rstatix)
Clean inflation dataset
#import dataset
inflation_consumer_price <- read_csv("inflation_consumer_price.csv")
#clean inflation dataset
#remove the fidt two colomns
inflation_consumer_price <- inflation_consumer_price %>%
dplyr::slice(-1, -2) %>% #rename the colomns
rename("Country" = "Data Source",
"Country_Code" = "World Development Indicators",
"Economic_Indicator" = "...3")
#split the indicator colomn into separate colomns
inflation_consumer_price <- inflation_consumer_price %>%
separate_wider_delim(
Economic_Indicator,
delim = ",",
names = c("Economic_Indicator", "Indicator_Name", "Indicator_Code", as.character(1960:2024)),
too_few = 'align_start',
too_many = "drop"
)
#RESHAPE AND PIVOTE THE DATA TO LONGER
inflation_consumer_price <- inflation_consumer_price %>%
pivot_longer( cols = "1960":"2024",
names_to = "Years",
values_to = "Values"
) %>% #values to numeric
mutate(Values = as.numeric(Values)) %>% #remove nulls
drop_na()
#check the countries
unique(inflation_consumer_price$Country)
## [1] "Aruba"
## [2] "Africa Eastern and Southern"
## [3] "Afghanistan"
## [4] "Africa Western and Central"
## [5] "Angola"
## [6] "Albania"
## [7] "Arab World"
## [8] "United Arab Emirates"
## [9] "Argentina"
## [10] "Armenia"
## [11] "Antigua and Barbuda"
## [12] "Australia"
## [13] "Austria"
## [14] "Azerbaijan"
## [15] "Burundi"
## [16] "Belgium"
## [17] "Benin"
## [18] "Burkina Faso"
## [19] "Bangladesh"
## [20] "Bulgaria"
## [21] "Bahrain"
## [22] "Bahamas, The"
## [23] "Bosnia and Herzegovina"
## [24] "Belarus"
## [25] "Belize"
## [26] "Bolivia"
## [27] "Brazil"
## [28] "Barbados"
## [29] "Brunei Darussalam"
## [30] "Bhutan"
## [31] "Botswana"
## [32] "Central African Republic"
## [33] "Canada"
## [34] "Central Europe and the Baltics"
## [35] "Switzerland"
## [36] "Chile"
## [37] "China"
## [38] "Cote d'Ivoire"
## [39] "Cameroon"
## [40] "Congo, Dem. Rep."
## [41] "Congo, Rep."
## [42] "Colombia"
## [43] "Comoros"
## [44] "Cabo Verde"
## [45] "Costa Rica"
## [46] "Caribbean small states"
## [47] "Curacao"
## [48] "Cayman Islands"
## [49] "Cyprus"
## [50] "Czechia"
## [51] "Germany"
## [52] "Djibouti"
## [53] "Dominica"
## [54] "Denmark"
## [55] "Dominican Republic"
## [56] "Algeria"
## [57] "East Asia & Pacific (excluding high income)"
## [58] "Early-demographic dividend"
## [59] "East Asia & Pacific"
## [60] "Europe & Central Asia (excluding high income)"
## [61] "Europe & Central Asia"
## [62] "Ecuador"
## [63] "Egypt, Arab Rep."
## [64] "Euro area"
## [65] "Spain"
## [66] "Estonia"
## [67] "Ethiopia"
## [68] "European Union"
## [69] "Fragile and conflict affected situations"
## [70] "Finland"
## [71] "Fiji"
## [72] "France"
## [73] "Micronesia, Fed. Sts."
## [74] "Gabon"
## [75] "United Kingdom"
## [76] "Georgia"
## [77] "Ghana"
## [78] "Guinea"
## [79] "Gambia, The"
## [80] "Guinea-Bissau"
## [81] "Equatorial Guinea"
## [82] "Greece"
## [83] "Grenada"
## [84] "Guatemala"
## [85] "Guyana"
## [86] "High income"
## [87] "Hong Kong SAR, China"
## [88] "Honduras"
## [89] "Heavily indebted poor countries (HIPC)"
## [90] "Croatia"
## [91] "Haiti"
## [92] "Hungary"
## [93] "IBRD only"
## [94] "IDA & IBRD total"
## [95] "IDA total"
## [96] "IDA blend"
## [97] "Indonesia"
## [98] "IDA only"
## [99] "India"
## [100] "Ireland"
## [101] "Iran, Islamic Rep."
## [102] "Iraq"
## [103] "Iceland"
## [104] "Israel"
## [105] "Italy"
## [106] "Jamaica"
## [107] "Jordan"
## [108] "Japan"
## [109] "Kazakhstan"
## [110] "Kenya"
## [111] "Kyrgyz Republic"
## [112] "Cambodia"
## [113] "Kiribati"
## [114] "St. Kitts and Nevis"
## [115] "Korea, Rep."
## [116] "Kuwait"
## [117] "Latin America & Caribbean (excluding high income)"
## [118] "Lao PDR"
## [119] "Lebanon"
## [120] "Liberia"
## [121] "Libya"
## [122] "St. Lucia"
## [123] "Latin America & Caribbean"
## [124] "Least developed countries: UN classification"
## [125] "Low income"
## [126] "Sri Lanka"
## [127] "Lower middle income"
## [128] "Low & middle income"
## [129] "Lesotho"
## [130] "Late-demographic dividend"
## [131] "Lithuania"
## [132] "Luxembourg"
## [133] "Latvia"
## [134] "Macao SAR, China"
## [135] "Morocco"
## [136] "Moldova"
## [137] "Madagascar"
## [138] "Maldives"
## [139] "Middle East, North Africa, Afghanistan & Pakistan"
## [140] "Mexico"
## [141] "Middle income"
## [142] "North Macedonia"
## [143] "Mali"
## [144] "Malta"
## [145] "Myanmar"
## [146] "Middle East, North Africa, Afghanistan & Pakistan (excluding high income)"
## [147] "Montenegro"
## [148] "Mongolia"
## [149] "Mozambique"
## [150] "Mauritania"
## [151] "Mauritius"
## [152] "Malawi"
## [153] "Malaysia"
## [154] "North America"
## [155] "Namibia"
## [156] "New Caledonia"
## [157] "Niger"
## [158] "Nigeria"
## [159] "Nicaragua"
## [160] "Netherlands"
## [161] "Norway"
## [162] "Nepal"
## [163] "Nauru"
## [164] "New Zealand"
## [165] "OECD members"
## [166] "Oman"
## [167] "Other small states"
## [168] "Pakistan"
## [169] "Panama"
## [170] "Peru"
## [171] "Philippines"
## [172] "Palau"
## [173] "Papua New Guinea"
## [174] "Poland"
## [175] "Pre-demographic dividend"
## [176] "Portugal"
## [177] "Paraguay"
## [178] "West Bank and Gaza"
## [179] "Pacific island small states"
## [180] "Post-demographic dividend"
## [181] "Qatar"
## [182] "Romania"
## [183] "Russian Federation"
## [184] "Rwanda"
## [185] "South Asia"
## [186] "Saudi Arabia"
## [187] "Sudan"
## [188] "Senegal"
## [189] "Singapore"
## [190] "Solomon Islands"
## [191] "Sierra Leone"
## [192] "El Salvador"
## [193] "San Marino"
## [194] "Serbia"
## [195] "Sub-Saharan Africa (excluding high income)"
## [196] "South Sudan"
## [197] "Sub-Saharan Africa"
## [198] "Small states"
## [199] "Sao Tome and Principe"
## [200] "Suriname"
## [201] "Slovak Republic"
## [202] "Slovenia"
## [203] "Sweden"
## [204] "Eswatini"
## [205] "Sint Maarten (Dutch part)"
## [206] "Seychelles"
## [207] "Syrian Arab Republic"
## [208] "Chad"
## [209] "East Asia & Pacific (IDA & IBRD countries)"
## [210] "Europe & Central Asia (IDA & IBRD countries)"
## [211] "Togo"
## [212] "Thailand"
## [213] "Tajikistan"
## [214] "Latin America & the Caribbean (IDA & IBRD countries)"
## [215] "Timor-Leste"
## [216] "Middle East, North Africa, Afghanistan & Pakistan (IDA & IBRD)"
## [217] "Tonga"
## [218] "South Asia (IDA & IBRD)"
## [219] "Sub-Saharan Africa (IDA & IBRD countries)"
## [220] "Trinidad and Tobago"
## [221] "Tunisia"
## [222] "Turkiye"
## [223] "Tuvalu"
## [224] "Tanzania"
## [225] "Uganda"
## [226] "Ukraine"
## [227] "Upper middle income"
## [228] "Uruguay"
## [229] "United States"
## [230] "Uzbekistan"
## [231] "St. Vincent and the Grenadines"
## [232] "Venezuela, RB"
## [233] "Viet Nam"
## [234] "Vanuatu"
## [235] "World"
## [236] "Samoa"
## [237] "Kosovo"
## [238] "Yemen, Rep."
## [239] "South Africa"
## [240] "Zambia"
## [241] "Zimbabwe"
# remove the unneeded rows
inflation_consumer_price <- inflation_consumer_price %>%
mutate(Country = trimws(Country)) %>%
filter(!grepl(paste0(
"^Sub-Saharan Africa \\(excluding high income\\)$|",
"^Sub-Saharan Africa \\(IDA & IBRD countries\\)$|",
"^Upper middle income$|",
"^South Asia \\(IDA & IBRD\\)$|",
"^Middle East, North Africa, Afghanistan & Pakistan \\(IDA & IBRD\\)$|",
"^Latin America & the Caribbean \\(IDA & IBRD countries\\)$|",
"^Europe & Central Asia \\(IDA & IBRD countries\\)$|",
"^East Asia & Pacific \\(IDA & IBRD countries\\)$|",
"^World$|",
"^Post-demographic dividend$|",
"^Small states$|",
"^Pacific island small states$|",
"^Pre-demographic dividend$|",
"^Other small states$|",
"^OECD members$|",
"^Middle East, North Africa, Afghanistan & Pakistan \\(excluding high income\\)$|",
"^Middle East, North Africa, Afghanistan & Pakistan$|",
"^Middle income$|",
"^Late-demographic dividend$|",
"^Low & middle income$|",
"^Lower middle income$|",
"^Low income$|",
"^Least developed countries: UN classification$|",
"^Latin America & Caribbean$|",
"^Latin America & Caribbean \\(excluding high income\\)$|",
"^IDA only$|",
"^IDA blend$|",
"^IDA total$|",
"^IDA & IBRD total$|",
"^IBRD only$|",
"^Heavily indebted poor countries \\(HIPC\\)$|",
"^High income$|",
"^Fragile and conflict affected situations$|",
"^Europe & Central Asia$|",
"^East Asia & Pacific$|",
"^Europe & Central Asia \\(excluding high income\\)$|",
"^East Asia & Pacific \\(excluding high income\\)$|",
"^Caribbean small states$|",
"^Central Europe and the Baltics$|",
"^Arab World$|",
"^Africa Western and Central$|",
"^Africa Eastern and Southern$|",
"^Sub-Saharan Africa$|","^West Bank and Gaza$"
), Country, ignore.case = TRUE))
#remove deplicates
inflation_consumer_price <- inflation_consumer_price %>%
distinct()
#see if theres any duplicates left
infl_dup <- inflation_consumer_price %>%
duplicated() %>%
sum()
print(paste("Number of duplicate rows:",infl_dup))
## [1] "Number of duplicate rows: 0"
clean gdp current
#import dataset
gdp_current_us <- read_csv("gdp_current$us.csv")
gdp_current_us <- gdp_current_us %>%
dplyr:: slice(-1, -2) %>% #rename the colomns
rename("Country" = "Data Source",
"Country_Code" = "World Development Indicators",
"Economic_Indicator" = "...3")
#split the indicator colomn into separate colomns
gdp_current_us <- gdp_current_us %>%
separate_wider_delim(
Economic_Indicator,
delim = ",",
names = c("Economic_Indicator", "Indicator_Code", as.character(1960:2024)),
too_few = 'align_start',
too_many = "drop"
)
#RESHAPE AND PIVOTE THE DATA TO LONGER
gdp_current_us <- gdp_current_us %>%
pivot_longer( cols = "1960":"2024",
names_to = "Years",
values_to = "Values"
) %>% #values to numeric
mutate(Values = as.numeric(Values)) %>% #remove nulls
drop_na()
#check the countries
unique(gdp_current_us$Country)
## [1] "Aruba"
## [2] "Africa Eastern and Southern"
## [3] "Afghanistan"
## [4] "Africa Western and Central"
## [5] "Angola"
## [6] "Albania"
## [7] "Andorra"
## [8] "Arab World"
## [9] "United Arab Emirates"
## [10] "Argentina"
## [11] "Armenia"
## [12] "American Samoa"
## [13] "Antigua and Barbuda"
## [14] "Australia"
## [15] "Austria"
## [16] "Azerbaijan"
## [17] "Burundi"
## [18] "Belgium"
## [19] "Benin"
## [20] "Burkina Faso"
## [21] "Bangladesh"
## [22] "Bulgaria"
## [23] "Bahrain"
## [24] "Bahamas, The"
## [25] "Bosnia and Herzegovina"
## [26] "Belarus"
## [27] "Belize"
## [28] "Bermuda"
## [29] "Bolivia"
## [30] "Brazil"
## [31] "Barbados"
## [32] "Brunei Darussalam"
## [33] "Bhutan"
## [34] "Botswana"
## [35] "Central African Republic"
## [36] "Canada"
## [37] "Central Europe and the Baltics"
## [38] "Switzerland"
## [39] "Channel Islands"
## [40] "Chile"
## [41] "China"
## [42] "Cote d'Ivoire"
## [43] "Cameroon"
## [44] "Congo, Dem. Rep."
## [45] "Congo, Rep."
## [46] "Colombia"
## [47] "Comoros"
## [48] "Cabo Verde"
## [49] "Costa Rica"
## [50] "Caribbean small states"
## [51] "Cuba"
## [52] "Curacao"
## [53] "Cayman Islands"
## [54] "Cyprus"
## [55] "Czechia"
## [56] "Germany"
## [57] "Djibouti"
## [58] "Dominica"
## [59] "Denmark"
## [60] "Dominican Republic"
## [61] "Algeria"
## [62] "East Asia & Pacific (excluding high income)"
## [63] "Early-demographic dividend"
## [64] "East Asia & Pacific"
## [65] "Europe & Central Asia (excluding high income)"
## [66] "Europe & Central Asia"
## [67] "Ecuador"
## [68] "Egypt, Arab Rep."
## [69] "Euro area"
## [70] "Eritrea"
## [71] "Spain"
## [72] "Estonia"
## [73] "Ethiopia"
## [74] "European Union"
## [75] "Fragile and conflict affected situations"
## [76] "Finland"
## [77] "Fiji"
## [78] "France"
## [79] "Faroe Islands"
## [80] "Micronesia, Fed. Sts."
## [81] "Gabon"
## [82] "United Kingdom"
## [83] "Georgia"
## [84] "Ghana"
## [85] "Guinea"
## [86] "Gambia, The"
## [87] "Guinea-Bissau"
## [88] "Equatorial Guinea"
## [89] "Greece"
## [90] "Grenada"
## [91] "Greenland"
## [92] "Guatemala"
## [93] "Guam"
## [94] "Guyana"
## [95] "High income"
## [96] "Hong Kong SAR, China"
## [97] "Honduras"
## [98] "Heavily indebted poor countries (HIPC)"
## [99] "Croatia"
## [100] "Haiti"
## [101] "Hungary"
## [102] "IBRD only"
## [103] "IDA & IBRD total"
## [104] "IDA total"
## [105] "IDA blend"
## [106] "Indonesia"
## [107] "IDA only"
## [108] "Isle of Man"
## [109] "India"
## [110] "Ireland"
## [111] "Iran, Islamic Rep."
## [112] "Iraq"
## [113] "Iceland"
## [114] "Israel"
## [115] "Italy"
## [116] "Jamaica"
## [117] "Jordan"
## [118] "Japan"
## [119] "Kazakhstan"
## [120] "Kenya"
## [121] "Kyrgyz Republic"
## [122] "Cambodia"
## [123] "Kiribati"
## [124] "St. Kitts and Nevis"
## [125] "Korea, Rep."
## [126] "Kuwait"
## [127] "Latin America & Caribbean (excluding high income)"
## [128] "Lao PDR"
## [129] "Lebanon"
## [130] "Liberia"
## [131] "Libya"
## [132] "St. Lucia"
## [133] "Latin America & Caribbean"
## [134] "Least developed countries: UN classification"
## [135] "Low income"
## [136] "Liechtenstein"
## [137] "Sri Lanka"
## [138] "Lower middle income"
## [139] "Low & middle income"
## [140] "Lesotho"
## [141] "Late-demographic dividend"
## [142] "Lithuania"
## [143] "Luxembourg"
## [144] "Latvia"
## [145] "Macao SAR, China"
## [146] "St. Martin (French part)"
## [147] "Morocco"
## [148] "Monaco"
## [149] "Moldova"
## [150] "Madagascar"
## [151] "Maldives"
## [152] "Middle East, North Africa, Afghanistan & Pakistan"
## [153] "Mexico"
## [154] "Marshall Islands"
## [155] "Middle income"
## [156] "North Macedonia"
## [157] "Mali"
## [158] "Malta"
## [159] "Myanmar"
## [160] "Middle East, North Africa, Afghanistan & Pakistan (excluding high income)"
## [161] "Montenegro"
## [162] "Mongolia"
## [163] "Northern Mariana Islands"
## [164] "Mozambique"
## [165] "Mauritania"
## [166] "Mauritius"
## [167] "Malawi"
## [168] "Malaysia"
## [169] "North America"
## [170] "Namibia"
## [171] "New Caledonia"
## [172] "Niger"
## [173] "Nigeria"
## [174] "Nicaragua"
## [175] "Netherlands"
## [176] "Norway"
## [177] "Nepal"
## [178] "Nauru"
## [179] "New Zealand"
## [180] "OECD members"
## [181] "Oman"
## [182] "Other small states"
## [183] "Pakistan"
## [184] "Panama"
## [185] "Peru"
## [186] "Philippines"
## [187] "Palau"
## [188] "Papua New Guinea"
## [189] "Poland"
## [190] "Pre-demographic dividend"
## [191] "Puerto Rico (US)"
## [192] "Portugal"
## [193] "Paraguay"
## [194] "West Bank and Gaza"
## [195] "Pacific island small states"
## [196] "Post-demographic dividend"
## [197] "French Polynesia"
## [198] "Qatar"
## [199] "Romania"
## [200] "Russian Federation"
## [201] "Rwanda"
## [202] "South Asia"
## [203] "Saudi Arabia"
## [204] "Sudan"
## [205] "Senegal"
## [206] "Singapore"
## [207] "Solomon Islands"
## [208] "Sierra Leone"
## [209] "El Salvador"
## [210] "San Marino"
## [211] "Somalia, Fed. Rep."
## [212] "Serbia"
## [213] "Sub-Saharan Africa (excluding high income)"
## [214] "South Sudan"
## [215] "Sub-Saharan Africa"
## [216] "Small states"
## [217] "Sao Tome and Principe"
## [218] "Suriname"
## [219] "Slovak Republic"
## [220] "Slovenia"
## [221] "Sweden"
## [222] "Eswatini"
## [223] "Sint Maarten (Dutch part)"
## [224] "Seychelles"
## [225] "Syrian Arab Republic"
## [226] "Turks and Caicos Islands"
## [227] "Chad"
## [228] "East Asia & Pacific (IDA & IBRD countries)"
## [229] "Europe & Central Asia (IDA & IBRD countries)"
## [230] "Togo"
## [231] "Thailand"
## [232] "Tajikistan"
## [233] "Turkmenistan"
## [234] "Latin America & the Caribbean (IDA & IBRD countries)"
## [235] "Timor-Leste"
## [236] "Middle East, North Africa, Afghanistan & Pakistan (IDA & IBRD)"
## [237] "Tonga"
## [238] "South Asia (IDA & IBRD)"
## [239] "Sub-Saharan Africa (IDA & IBRD countries)"
## [240] "Trinidad and Tobago"
## [241] "Tunisia"
## [242] "Turkiye"
## [243] "Tuvalu"
## [244] "Tanzania"
## [245] "Uganda"
## [246] "Ukraine"
## [247] "Upper middle income"
## [248] "Uruguay"
## [249] "United States"
## [250] "Uzbekistan"
## [251] "St. Vincent and the Grenadines"
## [252] "Venezuela, RB"
## [253] "Virgin Islands (U.S.)"
## [254] "Viet Nam"
## [255] "Vanuatu"
## [256] "World"
## [257] "Samoa"
## [258] "Kosovo"
## [259] "Yemen, Rep."
## [260] "South Africa"
## [261] "Zambia"
## [262] "Zimbabwe"
# remove the unneeded rows
gdp_current_us <- gdp_current_us %>%
mutate(Country = trimws(Country)) %>%
filter(!grepl(paste0(
"^Sub-Saharan Africa \\(excluding high income\\)$|",
"^Sub-Saharan Africa \\(IDA & IBRD countries\\)$|",
"^Upper middle income$|",
"^South Asia \\(IDA & IBRD\\)$|",
"^Middle East, North Africa, Afghanistan & Pakistan \\(IDA & IBRD\\)$|",
"^Latin America & the Caribbean \\(IDA & IBRD countries\\)$|",
"^Europe & Central Asia \\(IDA & IBRD countries\\)$|",
"^East Asia & Pacific \\(IDA & IBRD countries\\)$|",
"^World$|",
"^Post-demographic dividend$|",
"^Small states$|",
"^Pacific island small states$|",
"^Pre-demographic dividend$|",
"^Other small states$|",
"^OECD members$|",
"^Middle East, North Africa, Afghanistan & Pakistan \\(excluding high income\\)$|",
"^Middle East, North Africa, Afghanistan & Pakistan$|",
"^Middle income$|",
"^Late-demographic dividend$|",
"^Low & middle income$|",
"^Lower middle income$|",
"^Low income$|",
"^Least developed countries: UN classification$|",
"^Latin America & Caribbean$|",
"^Latin America & Caribbean \\(excluding high income\\)$|",
"^IDA only$|",
"^IDA blend$|",
"^IDA total$|",
"^IDA & IBRD total$|",
"^IBRD only$|",
"^Heavily indebted poor countries \\(HIPC\\)$|",
"^High income$|",
"^Fragile and conflict affected situations$|",
"^Europe & Central Asia$|",
"^East Asia & Pacific$|",
"^Europe & Central Asia \\(excluding high income\\)$|",
"^East Asia & Pacific \\(excluding high income\\)$|",
"^Caribbean small states$|",
"^Central Europe and the Baltics$|",
"^Arab World$|",
"^Africa Western and Central$|",
"^Africa Eastern and Southern$|",
"^Sub-Saharan Africa$|","^West Bank and Gaza$"
), Country, ignore.case = TRUE))
#remove deplicates
gdp_current_us <- gdp_current_us %>%
distinct()
#see if theres any duplicates left
gdp_current_dup <- gdp_current_us %>%
duplicated() %>%
sum()
print(paste("Number of duplicate rows:",gdp_current_dup))
## [1] "Number of duplicate rows: 0"
clean gdp per capita
gdp_per_capita <- read_csv("gdp_per_capita.csv")
gdp_per_capita <- gdp_per_capita %>%
dplyr::slice(-1, -2) %>% #rename the colomns
rename("Country" = "Data Source",
"Country_Code" = "World Development Indicators",
"Economic_Indicator" = "...3")
#split the indicator colomn into separate colomns
gdp_per_capita <- gdp_per_capita %>%
separate_wider_delim(
Economic_Indicator,
delim = ",",
names = c("Economic_Indicator", "Indicator_Code", as.character(1960:2024)),
too_few = 'align_start',
too_many = "drop"
)
#RESHAPE AND PIVOTE THE DATA TO LONGER
gdp_per_capita <- gdp_per_capita %>%
pivot_longer( cols = "1960":"2024",
names_to = "Years",
values_to = "Values"
) %>% #values to numeric
mutate(Values = as.numeric(Values)) %>% #remove nulls
drop_na()
#check the countries
unique(gdp_per_capita$Country)
## [1] "Aruba"
## [2] "Africa Eastern and Southern"
## [3] "Afghanistan"
## [4] "Africa Western and Central"
## [5] "Angola"
## [6] "Albania"
## [7] "Andorra"
## [8] "Arab World"
## [9] "United Arab Emirates"
## [10] "Argentina"
## [11] "Armenia"
## [12] "American Samoa"
## [13] "Antigua and Barbuda"
## [14] "Australia"
## [15] "Austria"
## [16] "Azerbaijan"
## [17] "Burundi"
## [18] "Belgium"
## [19] "Benin"
## [20] "Burkina Faso"
## [21] "Bangladesh"
## [22] "Bulgaria"
## [23] "Bahrain"
## [24] "Bahamas, The"
## [25] "Bosnia and Herzegovina"
## [26] "Belarus"
## [27] "Belize"
## [28] "Bermuda"
## [29] "Bolivia"
## [30] "Brazil"
## [31] "Barbados"
## [32] "Brunei Darussalam"
## [33] "Bhutan"
## [34] "Botswana"
## [35] "Central African Republic"
## [36] "Canada"
## [37] "Central Europe and the Baltics"
## [38] "Switzerland"
## [39] "Channel Islands"
## [40] "Chile"
## [41] "China"
## [42] "Cote d'Ivoire"
## [43] "Cameroon"
## [44] "Congo, Dem. Rep."
## [45] "Congo, Rep."
## [46] "Colombia"
## [47] "Comoros"
## [48] "Cabo Verde"
## [49] "Costa Rica"
## [50] "Caribbean small states"
## [51] "Cuba"
## [52] "Curacao"
## [53] "Cayman Islands"
## [54] "Cyprus"
## [55] "Czechia"
## [56] "Germany"
## [57] "Djibouti"
## [58] "Dominica"
## [59] "Denmark"
## [60] "Dominican Republic"
## [61] "Algeria"
## [62] "East Asia & Pacific (excluding high income)"
## [63] "Early-demographic dividend"
## [64] "East Asia & Pacific"
## [65] "Europe & Central Asia (excluding high income)"
## [66] "Europe & Central Asia"
## [67] "Ecuador"
## [68] "Egypt, Arab Rep."
## [69] "Euro area"
## [70] "Eritrea"
## [71] "Spain"
## [72] "Estonia"
## [73] "Ethiopia"
## [74] "European Union"
## [75] "Fragile and conflict affected situations"
## [76] "Finland"
## [77] "Fiji"
## [78] "France"
## [79] "Faroe Islands"
## [80] "Micronesia, Fed. Sts."
## [81] "Gabon"
## [82] "United Kingdom"
## [83] "Georgia"
## [84] "Ghana"
## [85] "Guinea"
## [86] "Gambia, The"
## [87] "Guinea-Bissau"
## [88] "Equatorial Guinea"
## [89] "Greece"
## [90] "Grenada"
## [91] "Greenland"
## [92] "Guatemala"
## [93] "Guam"
## [94] "Guyana"
## [95] "High income"
## [96] "Hong Kong SAR, China"
## [97] "Honduras"
## [98] "Heavily indebted poor countries (HIPC)"
## [99] "Croatia"
## [100] "Haiti"
## [101] "Hungary"
## [102] "IBRD only"
## [103] "IDA & IBRD total"
## [104] "IDA total"
## [105] "IDA blend"
## [106] "Indonesia"
## [107] "IDA only"
## [108] "Isle of Man"
## [109] "India"
## [110] "Ireland"
## [111] "Iran, Islamic Rep."
## [112] "Iraq"
## [113] "Iceland"
## [114] "Israel"
## [115] "Italy"
## [116] "Jamaica"
## [117] "Jordan"
## [118] "Japan"
## [119] "Kazakhstan"
## [120] "Kenya"
## [121] "Kyrgyz Republic"
## [122] "Cambodia"
## [123] "Kiribati"
## [124] "St. Kitts and Nevis"
## [125] "Korea, Rep."
## [126] "Kuwait"
## [127] "Latin America & Caribbean (excluding high income)"
## [128] "Lao PDR"
## [129] "Lebanon"
## [130] "Liberia"
## [131] "Libya"
## [132] "St. Lucia"
## [133] "Latin America & Caribbean"
## [134] "Least developed countries: UN classification"
## [135] "Low income"
## [136] "Liechtenstein"
## [137] "Sri Lanka"
## [138] "Lower middle income"
## [139] "Low & middle income"
## [140] "Lesotho"
## [141] "Late-demographic dividend"
## [142] "Lithuania"
## [143] "Luxembourg"
## [144] "Latvia"
## [145] "Macao SAR, China"
## [146] "St. Martin (French part)"
## [147] "Morocco"
## [148] "Monaco"
## [149] "Moldova"
## [150] "Madagascar"
## [151] "Maldives"
## [152] "Middle East, North Africa, Afghanistan & Pakistan"
## [153] "Mexico"
## [154] "Marshall Islands"
## [155] "Middle income"
## [156] "North Macedonia"
## [157] "Mali"
## [158] "Malta"
## [159] "Myanmar"
## [160] "Middle East, North Africa, Afghanistan & Pakistan (excluding high income)"
## [161] "Montenegro"
## [162] "Mongolia"
## [163] "Northern Mariana Islands"
## [164] "Mozambique"
## [165] "Mauritania"
## [166] "Mauritius"
## [167] "Malawi"
## [168] "Malaysia"
## [169] "North America"
## [170] "Namibia"
## [171] "New Caledonia"
## [172] "Niger"
## [173] "Nigeria"
## [174] "Nicaragua"
## [175] "Netherlands"
## [176] "Norway"
## [177] "Nepal"
## [178] "Nauru"
## [179] "New Zealand"
## [180] "OECD members"
## [181] "Oman"
## [182] "Other small states"
## [183] "Pakistan"
## [184] "Panama"
## [185] "Peru"
## [186] "Philippines"
## [187] "Palau"
## [188] "Papua New Guinea"
## [189] "Poland"
## [190] "Pre-demographic dividend"
## [191] "Puerto Rico (US)"
## [192] "Portugal"
## [193] "Paraguay"
## [194] "West Bank and Gaza"
## [195] "Pacific island small states"
## [196] "Post-demographic dividend"
## [197] "French Polynesia"
## [198] "Qatar"
## [199] "Romania"
## [200] "Russian Federation"
## [201] "Rwanda"
## [202] "South Asia"
## [203] "Saudi Arabia"
## [204] "Sudan"
## [205] "Senegal"
## [206] "Singapore"
## [207] "Solomon Islands"
## [208] "Sierra Leone"
## [209] "El Salvador"
## [210] "San Marino"
## [211] "Somalia, Fed. Rep."
## [212] "Serbia"
## [213] "Sub-Saharan Africa (excluding high income)"
## [214] "South Sudan"
## [215] "Sub-Saharan Africa"
## [216] "Small states"
## [217] "Sao Tome and Principe"
## [218] "Suriname"
## [219] "Slovak Republic"
## [220] "Slovenia"
## [221] "Sweden"
## [222] "Eswatini"
## [223] "Sint Maarten (Dutch part)"
## [224] "Seychelles"
## [225] "Syrian Arab Republic"
## [226] "Turks and Caicos Islands"
## [227] "Chad"
## [228] "East Asia & Pacific (IDA & IBRD countries)"
## [229] "Europe & Central Asia (IDA & IBRD countries)"
## [230] "Togo"
## [231] "Thailand"
## [232] "Tajikistan"
## [233] "Turkmenistan"
## [234] "Latin America & the Caribbean (IDA & IBRD countries)"
## [235] "Timor-Leste"
## [236] "Middle East, North Africa, Afghanistan & Pakistan (IDA & IBRD)"
## [237] "Tonga"
## [238] "South Asia (IDA & IBRD)"
## [239] "Sub-Saharan Africa (IDA & IBRD countries)"
## [240] "Trinidad and Tobago"
## [241] "Tunisia"
## [242] "Turkiye"
## [243] "Tuvalu"
## [244] "Tanzania"
## [245] "Uganda"
## [246] "Ukraine"
## [247] "Upper middle income"
## [248] "Uruguay"
## [249] "United States"
## [250] "Uzbekistan"
## [251] "St. Vincent and the Grenadines"
## [252] "Venezuela, RB"
## [253] "Virgin Islands (U.S.)"
## [254] "Viet Nam"
## [255] "Vanuatu"
## [256] "World"
## [257] "Samoa"
## [258] "Kosovo"
## [259] "Yemen, Rep."
## [260] "South Africa"
## [261] "Zambia"
## [262] "Zimbabwe"
# remove the unneeded rows
gdp_per_capita <- gdp_per_capita %>%
mutate(Country = trimws(Country)) %>%
filter(!grepl(paste0(
"^Sub-Saharan Africa \\(excluding high income\\)$|",
"^Sub-Saharan Africa \\(IDA & IBRD countries\\)$|",
"^Upper middle income$|",
"^South Asia \\(IDA & IBRD\\)$|",
"^Middle East, North Africa, Afghanistan & Pakistan \\(IDA & IBRD\\)$|",
"^Latin America & the Caribbean \\(IDA & IBRD countries\\)$|",
"^Europe & Central Asia \\(IDA & IBRD countries\\)$|",
"^East Asia & Pacific \\(IDA & IBRD countries\\)$|",
"^World$|",
"^Post-demographic dividend$|",
"^Small states$|",
"^Pacific island small states$|",
"^Pre-demographic dividend$|",
"^Other small states$|",
"^OECD members$|",
"^Middle East, North Africa, Afghanistan & Pakistan \\(excluding high income\\)$|",
"^Middle East, North Africa, Afghanistan & Pakistan$|",
"^Middle income$|",
"^Late-demographic dividend$|",
"^Low & middle income$|",
"^Lower middle income$|",
"^Low income$|",
"^Least developed countries: UN classification$|",
"^Latin America & Caribbean$|",
"^Latin America & Caribbean \\(excluding high income\\)$|",
"^IDA only$|",
"^IDA blend$|",
"^IDA total$|",
"^IDA & IBRD total$|",
"^IBRD only$|",
"^Heavily indebted poor countries \\(HIPC\\)$|",
"^High income$|",
"^Fragile and conflict affected situations$|",
"^Europe & Central Asia$|",
"^East Asia & Pacific$|",
"^Europe & Central Asia \\(excluding high income\\)$|",
"^East Asia & Pacific \\(excluding high income\\)$|",
"^Caribbean small states$|",
"^Central Europe and the Baltics$|",
"^Arab World$|",
"^Africa Western and Central$|",
"^Africa Eastern and Southern$|",
"^Sub-Saharan Africa$|","^West Bank and Gaza$"
), Country, ignore.case = TRUE))
#remove deplicates
gdp_per_capita <- gdp_per_capita %>%
distinct()
#see if theres any duplicates left
gdp_per_capita_dup <- gdp_per_capita %>%
duplicated() %>%
sum()
print(paste("Number of duplicate rows:",gdp_per_capita_dup))
## [1] "Number of duplicate rows: 0"
CLEAN PRIVATE SECTOR
private_sector_credit <- read_csv("private_sector_credit.csv")
private_sector_credit <- private_sector_credit %>%
dplyr::slice(-1, -2) %>% #rename the colomns
rename("Country" = "Data Source",
"Country_Code" = "World Development Indicators",
"Economic_Indicator" = "...3")
#split the indicator colomn into separate colomns
private_sector_credit <- private_sector_credit %>%
separate_wider_delim(
Economic_Indicator,
delim = ",",
names = c("Economic_Indicator", "Indicator_Code", as.character(1960:2024)),
too_few = 'align_start',
too_many = "drop"
)
#RESHAPE AND PIVOTE THE DATA TO LONGER
private_sector_credit <- private_sector_credit %>%
pivot_longer( cols = "1960":"2024",
names_to = "Years",
values_to = "Values"
) %>% #values to numeric
mutate(Values = as.numeric(Values)) %>% #remove nulls
drop_na()
#check the countries
unique(private_sector_credit$Country)
## [1] "Aruba"
## [2] "Africa Eastern and Southern"
## [3] "Afghanistan"
## [4] "Africa Western and Central"
## [5] "Angola"
## [6] "Albania"
## [7] "Arab World"
## [8] "United Arab Emirates"
## [9] "Argentina"
## [10] "Armenia"
## [11] "Antigua and Barbuda"
## [12] "Australia"
## [13] "Austria"
## [14] "Azerbaijan"
## [15] "Burundi"
## [16] "Belgium"
## [17] "Benin"
## [18] "Burkina Faso"
## [19] "Bangladesh"
## [20] "Bulgaria"
## [21] "Bahrain"
## [22] "Bahamas, The"
## [23] "Bosnia and Herzegovina"
## [24] "Belarus"
## [25] "Belize"
## [26] "Bolivia"
## [27] "Brazil"
## [28] "Barbados"
## [29] "Brunei Darussalam"
## [30] "Bhutan"
## [31] "Botswana"
## [32] "Central African Republic"
## [33] "Canada"
## [34] "Central Europe and the Baltics"
## [35] "Switzerland"
## [36] "Chile"
## [37] "China"
## [38] "Cote d'Ivoire"
## [39] "Cameroon"
## [40] "Congo, Dem. Rep."
## [41] "Congo, Rep."
## [42] "Colombia"
## [43] "Comoros"
## [44] "Cabo Verde"
## [45] "Costa Rica"
## [46] "Caribbean small states"
## [47] "Cyprus"
## [48] "Czechia"
## [49] "Germany"
## [50] "Djibouti"
## [51] "Dominica"
## [52] "Denmark"
## [53] "Dominican Republic"
## [54] "Algeria"
## [55] "East Asia & Pacific (excluding high income)"
## [56] "Early-demographic dividend"
## [57] "East Asia & Pacific"
## [58] "Europe & Central Asia (excluding high income)"
## [59] "Europe & Central Asia"
## [60] "Ecuador"
## [61] "Egypt, Arab Rep."
## [62] "Euro area"
## [63] "Eritrea"
## [64] "Spain"
## [65] "Estonia"
## [66] "Ethiopia"
## [67] "European Union"
## [68] "Fragile and conflict affected situations"
## [69] "Finland"
## [70] "Fiji"
## [71] "France"
## [72] "Micronesia, Fed. Sts."
## [73] "Gabon"
## [74] "United Kingdom"
## [75] "Georgia"
## [76] "Ghana"
## [77] "Guinea"
## [78] "Gambia, The"
## [79] "Guinea-Bissau"
## [80] "Equatorial Guinea"
## [81] "Greece"
## [82] "Grenada"
## [83] "Guatemala"
## [84] "Guyana"
## [85] "High income"
## [86] "Hong Kong SAR, China"
## [87] "Honduras"
## [88] "Heavily indebted poor countries (HIPC)"
## [89] "Croatia"
## [90] "Haiti"
## [91] "Hungary"
## [92] "IBRD only"
## [93] "IDA & IBRD total"
## [94] "IDA total"
## [95] "IDA blend"
## [96] "Indonesia"
## [97] "IDA only"
## [98] "India"
## [99] "Ireland"
## [100] "Iran, Islamic Rep."
## [101] "Iraq"
## [102] "Iceland"
## [103] "Israel"
## [104] "Italy"
## [105] "Jamaica"
## [106] "Jordan"
## [107] "Japan"
## [108] "Kazakhstan"
## [109] "Kenya"
## [110] "Kyrgyz Republic"
## [111] "Cambodia"
## [112] "St. Kitts and Nevis"
## [113] "Korea, Rep."
## [114] "Kuwait"
## [115] "Latin America & Caribbean (excluding high income)"
## [116] "Lao PDR"
## [117] "Lebanon"
## [118] "Liberia"
## [119] "Libya"
## [120] "St. Lucia"
## [121] "Latin America & Caribbean"
## [122] "Least developed countries: UN classification"
## [123] "Low income"
## [124] "Sri Lanka"
## [125] "Lower middle income"
## [126] "Low & middle income"
## [127] "Lesotho"
## [128] "Late-demographic dividend"
## [129] "Lithuania"
## [130] "Luxembourg"
## [131] "Latvia"
## [132] "Macao SAR, China"
## [133] "Morocco"
## [134] "Moldova"
## [135] "Madagascar"
## [136] "Maldives"
## [137] "Middle East, North Africa, Afghanistan & Pakistan"
## [138] "Mexico"
## [139] "Middle income"
## [140] "North Macedonia"
## [141] "Mali"
## [142] "Malta"
## [143] "Myanmar"
## [144] "Middle East, North Africa, Afghanistan & Pakistan (excluding high income)"
## [145] "Montenegro"
## [146] "Mongolia"
## [147] "Mozambique"
## [148] "Mauritania"
## [149] "Mauritius"
## [150] "Malawi"
## [151] "Malaysia"
## [152] "North America"
## [153] "Namibia"
## [154] "Niger"
## [155] "Nigeria"
## [156] "Nicaragua"
## [157] "Netherlands"
## [158] "Norway"
## [159] "Nepal"
## [160] "New Zealand"
## [161] "OECD members"
## [162] "Oman"
## [163] "Other small states"
## [164] "Pakistan"
## [165] "Panama"
## [166] "Peru"
## [167] "Philippines"
## [168] "Papua New Guinea"
## [169] "Poland"
## [170] "Pre-demographic dividend"
## [171] "Portugal"
## [172] "Paraguay"
## [173] "West Bank and Gaza"
## [174] "Pacific island small states"
## [175] "Post-demographic dividend"
## [176] "Qatar"
## [177] "Romania"
## [178] "Russian Federation"
## [179] "Rwanda"
## [180] "South Asia"
## [181] "Saudi Arabia"
## [182] "Sudan"
## [183] "Senegal"
## [184] "Singapore"
## [185] "Solomon Islands"
## [186] "Sierra Leone"
## [187] "El Salvador"
## [188] "Somalia, Fed. Rep."
## [189] "Serbia"
## [190] "Sub-Saharan Africa (excluding high income)"
## [191] "South Sudan"
## [192] "Sub-Saharan Africa"
## [193] "Small states"
## [194] "Sao Tome and Principe"
## [195] "Suriname"
## [196] "Slovak Republic"
## [197] "Slovenia"
## [198] "Sweden"
## [199] "Eswatini"
## [200] "Seychelles"
## [201] "Syrian Arab Republic"
## [202] "Chad"
## [203] "East Asia & Pacific (IDA & IBRD countries)"
## [204] "Europe & Central Asia (IDA & IBRD countries)"
## [205] "Togo"
## [206] "Thailand"
## [207] "Tajikistan"
## [208] "Latin America & the Caribbean (IDA & IBRD countries)"
## [209] "Timor-Leste"
## [210] "Middle East, North Africa, Afghanistan & Pakistan (IDA & IBRD)"
## [211] "Tonga"
## [212] "South Asia (IDA & IBRD)"
## [213] "Sub-Saharan Africa (IDA & IBRD countries)"
## [214] "Trinidad and Tobago"
## [215] "Tunisia"
## [216] "Turkiye"
## [217] "Tanzania"
## [218] "Uganda"
## [219] "Ukraine"
## [220] "Upper middle income"
## [221] "Uruguay"
## [222] "United States"
## [223] "Uzbekistan"
## [224] "St. Vincent and the Grenadines"
## [225] "Venezuela, RB"
## [226] "Viet Nam"
## [227] "Vanuatu"
## [228] "World"
## [229] "Samoa"
## [230] "Kosovo"
## [231] "Yemen, Rep."
## [232] "South Africa"
## [233] "Zambia"
## [234] "Zimbabwe"
# remove the unneeded rows
private_sector_credit <- private_sector_credit %>%
mutate(Country = trimws(Country)) %>%
filter(!grepl(paste0(
"^Sub-Saharan Africa \\(excluding high income\\)$|",
"^Sub-Saharan Africa \\(IDA & IBRD countries\\)$|",
"^Upper middle income$|",
"^South Asia \\(IDA & IBRD\\)$|",
"^Middle East, North Africa, Afghanistan & Pakistan \\(IDA & IBRD\\)$|",
"^Latin America & the Caribbean \\(IDA & IBRD countries\\)$|",
"^Europe & Central Asia \\(IDA & IBRD countries\\)$|",
"^East Asia & Pacific \\(IDA & IBRD countries\\)$|",
"^World$|",
"^Post-demographic dividend$|",
"^Small states$|",
"^Pacific island small states$|",
"^Pre-demographic dividend$|",
"^Other small states$|",
"^OECD members$|",
"^Middle East, North Africa, Afghanistan & Pakistan \\(excluding high income\\)$|",
"^Middle East, North Africa, Afghanistan & Pakistan$|",
"^Middle income$|",
"^Late-demographic dividend$|",
"^Low & middle income$|",
"^Lower middle income$|",
"^Low income$|",
"^Least developed countries: UN classification$|",
"^Latin America & Caribbean$|",
"^Latin America & Caribbean \\(excluding high income\\)$|",
"^IDA only$|",
"^IDA blend$|",
"^IDA total$|",
"^IDA & IBRD total$|",
"^IBRD only$|",
"^Heavily indebted poor countries \\(HIPC\\)$|",
"^High income$|",
"^Fragile and conflict affected situations$|",
"^Europe & Central Asia$|",
"^East Asia & Pacific$|",
"^Europe & Central Asia \\(excluding high income\\)$|",
"^East Asia & Pacific \\(excluding high income\\)$|",
"^Caribbean small states$|",
"^Central Europe and the Baltics$|",
"^Arab World$|",
"^Africa Western and Central$|",
"^Africa Eastern and Southern$|",
"^Sub-Saharan Africa$|","^West Bank and Gaza$"
), Country, ignore.case = TRUE))
#remove deplicates
private_sector_credit <- private_sector_credit %>%
distinct()
#see if theres any duplicates left
private_sector_credit_dup <- private_sector_credit %>%
duplicated() %>%
sum()
print(paste("Number of duplicate rows:",private_sector_credit_dup))
## [1] "Number of duplicate rows: 0"
Clean Exchange rate
exchange_rate <- read_csv("exchange_rate.csv")
exchange_rate <- exchange_rate %>%
dplyr::slice(-1, -2) %>% #rename the colomns
rename("Country" = "Data Source",
"Country_Code" = "World Development Indicators",
"Economic_Indicator" = "...3")
#split the indicator colomn into separate colomns
exchange_rate <- exchange_rate %>%
separate_wider_delim(
Economic_Indicator,
delim = ",",
names = c("Economic_Indicator", "Indicator_Code", as.character(1960:2024)),
too_few = 'align_start',
too_many = "drop"
)
#RESHAPE AND PIVOTE THE DATA TO LONGER
exchange_rate <- exchange_rate %>%
pivot_longer( cols = "1960":"2024",
names_to = "Years",
values_to = "Values"
) %>% #values to numeric
mutate(Values = as.numeric(Values)) %>% #remove nulls
drop_na()
#check the countries
unique(exchange_rate$Country)
## [1] "Aruba" "Afghanistan"
## [3] "Angola" "Albania"
## [5] "Andorra" "United Arab Emirates"
## [7] "Argentina" "Armenia"
## [9] "American Samoa" "Antigua and Barbuda"
## [11] "Australia" "Austria"
## [13] "Azerbaijan" "Burundi"
## [15] "Belgium" "Benin"
## [17] "Burkina Faso" "Bangladesh"
## [19] "Bulgaria" "Bahrain"
## [21] "Bahamas, The" "Bosnia and Herzegovina"
## [23] "Belarus" "Belize"
## [25] "Bermuda" "Bolivia"
## [27] "Brazil" "Barbados"
## [29] "Brunei Darussalam" "Bhutan"
## [31] "Botswana" "Central African Republic"
## [33] "Canada" "Switzerland"
## [35] "Channel Islands" "Chile"
## [37] "China" "Cote d'Ivoire"
## [39] "Cameroon" "Congo, Dem. Rep."
## [41] "Congo, Rep." "Colombia"
## [43] "Comoros" "Cabo Verde"
## [45] "Costa Rica" "Cuba"
## [47] "Curacao" "Cayman Islands"
## [49] "Cyprus" "Czechia"
## [51] "Germany" "Djibouti"
## [53] "Dominica" "Denmark"
## [55] "Dominican Republic" "Algeria"
## [57] "Ecuador" "Egypt, Arab Rep."
## [59] "Euro area" "Eritrea"
## [61] "Spain" "Estonia"
## [63] "Ethiopia" "Finland"
## [65] "Fiji" "France"
## [67] "Faroe Islands" "Micronesia, Fed. Sts."
## [69] "Gabon" "United Kingdom"
## [71] "Georgia" "Ghana"
## [73] "Gibraltar" "Guinea"
## [75] "Gambia, The" "Guinea-Bissau"
## [77] "Equatorial Guinea" "Greece"
## [79] "Grenada" "Greenland"
## [81] "Guatemala" "Guam"
## [83] "Guyana" "Hong Kong SAR, China"
## [85] "Honduras" "Croatia"
## [87] "Haiti" "Hungary"
## [89] "Indonesia" "Isle of Man"
## [91] "India" "Ireland"
## [93] "Iran, Islamic Rep." "Iraq"
## [95] "Iceland" "Israel"
## [97] "Italy" "Jamaica"
## [99] "Jordan" "Japan"
## [101] "Kazakhstan" "Kenya"
## [103] "Kyrgyz Republic" "Cambodia"
## [105] "Kiribati" "St. Kitts and Nevis"
## [107] "Korea, Rep." "Kuwait"
## [109] "Lao PDR" "Lebanon"
## [111] "Liberia" "Libya"
## [113] "St. Lucia" "Sri Lanka"
## [115] "Lesotho" "Lithuania"
## [117] "Luxembourg" "Latvia"
## [119] "Macao SAR, China" "St. Martin (French part)"
## [121] "Morocco" "Monaco"
## [123] "Moldova" "Madagascar"
## [125] "Maldives" "Mexico"
## [127] "Marshall Islands" "North Macedonia"
## [129] "Mali" "Malta"
## [131] "Myanmar" "Montenegro"
## [133] "Mongolia" "Northern Mariana Islands"
## [135] "Mozambique" "Mauritania"
## [137] "Mauritius" "Malawi"
## [139] "Malaysia" "Namibia"
## [141] "New Caledonia" "Niger"
## [143] "Nigeria" "Nicaragua"
## [145] "Netherlands" "Norway"
## [147] "Nepal" "Nauru"
## [149] "New Zealand" "Oman"
## [151] "Pakistan" "Panama"
## [153] "Peru" "Philippines"
## [155] "Palau" "Papua New Guinea"
## [157] "Poland" "Puerto Rico (US)"
## [159] "Portugal" "Paraguay"
## [161] "French Polynesia" "Qatar"
## [163] "Romania" "Russian Federation"
## [165] "Rwanda" "Saudi Arabia"
## [167] "Sudan" "Senegal"
## [169] "Singapore" "Solomon Islands"
## [171] "Sierra Leone" "El Salvador"
## [173] "San Marino" "Somalia, Fed. Rep."
## [175] "Serbia" "South Sudan"
## [177] "Sao Tome and Principe" "Suriname"
## [179] "Slovak Republic" "Slovenia"
## [181] "Sweden" "Eswatini"
## [183] "Sint Maarten (Dutch part)" "Seychelles"
## [185] "Syrian Arab Republic" "Turks and Caicos Islands"
## [187] "Chad" "Togo"
## [189] "Thailand" "Tajikistan"
## [191] "Turkmenistan" "Timor-Leste"
## [193] "Tonga" "Trinidad and Tobago"
## [195] "Tunisia" "Turkiye"
## [197] "Tanzania" "Uganda"
## [199] "Ukraine" "Uruguay"
## [201] "United States" "Uzbekistan"
## [203] "St. Vincent and the Grenadines" "Venezuela, RB"
## [205] "Virgin Islands (U.S.)" "Viet Nam"
## [207] "Vanuatu" "Samoa"
## [209] "Kosovo" "Yemen, Rep."
## [211] "South Africa" "Zambia"
## [213] "Zimbabwe"
# remove the unneeded rows
exchange_rate <- exchange_rate %>%
mutate(Country = trimws(Country)) %>%
filter(!grepl(paste0(
"^Sub-Saharan Africa \\(excluding high income\\)$|",
"^Sub-Saharan Africa \\(IDA & IBRD countries\\)$|",
"^Upper middle income$|",
"^South Asia \\(IDA & IBRD\\)$|",
"^Middle East, North Africa, Afghanistan & Pakistan \\(IDA & IBRD\\)$|",
"^Latin America & the Caribbean \\(IDA & IBRD countries\\)$|",
"^Europe & Central Asia \\(IDA & IBRD countries\\)$|",
"^East Asia & Pacific \\(IDA & IBRD countries\\)$|",
"^World$|",
"^Post-demographic dividend$|",
"^Small states$|",
"^Pacific island small states$|",
"^Pre-demographic dividend$|",
"^Other small states$|",
"^OECD members$|",
"^Middle East, North Africa, Afghanistan & Pakistan \\(excluding high income\\)$|",
"^Middle East, North Africa, Afghanistan & Pakistan$|",
"^Middle income$|",
"^Late-demographic dividend$|",
"^Low & middle income$|",
"^Lower middle income$|",
"^Low income$|",
"^Least developed countries: UN classification$|",
"^Latin America & Caribbean$|",
"^Latin America & Caribbean \\(excluding high income\\)$|",
"^IDA only$|",
"^IDA blend$|",
"^IDA total$|",
"^IDA & IBRD total$|",
"^IBRD only$|",
"^Heavily indebted poor countries \\(HIPC\\)$|",
"^High income$|",
"^Fragile and conflict affected situations$|",
"^Europe & Central Asia$|",
"^East Asia & Pacific$|",
"^Europe & Central Asia \\(excluding high income\\)$|",
"^East Asia & Pacific \\(excluding high income\\)$|",
"^Caribbean small states$|",
"^Central Europe and the Baltics$|",
"^Arab World$|",
"^Africa Western and Central$|",
"^Africa Eastern and Southern$|",
"^Sub-Saharan Africa$|","^West Bank and Gaza$"
), Country, ignore.case = TRUE))
#remove deplicates
exchange_rate <- exchange_rate %>%
distinct()
#see if theres any duplicates left
exchange_rate_dup <- exchange_rate %>%
duplicated() %>%
sum()
print(paste("Number of duplicate rows:",exchange_rate_dup))
## [1] "Number of duplicate rows: 0"
Clean current account balance
current_account_balance <- read_csv("current_account_balance.csv")
current_account_balance <- current_account_balance %>%
dplyr::slice(-1, -2) %>% #rename the colomns
rename("Country" = "Data Source",
"Country_Code" = "World Development Indicators",
"Economic_Indicator" = "...3")
#split the indicator colomn into separate colomns
current_account_balance <- current_account_balance %>%
separate_wider_delim(
Economic_Indicator,
delim = ",",
names = c("Economic_Indicator", "Indicator_Code", as.character(1960:2024)),
too_few = 'align_start',
too_many = "drop"
)
#RESHAPE AND PIVOTE THE DATA TO LONGER
current_account_balance <- current_account_balance %>%
pivot_longer( cols = "1960":"2024",
names_to = "Years",
values_to = "Values"
) %>% #values to numeric
mutate(Values = as.numeric(Values)) %>% #remove nulls
drop_na()
#check the countries
unique(current_account_balance$Country)
## [1] "Aruba" "Afghanistan"
## [3] "Angola" "Albania"
## [5] "Andorra" "Argentina"
## [7] "Armenia" "Antigua and Barbuda"
## [9] "Australia" "Austria"
## [11] "Azerbaijan" "Burundi"
## [13] "Belgium" "Benin"
## [15] "Burkina Faso" "Bangladesh"
## [17] "Bulgaria" "Bahrain"
## [19] "Bahamas, The" "Bosnia and Herzegovina"
## [21] "Belarus" "Belize"
## [23] "Bermuda" "Bolivia"
## [25] "Brazil" "Barbados"
## [27] "Brunei Darussalam" "Bhutan"
## [29] "Botswana" "Central African Republic"
## [31] "Canada" "Switzerland"
## [33] "Chile" "China"
## [35] "Cote d'Ivoire" "Cameroon"
## [37] "Congo, Dem. Rep." "Congo, Rep."
## [39] "Colombia" "Comoros"
## [41] "Cabo Verde" "Costa Rica"
## [43] "Curacao" "Cayman Islands"
## [45] "Cyprus" "Czechia"
## [47] "Germany" "Djibouti"
## [49] "Dominica" "Denmark"
## [51] "Dominican Republic" "Algeria"
## [53] "Ecuador" "Egypt, Arab Rep."
## [55] "Euro area" "Eritrea"
## [57] "Spain" "Estonia"
## [59] "Ethiopia" "Finland"
## [61] "Fiji" "France"
## [63] "Faroe Islands" "Micronesia, Fed. Sts."
## [65] "Gabon" "United Kingdom"
## [67] "Georgia" "Ghana"
## [69] "Guinea" "Gambia, The"
## [71] "Guinea-Bissau" "Equatorial Guinea"
## [73] "Greece" "Grenada"
## [75] "Guatemala" "Guyana"
## [77] "Hong Kong SAR, China" "Honduras"
## [79] "Croatia" "Haiti"
## [81] "Hungary" "Indonesia"
## [83] "India" "Ireland"
## [85] "Iran, Islamic Rep." "Iraq"
## [87] "Iceland" "Israel"
## [89] "Italy" "Jamaica"
## [91] "Jordan" "Japan"
## [93] "Kazakhstan" "Kenya"
## [95] "Kyrgyz Republic" "Cambodia"
## [97] "Kiribati" "St. Kitts and Nevis"
## [99] "Korea, Rep." "Kuwait"
## [101] "Lao PDR" "Lebanon"
## [103] "Liberia" "Libya"
## [105] "St. Lucia" "Sri Lanka"
## [107] "Lesotho" "Lithuania"
## [109] "Luxembourg" "Latvia"
## [111] "Macao SAR, China" "Morocco"
## [113] "Moldova" "Madagascar"
## [115] "Maldives" "Mexico"
## [117] "Marshall Islands" "North Macedonia"
## [119] "Mali" "Malta"
## [121] "Myanmar" "Montenegro"
## [123] "Mongolia" "Mozambique"
## [125] "Mauritania" "Mauritius"
## [127] "Malawi" "Malaysia"
## [129] "Namibia" "New Caledonia"
## [131] "Niger" "Nigeria"
## [133] "Nicaragua" "Netherlands"
## [135] "Norway" "Nepal"
## [137] "Nauru" "New Zealand"
## [139] "Oman" "Pakistan"
## [141] "Panama" "Peru"
## [143] "Philippines" "Palau"
## [145] "Papua New Guinea" "Poland"
## [147] "Portugal" "Paraguay"
## [149] "West Bank and Gaza" "French Polynesia"
## [151] "Qatar" "Romania"
## [153] "Russian Federation" "Rwanda"
## [155] "Saudi Arabia" "Sudan"
## [157] "Senegal" "Singapore"
## [159] "Solomon Islands" "Sierra Leone"
## [161] "El Salvador" "San Marino"
## [163] "Serbia" "South Sudan"
## [165] "Sao Tome and Principe" "Suriname"
## [167] "Slovak Republic" "Slovenia"
## [169] "Sweden" "Eswatini"
## [171] "Sint Maarten (Dutch part)" "Seychelles"
## [173] "Syrian Arab Republic" "Turks and Caicos Islands"
## [175] "Chad" "Togo"
## [177] "Thailand" "Tajikistan"
## [179] "Timor-Leste" "Tonga"
## [181] "Trinidad and Tobago" "Tunisia"
## [183] "Turkiye" "Tuvalu"
## [185] "Tanzania" "Uganda"
## [187] "Ukraine" "Uruguay"
## [189] "United States" "Uzbekistan"
## [191] "St. Vincent and the Grenadines" "Venezuela, RB"
## [193] "Viet Nam" "Vanuatu"
## [195] "Samoa" "Kosovo"
## [197] "Yemen, Rep." "South Africa"
## [199] "Zambia" "Zimbabwe"
# remove the unneeded rows
current_account_balance <- current_account_balance %>%
mutate(Country = trimws(Country)) %>%
filter(!grepl(paste0(
"^Sub-Saharan Africa \\(excluding high income\\)$|",
"^Sub-Saharan Africa \\(IDA & IBRD countries\\)$|",
"^Upper middle income$|",
"^South Asia \\(IDA & IBRD\\)$|",
"^Middle East, North Africa, Afghanistan & Pakistan \\(IDA & IBRD\\)$|",
"^Latin America & the Caribbean \\(IDA & IBRD countries\\)$|",
"^Europe & Central Asia \\(IDA & IBRD countries\\)$|",
"^East Asia & Pacific \\(IDA & IBRD countries\\)$|",
"^World$|",
"^Post-demographic dividend$|",
"^Small states$|",
"^Pacific island small states$|",
"^Pre-demographic dividend$|",
"^Other small states$|",
"^OECD members$|",
"^Middle East, North Africa, Afghanistan & Pakistan \\(excluding high income\\)$|",
"^Middle East, North Africa, Afghanistan & Pakistan$|",
"^Middle income$|",
"^Late-demographic dividend$|",
"^Low & middle income$|",
"^Lower middle income$|",
"^Low income$|",
"^Least developed countries: UN classification$|",
"^Latin America & Caribbean$|",
"^Latin America & Caribbean \\(excluding high income\\)$|",
"^IDA only$|",
"^IDA blend$|",
"^IDA total$|",
"^IDA & IBRD total$|",
"^IBRD only$|",
"^Heavily indebted poor countries \\(HIPC\\)$|",
"^High income$|",
"^Fragile and conflict affected situations$|",
"^Europe & Central Asia$|",
"^East Asia & Pacific$|",
"^Europe & Central Asia \\(excluding high income\\)$|",
"^East Asia & Pacific \\(excluding high income\\)$|",
"^Caribbean small states$|",
"^Central Europe and the Baltics$|",
"^Arab World$|",
"^Africa Western and Central$|",
"^Africa Eastern and Southern$|",
"^Sub-Saharan Africa$|","^West Bank and Gaza$"
), Country, ignore.case = TRUE))
#remove deplicates
current_account_balance <- current_account_balance %>%
distinct()
#see if theres any duplicates left
current_account_balance_dup <- current_account_balance %>%
duplicated() %>%
sum()
print(paste("Number of duplicate rows:",current_account_balance_dup))
## [1] "Number of duplicate rows: 0"
Clean unemplyment
unemployment <- read_csv("unemployment.csv")
## Rows: 439 Columns: 54
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (54): Country Name, Country Code, Series Name, Series Code, 1975 [YR1975...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#rename the colomns
unemployment <- unemployment %>%
rename("Country" = "Country Name",
"Economic_Indicator" = "Series Name",
"Country_Code" = "Country Code",
"Indicator_Code" = "Series Code")
#clean colomns name and pivot to longer
unemployment <- unemployment %>%
rename_with(~ str_remove(., "\\s*\\[YR\\d+\\]"), matches("\\d{4}")) %>%
pivot_longer(cols = matches("^\\d{4}$"), # Match exactly 4 digits at start and end
names_to = "Years",
values_to = "Values") %>%
dplyr::select(Country, Country_Code, Economic_Indicator, Indicator_Code, Years, Values) %>%
mutate(Values = as.numeric(Values)) %>% #remove null values
drop_na()
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Values = as.numeric(Values)`.
## Caused by warning:
## ! NAs introduced by coercion
#remove deplicates
unemployment <- unemployment %>%
distinct()
#see if theres any duplicates left
unemployment_dup <- unemployment %>%
duplicated() %>%
sum()
print(paste("Number of duplicate rows:",unemployment_dup))
## [1] "Number of duplicate rows: 0"
Clean the following dataset before merging
#lending rate
lending_rate <- read_csv("lending_rate.csv")
## Rows: 4709 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country, Country_Code, Indicator_Name, Indicator_Code
## dbl (3): ...1, Years, Values
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
lending_rate <- lending_rate %>%
dplyr::mutate(Years = as.character(Years))
#deposit rate
deposit_rate <- read_csv("deposit_rate.csv")
## Rows: 4888 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country, Country_Code, Indicator_Name, Indicator_Code
## dbl (2): Years, Values
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
deposit_rate <- deposit_rate %>%
dplyr::mutate(Years = as.character(Years))
#interest rate spread
interest_rate_spread <- read_csv("interest_rate_spread.csv")
## Rows: 4988 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country, Country_Code, Indicator_Name, Indicator_Code
## dbl (3): ...1, Years, Values
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
interest_rate_spread <- interest_rate_spread %>%
dplyr::mutate(Years = as.character(Years))
#interest rate
real_interest_rate <- read_csv("real_interest_rate.csv")
## Rows: 4681 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country, Country_Code, Indicator_Name, Indicator_Code
## dbl (3): ...1, Years, Values
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
real_interest_rate <- real_interest_rate %>%
dplyr::mutate(Years = as.character(Years))
#risk premium
risk_premium <- read_csv("risk_premium.csv")
## Rows: 2413 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country, Country_Code, Indicator_Name, Indicator_Code
## dbl (3): ...1, Years, Values
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
risk_premium <- risk_premium %>%
dplyr::mutate(Years = as.character(Years))
#bank on none performance loan
bank_on_none_perfomance_loan <- read_csv("bank_on_none_perfomance_loan.csv")
## Rows: 2105 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country, Country_Code, Indicator_Name, Indicator_Code
## dbl (3): ...1, Years, Values
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
bank_on_none_perfomance_loan <- bank_on_none_perfomance_loan %>%
dplyr::mutate(Years = as.character(Years))
Merge the data sets together
Global_Rates <- gdp_current_us %>%
dplyr::select(Country, Country_Code, Years, GDP_current_us = Values) %>%
left_join(
gdp_per_capita %>%
dplyr::select(Country, Country_Code, Years, GDP_per_capita_growth = Values),
by = c("Country", "Country_Code", "Years")
) %>%
left_join(
inflation_consumer_price %>%
dplyr::select(Country, Country_Code, Years, Inflation = Values),
by = c("Country", "Country_Code", "Years")
) %>%
left_join(
lending_rate %>%
dplyr::select(Country, Country_Code, Years, Lending_rate = Values),
by = c("Country", "Country_Code", "Years")
) %>%
left_join(
deposit_rate %>%
dplyr::select(Country, Country_Code, Years, Deposit_rate = Values),
by = c("Country", "Country_Code", "Years")
) %>%
left_join(
current_account_balance %>%
dplyr::select(Country, Country_Code, Years, Current_account_balance = Values),
by = c("Country", "Country_Code", "Years")
) %>%
left_join(
exchange_rate %>%
dplyr::select(Country, Country_Code, Years, Exchange_rate = Values),
by = c("Country", "Country_Code", "Years")
) %>%
left_join(
bank_on_none_perfomance_loan %>%
dplyr::select(Country, Country_Code, Years, Non_performing_loans = Values),
by = c("Country", "Country_Code", "Years")
) %>%
left_join(
private_sector_credit %>%
dplyr::select(Country, Country_Code, Years, Private_sector_credit = Values),
by = c("Country", "Country_Code", "Years")
) %>%
left_join(
unemployment %>%
dplyr::select(Country, Country_Code, Years, Unemployment = Values),
by = c("Country", "Country_Code", "Years")
) %>%
left_join(
interest_rate_spread %>%
dplyr::select(Country, Country_Code, Years, Interest_rate_spread = Values),
by = c("Country", "Country_Code", "Years")
) %>%
left_join(
real_interest_rate %>%
dplyr::select(Country, Country_Code, Years, Real_interest_rate = Values),
by = c("Country", "Country_Code", "Years")
) %>%
left_join(
risk_premium %>%
dplyr::select(Country, Country_Code, Years, Risk_premium = Values),
by = c("Country", "Country_Code", "Years")
)
Clean the dataset and save
glimpse(Global_Rates)
## Rows: 15,329
## Columns: 16
## $ Country <chr> "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "…
## $ Country_Code <chr> "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW…
## $ Years <chr> "1986", "1987", "1988", "1989", "1990", "1991"…
## $ GDP_current_us <dbl> 405586592, 487709497, 596648045, 695530726, 76…
## $ GDP_per_capita_growth <dbl> NA, 17.59320595, 18.30468737, 10.06693198, 0.1…
## $ Inflation <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ Lending_rate <dbl> 11.00000, 10.30000, 10.24167, 10.47500, 10.600…
## $ Deposit_rate <dbl> 6.400000, 6.583333, 6.700000, 6.700000, 6.7000…
## $ Current_account_balance <dbl> NA, -18659217.9, -22569832.4, -44301676.0, -46…
## $ Exchange_rate <dbl> NA, 1.79, 1.79, 1.79, 1.79, 1.79, 1.79, 1.79, …
## $ Non_performing_loans <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ Private_sector_credit <dbl> 39.46433, 39.44192, 43.77566, 43.35639, 44.488…
## $ Unemployment <dbl> NA, NA, NA, NA, NA, 5.92, NA, NA, 5.36, NA, NA…
## $ Interest_rate_spread <dbl> NA, 4.600000, 3.716667, 3.541667, 3.775000, 3.…
## $ Real_interest_rate <dbl> NA, 6.475434, 6.918180, 6.264234, 4.566641, 4.…
## $ Risk_premium <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#remove the null values
Global_Rates <- Global_Rates %>%
na.omit()
#remove all duplicates
Global_Rates <- Global_Rates %>%
distinct()
#save the Global_rate dataset
write.csv(Global_Rates, "Global_Rates.csv", row.names = FALSE)